Refactor scheduler to use new configapi

pull/89/head
Aloïs Micard 3 years ago
parent a1f8313246
commit 278f8e58c2
No known key found for this signature in database
GPG Key ID: 1A0EB82F071F5EFE

@ -1,10 +1,16 @@
package client
import (
"github.com/creekorful/trandoshan/internal/event"
"time"
)
//go:generate mockgen -destination=../client_mock/client_mock.go -package=client_mock . Client
const (
forbiddenMimeTypes = "forbidden-mime-types"
forbiddenHostnames = "forbidden-hostnames"
ForbiddenMimeTypesKey = "forbidden-mime-types"
ForbiddenHostnamesKey = "forbidden-hostnames"
RefreshDelayKey = "refresh-delay"
)
// ForbiddenMimeType is the mime types who's crawling is forbidden
@ -20,6 +26,11 @@ type ForbiddenHostname struct {
Hostname string `json:"hostname"`
}
// RefreshDelay is the refresh delay for re-crawling
type RefreshDelay struct {
Delay time.Duration `json:"delay"`
}
// Client is a nice client interface for the ConfigAPI
type Client interface {
GetForbiddenMimeTypes() ([]ForbiddenMimeType, error)
@ -27,4 +38,14 @@ type Client interface {
GetForbiddenHostnames() ([]ForbiddenHostname, error)
SetForbiddenHostnames(values []ForbiddenHostname) error
GetRefreshDelay() (RefreshDelay, error)
SetRefreshDelay(value RefreshDelay) error
}
type client struct {
}
func NewConfigClient(configApiURL string, subscriber event.Subscriber, keys []string) (Client, error) {
return nil, nil // TODO
}

@ -4,7 +4,7 @@ import (
"errors"
"fmt"
"github.com/creekorful/trandoshan/api"
"github.com/creekorful/trandoshan/internal/duration"
"github.com/creekorful/trandoshan/internal/configapi/client"
"github.com/creekorful/trandoshan/internal/event"
"github.com/creekorful/trandoshan/internal/logging"
"github.com/creekorful/trandoshan/internal/util"
@ -38,18 +38,7 @@ func GetApp() *cli.App {
util.GetHubURI(),
util.GetAPIURIFlag(),
util.GetAPITokenFlag(),
&cli.StringFlag{
Name: "refresh-delay",
Usage: "Duration before allowing crawl of existing resource (none = never)",
},
&cli.StringSliceFlag{
Name: "forbidden-extensions",
Usage: "Extensions to disable scheduling for (i.e png, exe, css, ...) (the dot will be added automatically)",
},
&cli.StringSliceFlag{
Name: "forbidden-hostnames",
Usage: "Hostnames to disable scheduling for",
},
util.GetConfigApiURIFlag(),
},
Action: execute,
}
@ -58,15 +47,11 @@ func GetApp() *cli.App {
func execute(ctx *cli.Context) error {
logging.ConfigureLogger(ctx)
refreshDelay := duration.ParseDuration(ctx.String("refresh-delay"))
log.Info().
Str("ver", ctx.App.Version).
Str("hub-uri", ctx.String("hub-uri")).
Str("api-uri", ctx.String("api-uri")).
Strs("forbidden-exts", ctx.StringSlice("forbidden-extensions")).
Strs("forbidden-hostnames", ctx.StringSlice("forbidden-hostnames")).
Dur("refresh-delay", refreshDelay).
Str("config-api-uri", ctx.String("config-api-uri")).
Msg("Starting tdsh-scheduler")
// Create the API client
@ -79,11 +64,16 @@ func execute(ctx *cli.Context) error {
}
defer sub.Close()
// Create the ConfigAPI client
keys := []string{client.ForbiddenMimeTypesKey, client.ForbiddenHostnamesKey, client.RefreshDelayKey}
configClient, err := client.NewConfigClient(ctx.String("config-api-uri"), sub, keys)
if err != nil {
return err
}
state := state{
apiClient: apiClient,
refreshDelay: refreshDelay,
forbiddenExtensions: ctx.StringSlice("forbidden-extensions"),
forbiddenHostnames: ctx.StringSlice("forbidden-hostnames"),
apiClient: apiClient,
configClient: configClient,
}
if err := sub.SubscribeAsync(event.FoundURLExchange, "schedulingQueue", state.handleURLFoundEvent); err != nil {
@ -107,10 +97,8 @@ func execute(ctx *cli.Context) error {
}
type state struct {
apiClient api.API
refreshDelay time.Duration
forbiddenExtensions []string
forbiddenHostnames []string
apiClient api.API
configClient client.Client
}
func (state *state) handleURLFoundEvent(subscriber event.Subscriber, body io.Reader) error {
@ -137,24 +125,32 @@ func (state *state) handleURLFoundEvent(subscriber event.Subscriber, body io.Rea
}
// Make sure extension is not forbidden
for _, ext := range state.forbiddenExtensions {
if strings.HasSuffix(u.Path, "."+ext) {
return fmt.Errorf("%s (.%s) %w", u, ext, errExtensionNotAllowed)
if mimeTypes, err := state.configClient.GetForbiddenMimeTypes(); err == nil {
for _, mimeType := range mimeTypes {
for _, ext := range mimeType.Extensions {
if strings.HasSuffix(u.Path, "."+ext) {
return fmt.Errorf("%s (.%s) %w", u, ext, errExtensionNotAllowed)
}
}
}
}
// Make sure hostname is not forbidden
for _, hostname := range state.forbiddenHostnames {
if strings.Contains(u.Hostname(), hostname) {
return fmt.Errorf("%s %w", u, errHostnameNotAllowed)
if hostnames, err := state.configClient.GetForbiddenHostnames(); err == nil {
for _, hostname := range hostnames {
if strings.Contains(u.Hostname(), hostname.Hostname) {
return fmt.Errorf("%s %w", u, errHostnameNotAllowed)
}
}
}
// If we want to allow re-schedule of existing crawled resources we need to retrieve only resources
// that are newer than `now - refreshDelay`.
endDate := time.Time{}
if state.refreshDelay != -1 {
endDate = time.Now().Add(-state.refreshDelay)
if refreshDelay, err := state.configClient.GetRefreshDelay(); err == nil {
if refreshDelay.Delay != -1 {
endDate = time.Now().Add(-refreshDelay.Delay)
}
}
params := api.ResSearchParams{

@ -6,6 +6,8 @@ import (
"fmt"
"github.com/creekorful/trandoshan/api"
"github.com/creekorful/trandoshan/api_mock"
"github.com/creekorful/trandoshan/internal/configapi/client"
"github.com/creekorful/trandoshan/internal/configapi/client_mock"
"github.com/creekorful/trandoshan/internal/event"
"github.com/creekorful/trandoshan/internal/event_mock"
"github.com/golang/mock/gomock"
@ -18,6 +20,7 @@ func TestHandleMessageNotOnion(t *testing.T) {
apiClientMock := api_mock.NewMockAPI(mockCtrl)
subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
configClientMock := client_mock.NewMockClient(mockCtrl)
msg := bytes.NewReader(nil)
subscriberMock.EXPECT().
@ -26,9 +29,8 @@ func TestHandleMessageNotOnion(t *testing.T) {
Return(nil)
s := state{
apiClient: apiClientMock,
refreshDelay: -1,
forbiddenExtensions: []string{},
apiClient: apiClientMock,
configClient: configClientMock,
}
if err := s.handleURLFoundEvent(subscriberMock, msg); !errors.Is(err, errNotOnionHostname) {
@ -42,13 +44,13 @@ func TestHandleMessageWrongProtocol(t *testing.T) {
apiClientMock := api_mock.NewMockAPI(mockCtrl)
subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
configClientMock := client_mock.NewMockClient(mockCtrl)
msg := bytes.NewReader(nil)
s := state{
apiClient: apiClientMock,
refreshDelay: -1,
forbiddenExtensions: []string{},
apiClient: apiClientMock,
configClient: configClientMock,
}
for _, protocol := range []string{"irc", "ftp"} {
@ -69,6 +71,7 @@ func TestHandleMessageAlreadyCrawled(t *testing.T) {
apiClientMock := api_mock.NewMockAPI(mockCtrl)
subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
configClientMock := client_mock.NewMockClient(mockCtrl)
msg := bytes.NewReader(nil)
subscriberMock.EXPECT().
@ -85,10 +88,13 @@ func TestHandleMessageAlreadyCrawled(t *testing.T) {
SearchResources(&params).
Return([]api.ResourceDto{}, int64(1), nil)
configClientMock.EXPECT().GetForbiddenMimeTypes().Return([]client.ForbiddenMimeType{{Extensions: []string{"png"}}}, nil)
configClientMock.EXPECT().GetForbiddenHostnames().Return([]client.ForbiddenHostname{}, nil)
configClientMock.EXPECT().GetRefreshDelay().Return(client.RefreshDelay{Delay: -1}, nil)
s := state{
apiClient: apiClientMock,
refreshDelay: -1,
forbiddenExtensions: []string{"png"},
apiClient: apiClientMock,
configClient: configClientMock,
}
if err := s.handleURLFoundEvent(subscriberMock, msg); !errors.Is(err, errShouldNotSchedule) {
@ -102,6 +108,7 @@ func TestHandleMessageForbiddenExtensions(t *testing.T) {
apiClientMock := api_mock.NewMockAPI(mockCtrl)
subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
configClientMock := client_mock.NewMockClient(mockCtrl)
msg := bytes.NewReader(nil)
subscriberMock.EXPECT().
@ -109,10 +116,11 @@ func TestHandleMessageForbiddenExtensions(t *testing.T) {
SetArg(1, event.FoundURLEvent{URL: "https://example.onion/image.png?id=12&test=2"}).
Return(nil)
configClientMock.EXPECT().GetForbiddenMimeTypes().Return([]client.ForbiddenMimeType{{Extensions: []string{"png"}}}, nil)
s := state{
apiClient: apiClientMock,
refreshDelay: -1,
forbiddenExtensions: []string{"png"},
apiClient: apiClientMock,
configClient: configClientMock,
}
if err := s.handleURLFoundEvent(subscriberMock, msg); !errors.Is(err, errExtensionNotAllowed) {
@ -126,28 +134,29 @@ func TestHandleMessageHostnameForbidden(t *testing.T) {
apiClientMock := api_mock.NewMockAPI(mockCtrl)
subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
configClientMock := client_mock.NewMockClient(mockCtrl)
type test struct {
url string
forbiddenHostnames []string
forbiddenHostnames []client.ForbiddenHostname
}
tests := []test{
{
url: "https://facebookcorewwwi.onion/image.png?id=12&test=2",
forbiddenHostnames: []string{"facebookcorewwwi.onion"},
forbiddenHostnames: []client.ForbiddenHostname{{Hostname: "facebookcorewwwi.onion"}},
},
{
url: "https://google.onion:9099",
forbiddenHostnames: []string{"google.onion"},
forbiddenHostnames: []client.ForbiddenHostname{{Hostname: "google.onion"}},
},
{
url: "http://facebook.onion:443/news/test.php?id=12&username=test",
forbiddenHostnames: []string{"facebook.onion"},
forbiddenHostnames: []client.ForbiddenHostname{{Hostname: "facebook.onion"}},
},
{
url: "https://www.facebookcorewwwi.onion/recover/initiate?ars=facebook_login",
forbiddenHostnames: []string{"facebookcorewwwi.onion"},
forbiddenHostnames: []client.ForbiddenHostname{{Hostname: "facebookcorewwwi.onion"}},
},
}
@ -158,11 +167,12 @@ func TestHandleMessageHostnameForbidden(t *testing.T) {
SetArg(1, event.FoundURLEvent{URL: test.url}).
Return(nil)
configClientMock.EXPECT().GetForbiddenMimeTypes().Return([]client.ForbiddenMimeType{}, nil)
configClientMock.EXPECT().GetForbiddenHostnames().Return(test.forbiddenHostnames, nil)
s := state{
apiClient: apiClientMock,
refreshDelay: -1,
forbiddenExtensions: []string{},
forbiddenHostnames: test.forbiddenHostnames,
apiClient: apiClientMock,
configClient: configClientMock,
}
if err := s.handleURLFoundEvent(subscriberMock, msg); !errors.Is(err, errHostnameNotAllowed) {
@ -177,6 +187,7 @@ func TestHandleMessage(t *testing.T) {
apiClientMock := api_mock.NewMockAPI(mockCtrl)
subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
configClientMock := client_mock.NewMockClient(mockCtrl)
msg := bytes.NewReader(nil)
subscriberMock.EXPECT().
@ -197,10 +208,13 @@ func TestHandleMessage(t *testing.T) {
Publish(&event.NewURLEvent{URL: "https://example.onion"}).
Return(nil)
configClientMock.EXPECT().GetForbiddenMimeTypes().Return([]client.ForbiddenMimeType{}, nil)
configClientMock.EXPECT().GetForbiddenHostnames().Return([]client.ForbiddenHostname{}, nil)
configClientMock.EXPECT().GetRefreshDelay().Return(client.RefreshDelay{Delay: -1}, nil)
s := state{
apiClient: apiClientMock,
refreshDelay: -1,
forbiddenExtensions: []string{},
apiClient: apiClientMock,
configClient: configClientMock,
}
if err := s.handleURLFoundEvent(subscriberMock, msg); err != nil {

@ -0,0 +1,12 @@
package util
import "github.com/urfave/cli/v2"
// GetConfigApiURIFlag return the cli flag to set config api uri
func GetConfigApiURIFlag() *cli.StringFlag {
return &cli.StringFlag{
Name: "config-api-uri",
Usage: "URI to the ConfigAPI server",
Required: true,
}
}
Loading…
Cancel
Save