Merge pull request #55 from creekorful/improved-scheduler

Improve scheduling
pull/56/head^2
Aloïs Micard 4 years ago committed by GitHub
commit b4dcfb4d70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -87,7 +87,15 @@ func handleMessage(apiClient api.Client) messaging.MsgHandler {
}
// Finally push found URLs
publishedURLS := map[string]string{}
for _, url := range urls {
if _, exist := publishedURLS[url]; exist {
log.Trace().
Str("url", url).
Msg("Skipping duplicate URL")
continue
}
log.Trace().
Str("url", url).
Msg("Publishing found URL")
@ -98,6 +106,8 @@ func handleMessage(apiClient api.Client) messaging.MsgHandler {
Str("err", err.Error()).
Msg("Error while publishing URL")
}
publishedURLS[url] = url
}
return nil

@ -42,12 +42,15 @@ This is sparta
t.Fail()
}
if len(urls) == 0 {
if len(urls) != 2 {
t.FailNow()
}
if urls[0] != "https://google.com/test?test=test" {
t.Fail()
}
if urls[1] != "https://example.org" {
t.Fail()
}
if resDto.Description != "Zhello world" {
t.Fail()
@ -77,7 +80,7 @@ func TestHandleMessage(t *testing.T) {
body := `
<title>Creekorful Inc</title>
This is sparta
This is sparta (hosted on https://example.org)
<a href="https://google.com/test?test=test#12">
@ -106,6 +109,8 @@ This is sparta
}}).Return(api.ResourceDto{}, nil)
// make sure we are pushing found URLs
// should be called only one time
subscriberMock.EXPECT().
PublishMsg(&messaging.URLFoundMsg{URL: "https://example.org"}).
Return(nil)

@ -92,6 +92,12 @@ func handleMessage(apiClient api.Client, refreshDelay time.Duration, forbiddenEx
return nil // Technically not an error
}
// Make sure protocol is allowed
if !strings.HasPrefix(u.Scheme, "http") {
log.Trace().Stringer("url", u).Msg("URL has invalid scheme")
return nil // Technically not an error
}
// Make sure extension is not forbidden
for _, ext := range forbiddenExtensions {
if strings.HasSuffix(u.Path, "."+ext) {

@ -1,6 +1,7 @@
package scheduler
import (
"fmt"
"github.com/creekorful/trandoshan/api"
"github.com/creekorful/trandoshan/api_mock"
"github.com/creekorful/trandoshan/internal/messaging"
@ -47,6 +48,27 @@ func TestHandleMessageNotOnion(t *testing.T) {
}
}
// TestHandleMessageWrongProtocol verifies that handleMessage returns a nil
// error for URLs whose scheme is not HTTP-based (here irc and ftp).
//
// No expectations are registered on the API client mock, so gomock reports
// any call made to it as unexpected.
func TestHandleMessageWrongProtocol(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	apiClientMock := api_mock.NewMockClient(mockCtrl)
	subscriberMock := messaging_mock.NewMockSubscriber(mockCtrl)

	msg := nats.Msg{}
	for _, protocol := range []string{"irc", "ftp"} {
		rawURL := fmt.Sprintf("%s://example.onion", protocol)

		// One ReadMsg expectation per protocol: the mock fills the decoded
		// message (argument index 1) with the URL under test.
		subscriberMock.EXPECT().
			ReadMsg(&msg, &messaging.URLFoundMsg{}).
			SetArg(1, messaging.URLFoundMsg{URL: rawURL}).
			Return(nil)

		if err := handleMessage(apiClientMock, -1, []string{})(subscriberMock, &msg); err != nil {
			// Report which URL failed and why instead of failing silently.
			t.Fatalf("handleMessage(%q) returned unexpected error: %v", rawURL, err)
		}
	}
}
func TestHandleMessageAlreadyCrawled(t *testing.T) {
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()

Loading…
Cancel
Save