Merge remote-tracking branch 'origin/develop' into 54-extract-http-headers

pull/56/head
Aloïs Micard 4 years ago
commit 3b320d49c7
No known key found for this signature in database
GPG Key ID: 1A0EB82F071F5EFE

@ -88,7 +88,15 @@ func handleMessage(apiClient api.Client) messaging.MsgHandler {
}
// Finally push found URLs
publishedURLS := map[string]string{}
for _, url := range urls {
if _, exist := publishedURLS[url]; exist {
log.Trace().
Str("url", url).
Msg("Skipping duplicate URL")
continue
}
log.Trace().
Str("url", url).
Msg("Publishing found URL")
@ -99,6 +107,8 @@ func handleMessage(apiClient api.Client) messaging.MsgHandler {
Str("err", err.Error()).
Msg("Error while publishing URL")
}
publishedURLS[url] = url
}
return nil

@ -42,12 +42,15 @@ This is sparta
t.Fail()
}
if len(urls) == 0 {
if len(urls) != 2 {
t.FailNow()
}
if urls[0] != "https://google.com/test?test=test" {
t.Fail()
}
if urls[1] != "https://example.org" {
t.Fail()
}
if resDto.Description != "Zhello world" {
t.Fail()
@ -77,7 +80,7 @@ func TestHandleMessage(t *testing.T) {
body := `
<title>Creekorful Inc</title>
This is sparta
This is sparta (hosted on https://example.org)
<a href="https://google.com/test?test=test#12">
@ -111,6 +114,8 @@ This is sparta
}}).Return(api.ResourceDto{}, nil)
// make sure we are pushing found URLs
// should be called only one time
subscriberMock.EXPECT().
PublishMsg(&messaging.URLFoundMsg{URL: "https://example.org"}).
Return(nil)

@ -92,6 +92,12 @@ func handleMessage(apiClient api.Client, refreshDelay time.Duration, forbiddenEx
return nil // Technically not an error
}
// Make sure protocol is allowed
if !strings.HasPrefix(u.Scheme, "http") {
log.Trace().Stringer("url", u).Msg("URL has invalid scheme")
return nil // Technically not an error
}
// Make sure extension is not forbidden
for _, ext := range forbiddenExtensions {
if strings.HasSuffix(u.Path, "."+ext) {

@ -1,6 +1,7 @@
package scheduler
import (
"fmt"
"github.com/creekorful/trandoshan/api"
"github.com/creekorful/trandoshan/api_mock"
"github.com/creekorful/trandoshan/internal/messaging"
@ -47,6 +48,27 @@ func TestHandleMessageNotOnion(t *testing.T) {
}
}
// TestHandleMessageWrongProtocol checks that handleMessage returns a nil error
// when the received URL uses a scheme other than HTTP (here irc and ftp).
//
// No expectations are registered on apiClientMock; only the ReadMsg call that
// delivers the URL to the handler is set up on the subscriber mock.
func TestHandleMessageWrongProtocol(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	apiClientMock := api_mock.NewMockClient(mockCtrl)
	subscriberMock := messaging_mock.NewMockSubscriber(mockCtrl)

	msg := nats.Msg{}
	for _, protocol := range []string{"irc", "ftp"} {
		rawURL := fmt.Sprintf("%s://example.onion", protocol)

		// One ReadMsg expectation per protocol: SetArg fills the second
		// argument with a URLFoundMsg carrying the URL under test.
		subscriberMock.EXPECT().
			ReadMsg(&msg, &messaging.URLFoundMsg{}).
			SetArg(1, messaging.URLFoundMsg{URL: rawURL}).
			Return(nil)

		// Report which URL failed and why, instead of failing silently.
		if err := handleMessage(apiClientMock, -1, []string{})(subscriberMock, &msg); err != nil {
			t.Fatalf("handleMessage with URL %q returned unexpected error: %v", rawURL, err)
		}
	}
}
func TestHandleMessageAlreadyCrawled(t *testing.T) {
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()

Loading…
Cancel
Save