You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bathyscaphe/internal/crawler/crawler_test.go

209 lines
5.8 KiB
Go

package crawler
import (
"errors"
"github.com/creekorful/bathyscaphe/internal/clock_mock"
"github.com/creekorful/bathyscaphe/internal/configapi/client"
"github.com/creekorful/bathyscaphe/internal/configapi/client_mock"
"github.com/creekorful/bathyscaphe/internal/event"
"github.com/creekorful/bathyscaphe/internal/event_mock"
"github.com/creekorful/bathyscaphe/internal/http"
"github.com/creekorful/bathyscaphe/internal/http_mock"
"github.com/creekorful/bathyscaphe/internal/process"
"github.com/creekorful/bathyscaphe/internal/process_mock"
"github.com/creekorful/bathyscaphe/internal/test"
"github.com/golang/mock/gomock"
"strings"
"testing"
"time"
)
// TestState_Name verifies that State.Name returns the process name "crawler".
func TestState_Name(t *testing.T) {
	const want = "crawler"

	s := State{}
	if got := s.Name(); got != want {
		// Report both values so a regression is diagnosable from the log alone.
		t.Errorf("State.Name() = %q, want %q", got, want)
	}
}
func TestState_Features(t *testing.T) {
s := State{}
test.CheckProcessFeatures(t, &s, []process.Feature{process.EventFeature, process.ConfigFeature, process.CrawlingFeature})
}
// TestState_CustomFlags hands a fresh State to test.CheckProcessCustomFlags
// with nil as the expected value.
func TestState_CustomFlags(t *testing.T) {
	state := &State{}

	test.CheckProcessCustomFlags(t, state, nil)
}
func TestState_Initialize(t *testing.T) {
test.CheckInitialize(t, &State{}, func(p *process_mock.MockProviderMockRecorder) {
p.HTTPClient()
p.Clock()
p.ConfigClient([]string{client.AllowedMimeTypesKey, client.ForbiddenHostnamesKey})
})
}
func TestState_Subscribers(t *testing.T) {
s := State{}
test.CheckProcessSubscribers(t, &s, []test.SubscriberDef{
{Queue: "crawlingQueue", Exchange: "url.new"},
})
}
func TestHandleNewURLEvent(t *testing.T) {
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
httpClientMock := http_mock.NewMockClient(mockCtrl)
httpResponseMock := http_mock.NewMockResponse(mockCtrl)
clockMock := clock_mock.NewMockClock(mockCtrl)
configClientMock := client_mock.NewMockClient(mockCtrl)
s := State{
httpClient: httpClientMock,
configClient: configClientMock,
clock: clockMock,
}
type test struct {
// the incoming url
url string
// the response headers
responseHeaders map[string]string
// the response body
responseBody string
// internal state: allowed mime types
allowedMimeTypes []client.MimeType
// The expected error
err error
}
tests := []test{
{
url: "https://example.onion/image.png?id=12&test=2",
responseHeaders: map[string]string{"Content-Type": "text/plain", "Server": "Debian"},
responseBody: "Hello",
allowedMimeTypes: []client.MimeType{
{ContentType: "text/plain", Extensions: nil},
{ContentType: "text/css", Extensions: nil},
},
},
{
url: "https://example.onion",
responseHeaders: map[string]string{"Content-Type": "text/plain"},
responseBody: "Hello",
allowedMimeTypes: []client.MimeType{},
},
{
url: "https://example.onion",
responseHeaders: map[string]string{"Content-Type": "text/plain"},
responseBody: "Hello",
allowedMimeTypes: []client.MimeType{
{
ContentType: "text/",
Extensions: nil,
},
},
},
{
url: "https://example.onion/image.png",
responseHeaders: map[string]string{"Content-Type": "image/png"},
responseBody: "Hello",
allowedMimeTypes: []client.MimeType{
{
ContentType: "text/plain",
Extensions: nil,
},
},
err: errContentTypeNotAllowed,
},
{
url: "https://downhostname.onion",
responseHeaders: map[string]string{"Content-Type": "text/plain"},
responseBody: "Hello",
allowedMimeTypes: []client.MimeType{
{
ContentType: "text/plain",
Extensions: nil,
},
},
err: http.ErrTimeout,
},
}
for _, test := range tests {
msg := event.RawMessage{}
subscriberMock.EXPECT().
Read(&msg, &event.NewURLEvent{}).
SetArg(1, event.NewURLEvent{URL: test.url}).
Return(nil)
// mock crawling
switch test.err {
case http.ErrTimeout:
httpClientMock.EXPECT().Get(test.url).Return(httpResponseMock, http.ErrTimeout)
subscriberMock.EXPECT().PublishEvent(&event.TimeoutURLEvent{URL: test.url}).Return(nil)
break
default:
httpResponseMock.EXPECT().Headers().Return(test.responseHeaders)
httpClientMock.EXPECT().Get(test.url).Return(httpResponseMock, nil)
// mock config retrieval
configClientMock.EXPECT().GetAllowedMimeTypes().Return(test.allowedMimeTypes, nil)
break
}
configClientMock.EXPECT().GetForbiddenHostnames().Return([]client.ForbiddenHostname{}, nil)
if test.err == nil {
httpResponseMock.EXPECT().Headers().Return(test.responseHeaders)
httpResponseMock.EXPECT().Body().Return(strings.NewReader(test.responseBody))
tn := time.Now()
clockMock.EXPECT().Now().Return(tn)
// if test should pass expect event publishing
subscriberMock.EXPECT().PublishEvent(&event.NewResourceEvent{
URL: test.url,
Body: test.responseBody,
Headers: test.responseHeaders,
Time: tn,
}).Return(nil)
}
err := s.handleNewURLEvent(subscriberMock, msg)
if test.err == nil && err != nil {
t.Errorf("test should have passed but has failed with: %s", err)
}
if !errors.Is(err, test.err) {
t.Errorf("test shouldn't have passed but hasn't returned expected error: %s", err)
}
}
}
// TestHandleNewURLEventHostnameForbidden checks that handleNewURLEvent returns
// errHostnameNotAllowed for a URL on host l.facebookcorewwwi.onion when the
// config client reports facebookcorewwwi.onion as a forbidden hostname.
//
// Only the config client is set on State, and the subscriber mock only expects
// the Read call, so any other call on the subscriber fails the test through
// gomock.
func TestHandleNewURLEventHostnameForbidden(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	subscriberMock := event_mock.NewMockSubscriber(mockCtrl)
	configClientMock := client_mock.NewMockClient(mockCtrl)

	s := State{
		configClient: configClientMock,
	}

	// the handler decodes the raw message into a NewURLEvent
	msg := event.RawMessage{}
	subscriberMock.EXPECT().
		Read(&msg, &event.NewURLEvent{}).
		SetArg(1, event.NewURLEvent{URL: "https://l.facebookcorewwwi.onion/test.php"}).
		Return(nil)

	configClientMock.EXPECT().GetForbiddenHostnames().
		Return([]client.ForbiddenHostname{{Hostname: "facebookcorewwwi.onion"}}, nil)

	err := s.handleNewURLEvent(subscriberMock, msg)
	if !errors.Is(err, errHostnameNotAllowed) {
		// Report both values so a regression is diagnosable from the log alone.
		t.Errorf("handleNewURLEvent() error = %v, want %v", err, errHostnameNotAllowed)
	}
}