Add mock http client & response

pull/56/head
Aloïs Micard 4 years ago
parent 59aa2cf86f
commit 5f657cfc74
No known key found for this signature in database
GPG Key ID: 1A0EB82F071F5EFE

@ -3,6 +3,7 @@ package crawler
import (
"crypto/tls"
"fmt"
"github.com/creekorful/trandoshan/internal/http"
"github.com/creekorful/trandoshan/internal/logging"
"github.com/creekorful/trandoshan/internal/messaging"
"github.com/creekorful/trandoshan/internal/util"
@ -11,6 +12,7 @@ import (
"github.com/urfave/cli/v2"
"github.com/valyala/fasthttp"
"github.com/valyala/fasthttp/fasthttpproxy"
"io/ioutil"
"strings"
"time"
)
@ -57,7 +59,7 @@ func execute(ctx *cli.Context) error {
Msg("Starting tdsh-crawler")
// Create the HTTP client
httpClient := &fasthttp.Client{
httpClient := http.NewFastHTTPClient(&fasthttp.Client{
// Use given TOR proxy to reach the hidden services
Dial: fasthttpproxy.FasthttpSocksDialer(ctx.String("tor-uri")),
// Disable SSL verification since we do not really care about this
@ -65,7 +67,7 @@ func execute(ctx *cli.Context) error {
ReadTimeout: time.Second * 5,
WriteTimeout: time.Second * 5,
Name: ctx.String("user-agent"),
}
})
// Create the NATS subscriber
sub, err := messaging.NewSubscriber(ctx.String("nats-uri"))
@ -84,7 +86,7 @@ func execute(ctx *cli.Context) error {
return nil
}
func handleMessage(httpClient *fasthttp.Client, allowedContentTypes []string) messaging.MsgHandler {
func handleMessage(httpClient http.Client, allowedContentTypes []string) messaging.MsgHandler {
return func(sub messaging.Subscriber, msg *nats.Msg) error {
var urlMsg messaging.URLTodoMsg
if err := sub.ReadMsg(msg, &urlMsg); err != nil {
@ -110,34 +112,17 @@ func handleMessage(httpClient *fasthttp.Client, allowedContentTypes []string) me
}
}
func crawURL(httpClient *fasthttp.Client, url string, allowedContentTypes []string) (string, map[string]string, error) {
func crawURL(httpClient http.Client, url string, allowedContentTypes []string) (string, map[string]string, error) {
log.Debug().Str("url", url).Msg("Processing URL")
// Query the website
req := fasthttp.AcquireRequest()
resp := fasthttp.AcquireResponse()
defer fasthttp.ReleaseRequest(req)
defer fasthttp.ReleaseResponse(resp)
req.SetRequestURI(url)
if err := httpClient.Do(req, resp); err != nil {
r, err := httpClient.Get(url)
if err != nil {
return "", nil, err
}
switch code := resp.StatusCode(); {
case code > 302:
return "", nil, fmt.Errorf("non-managed error code %d", code)
// follow redirect
case code == 301 || code == 302:
if location := string(resp.Header.Peek("Location")); location != "" {
return crawURL(httpClient, location, allowedContentTypes)
}
}
// Determine if the content type is allowed
allowed := false
contentType := string(resp.Header.Peek("Content-Type"))
contentType := r.Headers()["Content-Type"]
for _, allowedContentType := range allowedContentTypes {
if strings.Contains(contentType, allowedContentType) {
allowed = true
@ -150,11 +135,10 @@ func crawURL(httpClient *fasthttp.Client, url string, allowedContentTypes []stri
return "", nil, err
}
// Parse headers
headers := map[string]string{}
resp.Header.VisitAll(func(key, value []byte) {
headers[string(key)] = string(value) // TODO manage multiple values?
})
return string(resp.Body()), headers, nil
// Read body
b, err := ioutil.ReadAll(r.Body())
if err != nil {
return "", nil, err
}
return string(b), r.Headers(), nil
}

@ -1 +1,118 @@
package crawler
import (
"github.com/creekorful/trandoshan/internal/http_mock"
"github.com/creekorful/trandoshan/internal/messaging"
"github.com/creekorful/trandoshan/internal/messaging_mock"
"github.com/golang/mock/gomock"
"github.com/nats-io/nats.go"
"strings"
"testing"
)
// TestCrawlURLForbiddenContentType checks that crawURL reports an error and
// returns neither a body nor headers when the response Content-Type
// (image/png) is not part of the allowed content types (text/plain).
func TestCrawlURLForbiddenContentType(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	httpClientMock := http_mock.NewMockClient(mockCtrl)
	url := "https://example.onion"
	allowedContentTypes := []string{"text/plain"}

	httpResponseMock := http_mock.NewMockResponse(mockCtrl)
	httpResponseMock.EXPECT().Headers().Return(map[string]string{"Content-Type": "image/png"})
	httpClientMock.EXPECT().Get(url).Return(httpResponseMock, nil)

	body, headers, err := crawURL(httpClientMock, url, allowedContentTypes)

	// Report each expectation separately so a failure says what went wrong.
	if err == nil {
		t.Error("crawURL() returned a nil error for a forbidden content type")
	}
	if body != "" {
		t.Errorf("crawURL() body = %q, want empty", body)
	}
	if headers != nil {
		t.Errorf("crawURL() headers = %v, want nil", headers)
	}
}
// TestCrawlURLSameContentType checks that crawURL returns the body and the
// headers of the response when its Content-Type matches the single allowed
// content type exactly.
func TestCrawlURLSameContentType(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	httpClientMock := http_mock.NewMockClient(mockCtrl)
	url := "https://example.onion"
	allowedContentTypes := []string{"text/plain"}

	httpResponseMock := http_mock.NewMockResponse(mockCtrl)
	// Headers is expected twice: the mock controller fails the test otherwise.
	httpResponseMock.EXPECT().Headers().Times(2).Return(map[string]string{"Content-Type": "text/plain"})
	httpResponseMock.EXPECT().Body().Return(strings.NewReader("Hello"))
	httpClientMock.EXPECT().Get(url).Return(httpResponseMock, nil)

	body, headers, err := crawURL(httpClientMock, url, allowedContentTypes)
	if err != nil {
		t.Fatalf("crawURL() returned unexpected error: %v", err)
	}
	if body != "Hello" {
		t.Errorf("crawURL() body = %q, want %q", body, "Hello")
	}
	if len(headers) != 1 {
		t.Errorf("crawURL() returned %d headers, want 1", len(headers))
	}
	if got := headers["Content-Type"]; got != "text/plain" {
		t.Errorf("crawURL() Content-Type header = %q, want %q", got, "text/plain")
	}
}
// TestCrawlURLNoContentTypeFiltering checks that crawURL accepts the response
// when the allowed content types list only holds the empty string, which is
// the configuration used here to express "no filtering".
func TestCrawlURLNoContentTypeFiltering(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	httpClientMock := http_mock.NewMockClient(mockCtrl)
	url := "https://example.onion"
	allowedContentTypes := []string{""}

	httpResponseMock := http_mock.NewMockResponse(mockCtrl)
	// Headers is expected twice: the mock controller fails the test otherwise.
	httpResponseMock.EXPECT().Headers().Times(2).Return(map[string]string{"Content-Type": "text/plain"})
	httpResponseMock.EXPECT().Body().Return(strings.NewReader("Hello"))
	httpClientMock.EXPECT().Get(url).Return(httpResponseMock, nil)

	body, headers, err := crawURL(httpClientMock, url, allowedContentTypes)
	if err != nil {
		t.Fatalf("crawURL() returned unexpected error: %v", err)
	}
	if body != "Hello" {
		t.Errorf("crawURL() body = %q, want %q", body, "Hello")
	}
	if len(headers) != 1 {
		t.Errorf("crawURL() returned %d headers, want 1", len(headers))
	}
	if got := headers["Content-Type"]; got != "text/plain" {
		t.Errorf("crawURL() Content-Type header = %q, want %q", got, "text/plain")
	}
}
// TestHandleMessage checks the whole message handling flow against mocks: the
// handler reads a URLTodoMsg from the subscriber, fetches the URL through the
// HTTP client and publishes a NewResourceMsg holding the URL, the body and
// the headers of the response.
func TestHandleMessage(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	subscriberMock := messaging_mock.NewMockSubscriber(mockCtrl)
	httpClientMock := http_mock.NewMockClient(mockCtrl)
	httpResponseMock := http_mock.NewMockResponse(mockCtrl)

	const target = "https://example.onion/image.png?id=12&test=2"
	responseHeaders := map[string]string{"Content-Type": "text/plain", "Server": "Debian"}

	msg := nats.Msg{}
	// ReadMsg fills its second argument with the URL to crawl.
	subscriberMock.EXPECT().
		ReadMsg(&msg, &messaging.URLTodoMsg{}).
		SetArg(1, messaging.URLTodoMsg{URL: target}).
		Return(nil)

	httpResponseMock.EXPECT().Headers().Times(2).Return(responseHeaders)
	httpResponseMock.EXPECT().Body().Return(strings.NewReader("Hello"))
	httpClientMock.EXPECT().Get(target).Return(httpResponseMock, nil)

	subscriberMock.EXPECT().PublishMsg(&messaging.NewResourceMsg{
		URL:     target,
		Body:    "Hello",
		Headers: map[string]string{"Content-Type": "text/plain", "Server": "Debian"},
	}).Return(nil)

	handler := handleMessage(httpClientMock, []string{"text/plain", "text/css"})
	if err := handler(subscriberMock, &msg); err != nil {
		t.Errorf("handleMessage() handler returned unexpected error: %v", err)
	}
}

@ -0,0 +1,52 @@
package http
//go:generate mockgen -destination=../http_mock/client_mock.go -package=http_mock . Client
import (
"fmt"
"github.com/valyala/fasthttp"
)
// Client is a minimal HTTP client abstraction.
type Client interface {
// Get fetches the given URL and returns the resulting Response.
// This method follows redirections.
Get(URL string) (Response, error)
}
// client is the Client implementation backed by a fasthttp.Client.
type client struct {
// c is the underlying fasthttp client used to execute the requests.
c *fasthttp.Client
}
// NewFastHTTPClient wraps the given fasthttp.Client into a Client.
// The provided client is stored as-is and used for every request.
func NewFastHTTPClient(c *fasthttp.Client) Client {
	wrapper := new(client)
	wrapper.c = c
	return wrapper
}
// Get fetches URL and returns a copy of the response.
//
// A 301 or 302 response carrying a non-empty Location header is followed,
// using the Location value as-is for the next request. At most maxRedirects
// consecutive redirections are followed; past that an error is returned
// instead of recursing forever on a redirect loop. A 301/302 response without
// Location is returned to the caller unchanged. Any status code above 302 is
// reported as an error.
func (c *client) Get(URL string) (Response, error) {
	// Upper bound on consecutive redirections followed for a single Get.
	const maxRedirects = 10

	return c.get(URL, maxRedirects)
}

// get performs one request against URL and follows a redirection only while
// redirectsLeft is positive.
func (c *client) get(URL string, redirectsLeft int) (Response, error) {
	req := fasthttp.AcquireRequest()
	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseRequest(req)
	defer fasthttp.ReleaseResponse(resp)

	req.SetRequestURI(URL)

	if err := c.c.Do(req, resp); err != nil {
		return nil, err
	}

	switch code := resp.StatusCode(); {
	case code > 302:
		return nil, fmt.Errorf("non-managed error code %d", code)
	// follow redirect
	case code == 301 || code == 302:
		if location := string(resp.Header.Peek("Location")); location != "" {
			if redirectsLeft <= 0 {
				return nil, fmt.Errorf("too many redirects, last location %q", location)
			}
			return c.get(location, redirectsLeft-1)
		}
	}

	// resp goes back to the pool when this function returns, so hand the
	// caller its own copy.
	r := &response{}
	resp.CopyTo(&r.raw)

	return r, nil
}

@ -0,0 +1,33 @@
package http
//go:generate mockgen -destination=../http_mock/response_mock.go -package=http_mock . Response
import (
"bytes"
"github.com/valyala/fasthttp"
"io"
)
// Response is an HTTP response.
type Response interface {
// Headers returns the response headers as a name to value map.
Headers() map[string]string
// Body returns a reader over the response body.
Body() io.Reader
}
// response is the Response implementation wrapping a fasthttp.Response.
type response struct {
// raw is the underlying fasthttp response the accessors read from.
raw fasthttp.Response
}
// Headers builds a fresh map of the response headers on every call.
// Each header visited on the underlying response is stored under its name;
// if a name is visited more than once, the last visited value is kept.
func (r *response) Headers() map[string]string {
	result := make(map[string]string)

	collect := func(name, val []byte) {
		// TODO manage multiple values?
		result[string(name)] = string(val)
	}
	r.raw.Header.VisitAll(collect)

	return result
}
// Body returns a new reader positioned at the start of the response body.
func (r *response) Body() io.Reader {
	payload := r.raw.Body()
	return bytes.NewReader(payload)
}
Loading…
Cancel
Save