Finalize whole implementation

pull/24/head
Aloïs Micard 4 years ago
parent 6081a6a7c2
commit 742ccbaa79
No known key found for this signature in database
GPG Key ID: 1A0EB82F071F5EFE

@ -34,11 +34,19 @@ Ensure you have at least 3GB of memory as the Elasticsearch stack docker will re
Since the API is exposed on localhost:15005, one can use it to start the crawling process:
using trandoshanctl executable:
```sh
trandoshanctl schedule https://www.facebookcorewwwi.onion
```
or using the docker image:
```sh
feeder --api-uri http://localhost:15005 --url https://www.facebookcorewwwi.onion
docker run creekorful/trandoshanctl schedule https://www.facebookcorewwwi.onion
```
this will 'force' the API to publish given URL in crawling queue.
This will schedule the given URL for crawling.
## How to view results

@ -23,7 +23,7 @@ type ResourceDto struct {
// Client lists the operations the API client offers to callers.
type Client interface {
// SearchResources takes a URL string and returns a slice of ResourceDto or an error.
SearchResources(url string) ([]ResourceDto, error)
// AddResource takes a ResourceDto and returns a ResourceDto or an error.
AddResource(res ResourceDto) (ResourceDto, error)
// NOTE(review): AddURL has the same signature as ScheduleURL below and looks
// like the pre-rename name of that operation — confirm which one should remain.
AddURL(url string) error
// ScheduleURL submits the given URL to the API and reports any failure.
ScheduleURL(url string) error
}
type client struct {
@ -51,7 +51,7 @@ func (c *client) AddResource(res ResourceDto) (ResourceDto, error) {
return resourceDto, err
}
func (c *client) AddURL(url string) error {
func (c *client) ScheduleURL(url string) error {
targetEndpoint := fmt.Sprintf("%s/v1/urls", c.baseURL)
_, err := jsonPost(c.httpClient, targetEndpoint, url, nil)
return err

@ -13,12 +13,12 @@ RUN go mod download
COPY . .
# Test then build app
RUN go build -v github.com/creekorful/trandoshan/cmd/tdsh-feeder
RUN go build -v github.com/creekorful/trandoshan/cmd/trandoshanctl
# runtime image
FROM alpine:latest
COPY --from=builder /app/tdsh-feeder /app/
COPY --from=builder /app/trandoshanctl /app/
WORKDIR /app/
ENTRYPOINT ["./tdsh-feeder"]
ENTRYPOINT ["./trandoshanctl"]

@ -1,12 +1,12 @@
package main
import (
"github.com/creekorful/trandoshan/internal/feeder"
"github.com/creekorful/trandoshan/internal/trandoshanctl"
"os"
)
func main() {
app := feeder.GetApp()
app := trandoshanctl.GetApp()
if err := app.Run(os.Args); err != nil {
os.Exit(1)
}

@ -52,28 +52,32 @@ func GetApp() *cli.App {
}
}
func execute(ctx *cli.Context) error {
logging.ConfigureLogger(ctx)
func execute(c *cli.Context) error {
logging.ConfigureLogger(c)
e := echo.New()
e.HideBanner = true
log.Info().Str("ver", ctx.App.Version).Msg("Starting tdsh-api")
log.Info().Str("ver", c.App.Version).Msg("Starting tdsh-api")
log.Debug().Str("uri", ctx.String("elasticsearch-uri")).Msg("Using Elasticsearch server")
log.Debug().Str("uri", ctx.String("nats-uri")).Msg("Using NATS server")
log.Debug().Str("uri", c.String("elasticsearch-uri")).Msg("Using Elasticsearch server")
log.Debug().Str("uri", c.String("nats-uri")).Msg("Using NATS server")
// Connect to the NATS server
nc, err := nats.Connect(ctx.String("nats-uri"))
nc, err := nats.Connect(c.String("nats-uri"))
if err != nil {
log.Err(err).Str("uri", ctx.String("nats-uri")).Msg("Error while connecting to NATS server")
log.Err(err).Str("uri", c.String("nats-uri")).Msg("Error while connecting to NATS server")
return err
}
defer nc.Close()
// Create Elasticsearch client
es, err := elastic.NewClient(
elastic.SetURL(ctx.String("elasticsearch-uri")),
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
es, err := elastic.DialContext(ctx,
elastic.SetURL(c.String("elasticsearch-uri")),
elastic.SetSniff(false),
elastic.SetHealthcheck(false),
)
if err != nil {
@ -81,10 +85,15 @@ func execute(ctx *cli.Context) error {
return err
}
// Setup ES one for all
if err := setupElasticSearch(ctx, es); err != nil {
return err
}
// Add endpoints
e.GET("/v1/resources", searchResources(es))
e.POST("/v1/resources", addResource(es))
e.POST("/v1/urls", addURL(nc))
e.POST("/v1/urls", scheduleURL(nc))
log.Info().Msg("Successfully initialized tdsh-api. Waiting for requests")
@ -155,11 +164,11 @@ func addResource(es *elastic.Client) echo.HandlerFunc {
log.Debug().Str("url", resourceDto.URL).Msg("Successfully saved resource")
return c.NoContent(http.StatusCreated)
return c.JSON(http.StatusCreated, resourceDto)
}
}
func addURL(nc *nats.Conn) echo.HandlerFunc {
func scheduleURL(nc *nats.Conn) echo.HandlerFunc {
return func(c echo.Context) error {
var url string
if err := json.NewDecoder(c.Request().Body).Decode(&url); err != nil {
@ -178,3 +187,22 @@ func addURL(nc *nats.Conn) echo.HandlerFunc {
return nil
}
}
// setupElasticSearch makes sure the resources index is present.
//
// It asks Elasticsearch whether resourcesIndex exists and creates it when it
// does not. Any error from either request is logged and returned unchanged.
func setupElasticSearch(ctx context.Context, es *elastic.Client) error {
	found, err := es.IndexExists(resourcesIndex).Do(ctx)
	if err != nil {
		log.Err(err).Str("index", resourcesIndex).Msg("Error while checking if index exist")
		return err
	}

	// Nothing to create: report it and stop here.
	if found {
		log.Debug().Msg("index exist")
		return nil
	}

	log.Debug().Str("index", resourcesIndex).Msg("Creating missing index")
	_, err = es.CreateIndex(resourcesIndex).Do(ctx)
	if err != nil {
		log.Err(err).Str("index", resourcesIndex).Msg("Error while creating index")
		return err
	}

	return nil
}

@ -1,49 +0,0 @@
package feeder
import (
"github.com/creekorful/trandoshan/api"
"github.com/creekorful/trandoshan/internal/util/logging"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
)
// GetApp builds the command-line application of the feeder process.
//
// The app declares the logging flag plus two required string flags, api-uri
// and url, and uses execute as its action.
func GetApp() *cli.App {
	flags := []cli.Flag{
		logging.GetLogFlag(),
		&cli.StringFlag{
			Name:     "api-uri",
			Usage:    "URI to the API server",
			Required: true,
		},
		&cli.StringFlag{
			Name:     "url",
			Usage:    "URL to send to the crawler",
			Required: true,
		},
	}

	app := &cli.App{
		Name:    "tdsh-feeder",
		Version: "0.3.0",
		Usage:   "Trandoshan feeder process",
		Flags:   flags,
		Action:  execute,
	}
	return app
}
// execute is the action of the feeder app.
//
// It configures the logger, builds an API client from the api-uri flag and
// submits the value of the url flag through the client's AddURL method.
// A failure is logged and returned to the caller; success is logged at info
// level.
func execute(ctx *cli.Context) error {
	logging.ConfigureLogger(ctx)

	// Read the flag values once and reuse them below.
	apiURI, target := ctx.String("api-uri"), ctx.String("url")

	log.Info().Str("ver", ctx.App.Version).Msg("Starting tdsh-feeder")
	log.Debug().Str("uri", apiURI).Msg("Using API server")

	err := api.NewClient(apiURI).AddURL(target)
	if err != nil {
		log.Err(err).Msg("Unable to publish URL")
		return err
	}

	log.Info().Str("url", target).Msg("URL successfully sent to the crawler")
	return nil
}

@ -72,7 +72,7 @@ func handleMessage(apiClient api.Client) natsutil.MsgHandler {
return err
}
log.Debug().Str("url", urlMsg.URL).Msg("Processing URL: %s")
log.Debug().Str("url", urlMsg.URL).Msg("Processing URL")
u, err := url.Parse(urlMsg.URL)
if err != nil {

@ -0,0 +1,58 @@
package trandoshanctl
import (
"fmt"
"github.com/creekorful/trandoshan/api"
"github.com/creekorful/trandoshan/internal/util/logging"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
)
// GetApp builds the trandoshanctl command-line application.
//
// The app declares the logging flag and an api-uri flag whose default value
// is http://localhost:15005, offers a single "schedule" command handled by
// schedule, and registers before as its Before hook.
func GetApp() *cli.App {
	apiURIFlag := &cli.StringFlag{
		Name:  "api-uri",
		Usage: "URI to the API server",
		Value: "http://localhost:15005",
	}

	scheduleCmd := &cli.Command{
		Name:      "schedule",
		Usage:     "Schedule crawling for given URL",
		Action:    schedule,
		ArgsUsage: "URL",
	}

	return &cli.App{
		Name:     "trandoshanctl",
		Version:  "0.3.0",
		Usage:    "Trandoshan CLI",
		Flags:    []cli.Flag{logging.GetLogFlag(), apiURIFlag},
		Commands: []*cli.Command{scheduleCmd},
		Before:   before,
	}
}
// before hands the CLI context to logging.ConfigureLogger and always
// returns nil.
//
// The parameter is named c, like in schedule, so that the name ctx stays
// reserved for context.Context values.
func before(c *cli.Context) error {
	logging.ConfigureLogger(c)
	return nil
}
// schedule is the action of the "schedule" command.
//
// It takes the first positional argument as the URL to crawl and submits it
// through the API client built from the api-uri flag. It returns an error
// when the URL argument is missing or empty, or when the API client reports a
// failure (which is also logged).
func schedule(c *cli.Context) error {
	// Args().First() yields an empty string when no argument was supplied, so
	// this single check rejects both a missing argument and an explicitly
	// empty one (e.g. `schedule ""`), which would otherwise be sent to the API.
	url := c.Args().First()
	if url == "" {
		return fmt.Errorf("missing argument URL")
	}

	apiClient := api.NewClient(c.String("api-uri"))
	if err := apiClient.ScheduleURL(url); err != nil {
		log.Err(err).Str("url", url).Msg("Unable to schedule crawling for URL")
		return err
	}

	log.Info().Str("url", url).Msg("Successfully schedule crawling")
	return nil
}

@ -7,7 +7,7 @@ if [ "$1" ]; then
fi
# build docker images
for path in build/docker/Dockerfile-*; do
name=$(echo "$path" | cut -d'-' -f2)
docker build . -f "$path" -t "creekorful/tdsh-$name:$tag"
for path in build/docker/Dockerfile.*; do
name=$(echo "$path" | cut -d'.' -f2)
docker build . -f "$path" -t "creekorful/$name:$tag"
done

Loading…
Cancel
Save