You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bathyscaphe/internal/blacklister/blacklister.go

191 lines
4.7 KiB
Go

package blacklister
import (
"fmt"
"github.com/darkspot-org/bathyscaphe/internal/cache"
configapi "github.com/darkspot-org/bathyscaphe/internal/configapi/client"
"github.com/darkspot-org/bathyscaphe/internal/event"
chttp "github.com/darkspot-org/bathyscaphe/internal/http"
"github.com/darkspot-org/bathyscaphe/internal/process"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
"net/http"
"net/url"
)
// errAlreadyBlacklisted is the sentinel error returned (wrapped together with
// the hostname) by handleTimeoutURLEvent when the hostname of the timed-out
// URL is already present in the forbidden hostnames list.
var errAlreadyBlacklisted = fmt.Errorf("hostname is already blacklisted")
// State represents the application state of the blacklister process. Its
// fields are populated by Initialize.
type State struct {
// configClient reads the forbidden hostnames list and the blacklist
// configuration, and writes the updated forbidden hostnames list.
configClient configapi.Client
// hostnameCache keeps, per hostname, the number of confirmed timeouts. It is
// obtained from the provider under the "down-hostname" name.
hostnameCache cache.Cache
// httpClient is used to request the index page of a hostname.
httpClient chttp.Client
}
// Name returns the identifier of this process, which is always "blacklister".
func (state *State) Name() string {
	const processName = "blacklister"
	return processName
}
// Description returns the multi-line, human-readable summary of what the
// blacklister does and which event it consumes. The text starts with a
// newline and has no trailing newline.
func (state *State) Description() string {
	const description = "\n" +
		"The blacklisting component. It consumes timeout URL event and will try to\n" +
		"crawl the hostname index page to determinate if the whole hostname does not\n" +
		"respond. If the hostname does not respond after a retry policy, it will\n" +
		"be blacklisted by the process and further crawling event involving the hostname\n" +
		"will be discarded by the crawling process. This allow us to not waste time\n" +
		"crawling for nothing.\n" +
		"This process consumes the 'url.timeout' event."
	return description
}
// Features returns the list of features declared by this process: event,
// config, cache and crawling, in that order.
func (state *State) Features() []process.Feature {
	features := []process.Feature{
		process.EventFeature,
		process.ConfigFeature,
		process.CacheFeature,
		process.CrawlingFeature,
	}
	return features
}
// CustomFlags returns the command line flags specific to this process. The
// blacklister defines none, so the result is an empty (non-nil) slice.
func (state *State) CustomFlags() []cli.Flag {
	noFlags := make([]cli.Flag, 0)
	return noFlags
}
// Initialize wires the state with the dependencies handed out by provider:
// the "down-hostname" cache, a config client watching the forbidden hostnames
// and blacklist configuration keys, and an HTTP client.
//
// The dependencies are requested in that order. The first failure is returned
// as is; fields set by earlier successful steps keep their value.
func (state *State) Initialize(provider process.Provider) error {
	downHostnames, cacheErr := provider.Cache("down-hostname")
	if cacheErr != nil {
		return cacheErr
	}
	state.hostnameCache = downHostnames

	watchedKeys := []string{
		configapi.ForbiddenHostnamesKey,
		configapi.BlackListConfigKey,
	}
	config, configErr := provider.ConfigClient(watchedKeys)
	if configErr != nil {
		return configErr
	}
	state.configClient = config

	client, clientErr := provider.HTTPClient()
	if clientErr != nil {
		return clientErr
	}
	state.httpClient = client

	return nil
}
// Subscribers returns the event subscriptions of this process: a single
// subscriber bound to the timeout URL exchange through the
// "blacklistingQueue" queue and handled by handleTimeoutURLEvent.
func (state *State) Subscribers() []process.SubscriberDef {
	timeoutSubscriber := process.SubscriberDef{
		Exchange: event.TimeoutURLExchange,
		Queue:    "blacklistingQueue",
		Handler:  state.handleTimeoutURLEvent,
	}
	return []process.SubscriberDef{timeoutSubscriber}
}
// HTTPHandler returns the HTTP API exposed by the process. The blacklister
// exposes none, so the returned handler is always nil.
func (state *State) HTTPHandler() http.Handler {
	var handler http.Handler // zero value: nil interface
	return handler
}
// handleTimeoutURLEvent processes one timeout URL event.
//
// It decodes the event, extracts the hostname of the timed-out URL and then:
//
//   - returns an error wrapping errAlreadyBlacklisted when the hostname is
//     already in the forbidden hostnames list;
//   - requests "<scheme>://<host>" itself; if a response is received the
//     hostname is removed from the cache and nil is returned;
//   - if that request fails with chttp.ErrTimeout, increments the cached
//     counter for the hostname and, once the counter reaches the configured
//     threshold, appends the hostname to the forbidden hostnames list.
//
// Any error from decoding, URL parsing, the config client, the cache or the
// HTTP client (other than chttp.ErrTimeout) is returned unchanged. A URL that
// carries no hostname is rejected with an error before any lookup is made.
func (state *State) handleTimeoutURLEvent(subscriber event.Subscriber, msg event.RawMessage) error {
	var evt event.TimeoutURLEvent
	if err := subscriber.Read(&msg, &evt); err != nil {
		return err
	}

	u, err := url.Parse(evt.URL)
	if err != nil {
		return err
	}

	// The hostname is used as comparison value, cache key and log field:
	// compute it once. Without a hostname there is nothing meaningful to
	// probe or to blacklist, and an empty cache key would be shared by every
	// such URL.
	hostname := u.Hostname()
	if hostname == "" {
		return fmt.Errorf("no hostname in URL %q", evt.URL)
	}

	// Make sure hostname is not already 'blacklisted'
	forbiddenHostnames, err := state.configClient.GetForbiddenHostnames()
	if err != nil {
		return err
	}
	if isForbiddenHostname(forbiddenHostnames, hostname) {
		return fmt.Errorf("%s %w", hostname, errAlreadyBlacklisted)
	}

	// Check by ourselves if the hostname doesn't respond
	_, err = state.httpClient.Get(fmt.Sprintf("%s://%s", u.Scheme, u.Host))
	switch {
	case err == nil:
		log.Debug().
			Str("hostname", hostname).
			Msg("Response received.")

		// Host is not down, remove it from cache
		return state.hostnameCache.Remove(hostname)
	case err != chttp.ErrTimeout:
		// Only a timeout counts as "down"; anything else is reported as is.
		return err
	}

	log.Debug().
		Str("hostname", hostname).
		Msg("Timeout confirmed")

	blackListConfig, err := state.configClient.GetBlackListConfig()
	if err != nil {
		return err
	}

	count, err := state.hostnameCache.GetInt64(hostname)
	if err != nil {
		return err
	}
	count++

	if count >= blackListConfig.Threshold {
		if err := state.blacklistHostname(hostname, count); err != nil {
			return err
		}
	}

	// Update count
	return state.hostnameCache.SetInt64(hostname, count, blackListConfig.TTL)
}

// blacklistHostname appends hostname to the forbidden hostnames list unless
// it is already there. count is only used for logging.
func (state *State) blacklistHostname(hostname string, count int64) error {
	// The list is fetched again instead of reusing an earlier copy, so that
	// entries added in the meantime are taken into account and preserved.
	forbiddenHostnames, err := state.configClient.GetForbiddenHostnames()
	if err != nil {
		return err
	}

	// prevent duplicates
	if isForbiddenHostname(forbiddenHostnames, hostname) {
		log.Trace().Str("hostname", hostname).Msg("Skipping duplicate hostname")
		return nil
	}

	log.Info().
		Str("hostname", hostname).
		Int64("count", count).
		Msg("Blacklisting hostname")

	forbiddenHostnames = append(forbiddenHostnames, configapi.ForbiddenHostname{Hostname: hostname})
	return state.configClient.Set(configapi.ForbiddenHostnamesKey, forbiddenHostnames)
}

// isForbiddenHostname reports whether hostname exactly matches the Hostname
// of one of the given forbidden entries.
func isForbiddenHostname(forbidden []configapi.ForbiddenHostname, hostname string) bool {
	for _, entry := range forbidden {
		if entry.Hostname == hostname {
			return true
		}
	}
	return false
}