diff --git a/internal/blacklister/blacklister.go b/internal/blacklister/blacklister.go index 238d639..0863a08 100644 --- a/internal/blacklister/blacklister.go +++ b/internal/blacklister/blacklister.go @@ -27,6 +27,19 @@ func (state *State) Name() string { return "blacklister" } +// Description returns the process description +func (state *State) Description() string { + return ` +The blacklisting component. It consumes timeout URL events and will try to +crawl the hostname index page to determine if the whole hostname does not +respond. If the hostname does not respond after a retry policy, it will +be blacklisted by the process and further crawling events involving the hostname +will be discarded by the crawling process. This allows us to not waste time +crawling for nothing. + +This process consumes the 'url.timeout' event.` +} + // Features return the process features func (state *State) Features() []process.Feature { return []process.Feature{process.EventFeature, process.ConfigFeature, process.CacheFeature, process.CrawlingFeature} diff --git a/internal/configapi/configapi.go b/internal/configapi/configapi.go index 23d8455..a4407ce 100644 --- a/internal/configapi/configapi.go +++ b/internal/configapi/configapi.go @@ -24,6 +24,19 @@ func (state *State) Name() string { return "configapi" } +// Description returns the process description +func (state *State) Description() string { + return ` +The ConfigAPI component. It serves as a centralized K/V database +with notification support. +This component exposes a REST API to allow other processes to retrieve +configuration at startup time, and to allow value updates at runtime. +Each time a configuration is updated through the API, an event will +be dispatched so that running processes can update their local values. 
+ +This component produces the 'config' event.` +} + // Features return the process features func (state *State) Features() []process.Feature { return []process.Feature{process.EventFeature, process.CacheFeature} diff --git a/internal/crawler/crawler.go b/internal/crawler/crawler.go index 70bffc2..5137fb6 100644 --- a/internal/crawler/crawler.go +++ b/internal/crawler/crawler.go @@ -32,6 +32,17 @@ func (state *State) Name() string { return "crawler" } +// Description returns the process description +func (state *State) Description() string { + return ` +The crawling component. It consumes URLs, crawls the resource, and +publishes the result (page content + headers). + +The crawler consumes the 'url.new' event and produces either: +- 'url.timeout' event if the crawling has failed because of a timeout issue +- 'resource.new' event if the crawling has succeeded.` +} + // Features return the process features func (state *State) Features() []process.Feature { return []process.Feature{process.EventFeature, process.ConfigFeature, process.CrawlingFeature} diff --git a/internal/indexer/indexer.go b/internal/indexer/indexer.go index fd20263..c62804b 100644 --- a/internal/indexer/indexer.go +++ b/internal/indexer/indexer.go @@ -29,6 +29,15 @@ func (state *State) Name() string { return "indexer" } +// Description returns the process description +func (state *State) Description() string { + return ` +The indexing component. It consumes crawled resources, formats +them and finally indexes them using the configured driver. 
+ +This component consumes the 'resource.new' event.` +} + // Features return the process features func (state *State) Features() []process.Feature { return []process.Feature{process.EventFeature, process.ConfigFeature} diff --git a/internal/process/process.go b/internal/process/process.go index eb9e55c..39355f6 100644 --- a/internal/process/process.go +++ b/internal/process/process.go @@ -138,6 +138,7 @@ type SubscriberDef struct { // Process is a component of Bathyscaphe type Process interface { Name() string + Description() string Features() []Feature CustomFlags() []cli.Flag Initialize(provider Provider) error @@ -148,9 +149,10 @@ type Process interface { // MakeApp return cli.App corresponding for given Process func MakeApp(process Process) *cli.App { app := &cli.App{ - Name: fmt.Sprintf("bs-%s", process.Name()), - Version: version, - Usage: fmt.Sprintf("Bathyscaphe %s component", process.Name()), + Name: fmt.Sprintf("bs-%s", process.Name()), + Version: version, + Usage: fmt.Sprintf("Bathyscaphe %s component", process.Name()), + Description: process.Description(), Flags: []cli.Flag{ &cli.StringFlag{ Name: "log-level", diff --git a/internal/scheduler/scheduler.go b/internal/scheduler/scheduler.go index eacb9dc..1e2ccb4 100644 --- a/internal/scheduler/scheduler.go +++ b/internal/scheduler/scheduler.go @@ -38,6 +38,18 @@ func (state *State) Name() string { return "scheduler" } +// Description returns the process description +func (state *State) Description() string { + return ` +The scheduling component. It extracts URLs from crawled resources +and applies a predicate to determine if the URL is eligible +for crawling. If it is, it will publish an event and update the +scheduling cache. + +This component consumes the 'resource.new' event and produces +the 'url.new' event.` +} + // Features return the process features func (state *State) Features() []process.Feature { return []process.Feature{process.EventFeature, process.ConfigFeature, process.CacheFeature}