From 8fdc8d650897ac5fc8663ea7b67dd444a567e780 Mon Sep 17 00:00:00 2001 From: Urban Guacamole Date: Sun, 5 Jul 2020 12:31:28 +0200 Subject: [PATCH] Add crawl of TPB 48h top to crawl-rss I still miss most of the trending torrents because I source new torrents mainly from the DHT. --- crawl-rss/crawlrss.service | 2 +- crawl-rss/main.go | 14 +++++++++++++ crawl-rss/main_test.go | 4 +--- crawl-rss/tpb.go | 41 ++++++++++++++++++++++++++++++++++++++ crawl-rss/tpb_test.go | 21 +++++++++++++++++++ 5 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 crawl-rss/tpb.go create mode 100644 crawl-rss/tpb_test.go diff --git a/crawl-rss/crawlrss.service b/crawl-rss/crawlrss.service index 6d719e7..2427f82 100644 --- a/crawl-rss/crawlrss.service +++ b/crawl-rss/crawlrss.service @@ -1,5 +1,5 @@ [Unit] -Description=Automatic crawl of new torrent feeds +Description=Automatic crawl of new torrent feeds. Also automatically refreshes the index every hour. Requires=postgresql [Service] diff --git a/crawl-rss/main.go b/crawl-rss/main.go index 81f6b4c..b6149dc 100644 --- a/crawl-rss/main.go +++ b/crawl-rss/main.go @@ -21,6 +21,7 @@ type Torrent struct { func main() { db := initDb() crawled := make(map[string]bool) // set to not needlessly send all torrents to db to check if we found them already + var i int for { torrents := CrawlYts() for _, torrent := range torrents { @@ -30,6 +31,16 @@ func main() { for _, torrent := range torrents { addTorrent(db, torrent, crawled) } + if i%10 == 0 { + torrents = CrawlTPB48hTop() + for _, torrent := range torrents { + addTorrent(db, torrent, crawled) + } + if len(torrents) == 0 { + log.Println("weird, no torrents crawled from TPB") + } + } + i++ time.Sleep(time.Minute * 60) go refresh(db) } @@ -72,6 +83,9 @@ func CrawlYts() []Torrent { return torrents } +//TODO https://rarbg.to/rssdd.php?category=2;14;15;16;17;21;22;42;18;19;41;27;28;29;30;31;32;40;23;24;25;26;33;34;43;44;45;46;47;48;49;50;51;52;54 +// ^^ rarbg w/o porn + func CrawlEztv() 
[]Torrent { //maybe is there some kind of interface that this can share with CrawlYts? This function has the same signature and purpose. fp := gofeed.NewParser() feed, err := fp.ParseURL("https://eztv.io/ezrss.xml") diff --git a/crawl-rss/main_test.go b/crawl-rss/main_test.go index d5e2c26..1f3c486 100644 --- a/crawl-rss/main_test.go +++ b/crawl-rss/main_test.go @@ -1,8 +1,6 @@ package main -import ( - "testing" -) +import "testing" func TestCrawlYts(t *testing.T) { torrents := CrawlYts() diff --git a/crawl-rss/tpb.go b/crawl-rss/tpb.go new file mode 100644 index 0000000..028e8c5 --- /dev/null +++ b/crawl-rss/tpb.go @@ -0,0 +1,41 @@ +package main + +import ( + "encoding/json" + "io/ioutil" + "log" + "net/http" +) + +// CrawlTPB48hTop crawls https://apibay.org/precompiled/data_top100_48h.json +func CrawlTPB48hTop() []Torrent { + return parseApibayJSON("https://apibay.org/precompiled/data_top100_48h.json") +} + +func parseApibayJSON(url string) []Torrent { + httpresp, err := http.Get(url) + if err != nil { + log.Println(err) + return nil + } + defer httpresp.Body.Close() + body, err := ioutil.ReadAll(httpresp.Body) + + var resp []ApibayTorrent + err = json.Unmarshal(body, &resp) + + var torrents []Torrent + for _, apibayTorr := range resp { + torrents = append(torrents, Torrent{apibayTorr.Info_hash, apibayTorr.Name, apibayTorr.Size}) + } + return torrents +} + +// ApibayTorrent is the structure returned from apibay, used for unmarshaling from JSON. 
Not all fields that are returned from Apibay are in this struct; YAGNI +type ApibayTorrent struct { + ID int + Info_hash string + Name string + Size int + Added int +} diff --git a/crawl-rss/tpb_test.go b/crawl-rss/tpb_test.go new file mode 100644 index 0000000..0671348 --- /dev/null +++ b/crawl-rss/tpb_test.go @@ -0,0 +1,21 @@ +package main + +import ( + "strconv" + "testing" +) + +func TestCrawlTPB48hTop(t *testing.T) { + torrents := CrawlTPB48hTop() + if len(torrents) < 1 { + t.Error("no torrents crawled from tpb") + } + for i, torrent := range torrents { + if torrent.Length < 10 { + t.Error("bad length of torrent "+strconv.Itoa(i)) + } + if len(torrent.Name) < 2 { + t.Error("weirdly short name of torrent "+strconv.Itoa(i)) + } + } +} \ No newline at end of file