Add crawl of TPB 48h top to crawl-rss

I still miss most of the trending torrents because I source new torrents mainly from the DHT.
pull/27/head
Urban Guacamole 4 years ago
parent 6f9f970979
commit 8fdc8d6508

@ -1,5 +1,5 @@
[Unit]
Description=Automatic crawl of new torrent feeds
Description=Automatic crawl of new torrent feeds. Also automatically refreshes the index every hour.
Requires=postgresql
[Service]

@ -21,6 +21,7 @@ type Torrent struct {
func main() {
db := initDb()
crawled := make(map[string]bool) // set to not needlessly send all torrents to db to check if we found them already
var i int
for {
torrents := CrawlYts()
for _, torrent := range torrents {
@ -30,6 +31,16 @@ func main() {
for _, torrent := range torrents {
addTorrent(db, torrent, crawled)
}
if i%10 == 0 {
torrents = CrawlTPB48hTop()
for _, torrent := range torrents {
addTorrent(db, torrent, crawled)
}
if len(torrents) == 0 {
log.Println("weird, no torrents crawled from TPB")
}
}
i++
time.Sleep(time.Minute * 60)
go refresh(db)
}
@ -72,6 +83,9 @@ func CrawlYts() []Torrent {
return torrents
}
//TODO https://rarbg.to/rssdd.php?category=2;14;15;16;17;21;22;42;18;19;41;27;28;29;30;31;32;40;23;24;25;26;33;34;43;44;45;46;47;48;49;50;51;52;54
// ^^ rarbg w/o porn
func CrawlEztv() []Torrent { //maybe is there some kind of interface that this can share with CrawlYts? This function has the same signature and purpose.
fp := gofeed.NewParser()
feed, err := fp.ParseURL("https://eztv.io/ezrss.xml")

@ -1,8 +1,6 @@
package main
import (
"testing"
)
import "testing"
func TestCrawlYts(t *testing.T) {
torrents := CrawlYts()

@ -0,0 +1,41 @@
package main
import (
"encoding/json"
"io/ioutil"
"log"
"net/http"
)
// CrawlTPB48hTop fetches The Pirate Bay's precompiled top-100-of-the-last-48-hours
// list from apibay and returns the torrents it contains (nil on any fetch/parse error).
func CrawlTPB48hTop() []Torrent {
	const topURL = "https://apibay.org/precompiled/data_top100_48h.json"
	return parseApibayJSON(topURL)
}
// parseApibayJSON downloads an apibay JSON document from url and converts it
// into the internal Torrent representation. On any network, HTTP-status, read,
// or decode error it logs the problem and returns nil.
func parseApibayJSON(url string) []Torrent {
	httpresp, err := http.Get(url)
	if err != nil {
		log.Println(err)
		return nil
	}
	defer httpresp.Body.Close()
	// A non-200 body would not be valid torrent JSON; bail out early.
	if httpresp.StatusCode != http.StatusOK {
		log.Println("apibay returned unexpected status:", httpresp.Status)
		return nil
	}
	body, err := ioutil.ReadAll(httpresp.Body)
	if err != nil {
		log.Println(err)
		return nil
	}
	var resp []ApibayTorrent
	if err = json.Unmarshal(body, &resp); err != nil {
		log.Println(err)
		return nil
	}
	// Pre-size the result; one Torrent per apibay entry.
	torrents := make([]Torrent, 0, len(resp))
	for _, apibayTorr := range resp {
		torrents = append(torrents, Torrent{apibayTorr.Info_hash, apibayTorr.Name, apibayTorr.Size})
	}
	return torrents
}
// ApibayTorrent is the structure returned from apibay, for unmarshaling from
// JSON. Not all fields that apibay returns are in this struct; YAGNI.
// Explicit json tags are given so decoding does not depend on encoding/json's
// case-insensitive field-name fallback.
type ApibayTorrent struct {
	ID        int    `json:"id"`
	Info_hash string `json:"info_hash"` // hex-encoded infohash
	Name      string `json:"name"`
	Size      int    `json:"size"` // NOTE(review): presumably bytes — confirm against apibay
	Added     int    `json:"added"` // NOTE(review): presumably a Unix timestamp — confirm
}

@ -0,0 +1,21 @@
package main
import (
"strconv"
"testing"
)
// TestCrawlTPB48hTop performs a live crawl of the TPB 48h top list and sanity
// checks every returned torrent (plausible length, non-trivial name).
func TestCrawlTPB48hTop(t *testing.T) {
	got := CrawlTPB48hTop()
	if len(got) == 0 {
		t.Error("no torrents crawled from tpb")
	}
	for idx, tor := range got {
		suffix := strconv.Itoa(idx)
		if tor.Length < 10 {
			t.Error("bad length of torrent " + suffix)
		}
		if len(tor.Name) < 2 {
			t.Error("weirdly short name of torrent " + suffix)
		}
	}
}
Loading…
Cancel
Save