Add crawl of TPB 48h top to crawl-rss

I still miss most of the trending torrents because I source new torrents mainly from the DHT.
pull/27/head
Urban Guacamole 4 years ago
parent 6f9f970979
commit 8fdc8d6508

@ -1,5 +1,5 @@
[Unit]
Description=Automatic crawl of new torrent feeds
Description=Automatic crawl of new torrent feeds. Also automatically refreshes the index every hour.
Requires=postgresql
[Service]

@ -21,6 +21,7 @@ type Torrent struct {
func main() {
db := initDb()
crawled := make(map[string]bool) // set to not needlessly send all torrents to db to check if we found them already
var i int
for {
torrents := CrawlYts()
for _, torrent := range torrents {
@ -30,6 +31,16 @@ func main() {
for _, torrent := range torrents {
addTorrent(db, torrent, crawled)
}
if i%10 == 0 {
torrents = CrawlTPB48hTop()
for _, torrent := range torrents {
addTorrent(db, torrent, crawled)
}
if len(torrents) == 0 {
log.Println("weird, no torrents crawled from TPB")
}
}
i++
time.Sleep(time.Minute * 60)
go refresh(db)
}
@ -72,6 +83,9 @@ func CrawlYts() []Torrent {
return torrents
}
//TODO https://rarbg.to/rssdd.php?category=2;14;15;16;17;21;22;42;18;19;41;27;28;29;30;31;32;40;23;24;25;26;33;34;43;44;45;46;47;48;49;50;51;52;54
// ^^ rarbg w/o porn
func CrawlEztv() []Torrent { //maybe is there some kind of interface that this can share with CrawlYts? This function has the same signature and purpose.
fp := gofeed.NewParser()
feed, err := fp.ParseURL("https://eztv.io/ezrss.xml")

@ -1,8 +1,6 @@
package main
import (
"testing"
)
import "testing"
func TestCrawlYts(t *testing.T) {
torrents := CrawlYts()

@ -0,0 +1,41 @@
package main
import (
"encoding/json"
"io/ioutil"
"log"
"net/http"
)
// CrawlTPB48hTop fetches The Pirate Bay's precompiled top-100-of-the-last-48-hours
// list from apibay and returns the torrents it contains (nil on any fetch/parse error).
func CrawlTPB48hTop() []Torrent {
	const topURL = "https://apibay.org/precompiled/data_top100_48h.json"
	return parseApibayJSON(topURL)
}
// parseApibayJSON downloads an apibay JSON document from url and converts it
// into the internal Torrent representation. On any network, HTTP-status, read,
// or decode error it logs the problem and returns nil.
func parseApibayJSON(url string) []Torrent {
	httpresp, err := http.Get(url)
	if err != nil {
		log.Println(err)
		return nil
	}
	defer httpresp.Body.Close()
	// A non-200 body would not be valid torrent JSON; bail out early.
	if httpresp.StatusCode != http.StatusOK {
		log.Println("apibay returned unexpected status:", httpresp.Status)
		return nil
	}
	body, err := ioutil.ReadAll(httpresp.Body)
	if err != nil {
		log.Println(err)
		return nil
	}
	var resp []ApibayTorrent
	if err = json.Unmarshal(body, &resp); err != nil {
		log.Println(err)
		return nil
	}
	// Pre-size the result; one Torrent per apibay entry.
	torrents := make([]Torrent, 0, len(resp))
	for _, apibayTorr := range resp {
		torrents = append(torrents, Torrent{apibayTorr.Info_hash, apibayTorr.Name, apibayTorr.Size})
	}
	return torrents
}
// ApibayTorrent is the structure returned from apibay, for unmarshaling from
// JSON. Not all fields that apibay returns are in this struct; YAGNI.
// Explicit json tags are given so decoding does not depend on encoding/json's
// case-insensitive field-name fallback.
type ApibayTorrent struct {
	ID        int    `json:"id"`
	Info_hash string `json:"info_hash"` // hex-encoded infohash
	Name      string `json:"name"`
	Size      int    `json:"size"` // NOTE(review): presumably bytes — confirm against apibay
	Added     int    `json:"added"` // NOTE(review): presumably a Unix timestamp — confirm
}

@ -0,0 +1,21 @@
package main
import (
"strconv"
"testing"
)
// TestCrawlTPB48hTop performs a live crawl of the TPB 48h top list and sanity
// checks every returned torrent (plausible length, non-trivial name).
func TestCrawlTPB48hTop(t *testing.T) {
	got := CrawlTPB48hTop()
	if len(got) == 0 {
		t.Error("no torrents crawled from tpb")
	}
	for idx, tor := range got {
		suffix := strconv.Itoa(idx)
		if tor.Length < 10 {
			t.Error("bad length of torrent " + suffix)
		}
		if len(tor.Name) < 2 {
			t.Error("weirdly short name of torrent " + suffix)
		}
	}
}
Loading…
Cancel
Save