handle rss feeds

updater
devrandom 13 years ago
parent 4d29dd2899
commit d22d3bc1f3

@@ -6,24 +6,21 @@ Construct a download config:
---
signers:
  989F6B3048A116B5:
  BF6273FAEF7CC0BA1F562E50989F6B3048A116B5:
    weight: 20
    name: Devrandom
    key: |-
      -----BEGIN PGP PUBLIC KEY BLOCK-----
      Version: GnuPG v1.4.10 (GNU/Linux)
      mQINBE2OgyMBEAC/ZNlctrNpVk1CUYbIflQtNqybqGPFzxp2F+EYdMfEXvR9e7bP
      ...
    key: devrandom
minimum_weight: 30
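A downloader reading this config would typically verify the package signatures, then sum the weight of every signer whose signature checked out and require the total to reach minimum_weight. The sketch below shows only that weight check; check_weights and verified_fingerprints are names made up for this illustration, not part of the actual script.

    import yaml

    def check_weights(config_path, verified_fingerprints):
        # Load a download config like the one above.
        config = yaml.safe_load(open(config_path))
        # Sum the weight of each signer whose key produced a good signature.
        total = sum(signer['weight']
                    for fingerprint, signer in config['signers'].items()
                    if fingerprint in verified_fingerprints)
        # Enough independent signers must vouch for the package.
        return total >= config['minimum_weight']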
the keys can be extracted with:
The keys can be extracted with:
gpg --export-options export-minimal --export KEYID
gpg --export-options export-minimal --export -a KEYID
and saved into devrandom-key.pgp (see "key" in signers).
the long key id can be obtained with:
The long key id can be obtained with:
gpg -kv --keyid-format long KEYID
gpg --status-fd 1 --dry-run --import KEYFILE
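As a rough illustration of that last command, the snippet below runs gpg with --status-fd 1 and scrapes the fingerprint from its machine-readable status output. It assumes gpg emits an IMPORT_OK status line whose last field is the key fingerprint; get_long_keyid is just a name chosen for this sketch.

    import subprocess

    def get_long_keyid(keyfile):
        # --dry-run keeps the keyring untouched; --status-fd 1 sends
        # machine-readable "[GNUPG:] ..." lines to stdout.
        gpg = subprocess.Popen(['gpg', '--status-fd', '1', '--dry-run', '--import', keyfile],
                               stdout=subprocess.PIPE)
        out = gpg.communicate()[0]
        for line in out.splitlines():
            fields = line.split()
            # Assumed format: [GNUPG:] IMPORT_OK <reason> <fingerprint>
            if len(fields) >= 3 and fields[0] == '[GNUPG:]' and fields[1] == 'IMPORT_OK':
                return fields[-1]
        return None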
## ZIP file

@@ -1,3 +1,5 @@
Downloader
* incremenral update of dest directory
* incremental update of dest directory
* check version #
* json

@@ -23,6 +23,7 @@ import re
import tempfile
import atexit
import urllib2
import libxml2
import argparse
import yaml
from zipfile import ZipFile
@@ -44,13 +45,15 @@ def remove_temp(tdir):
    shutil.rmtree(tdir)

def download(url, dest):
    if quiet == 0:
        print "Downloading from %s"%(url)
    file_name = url.split('/')[-1]
    u = urllib2.urlopen(url)
    f = open(dest, 'w')
    meta = u.info()
    file_size = int(meta.getheaders("Content-Length")[0])
    if quiet == 0:
        print "Downloading: %s Bytes: %s" % (file_name, file_size)
        print "Downloading: %s Bytes: %s"%(file_name, file_size)
    file_size_dl = 0
    block_sz = 65536
@@ -59,7 +62,7 @@ def download(url, dest):
        if not buffer:
            break
        file_size_dl += block_sz
        file_size_dl += len(buffer)
        f.write(buffer)
        status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
        status = status + chr(8)*(len(status)+1)
@@ -237,15 +240,18 @@ else:
    if not args.dest:
        parser.error('argument -d/--dest is required unless -m is specified')
rsses = []
if args.url:
    urls = args.url
else:
    urls = config['urls']
if config.has_key('rss'):
    rsses = config['rss']
if not urls:
    parser.error('argument -u/--url is required since config does not specify it')
# TODO: handle multiple urls, rss, atom, etc.
url = urls[0]
# TODO: rss, atom, etc.
if path.exists(args.dest):
    print>>sys.stderr, "destination already exists, please remove it first"
@@ -256,7 +262,41 @@ temp_dir = tempfile.mkdtemp('', prog)
atexit.register(remove_temp, temp_dir)
package_file = path.join(temp_dir, 'package')
download(url, package_file)
downloaded = False
for rss in rsses:
    try:
        feed = libxml2.parseDoc(urllib2.urlopen(rss['url']).read())
        url = None
        for node in feed.xpathEval(rss['xpath']):
            if re.search(rss['pattern'], str(node)):
                url = str(node)
                break
        try:
            download(url, package_file)
            downloaded = True
            break
        except:
            print>>sys.stderr, "could not download from %s, trying next rss"%(url)
            pass
    except:
        print>>sys.stderr, "could not read from rss %s"%(rss)
        pass
if not downloaded:
    for url in urls:
        try:
            download(url, package_file)
            downloaded = True
            break
        except:
            print>>sys.stderr, "could not download from %s, trying next url"%(url)
            pass
if not downloaded:
    print>>sys.stderr, "out of places to download from, try later"
    exit(1)
unpack_dir = path.join(temp_dir, 'unpack')
files = extract(unpack_dir, package_file)
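The loop above expects an optional rss list in the download config, each entry giving the feed url, an xpath expression selecting candidate links, and a regex pattern filtering them; it falls back to the plain urls list when no feed yields a working download. A config stanza along these lines would drive it; the feed address, xpath, and pattern below are made-up placeholders, not values shipped with the project:

    rss:
      - url: http://example.org/releases/feed.xml
        xpath: //enclosure/@url
        pattern: .*\.zip$
    urls:
      - http://example.org/releases/package.zip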
