Working on the uploader to Internet Archive S3; launcher.py now does not explore subdirectories, just the current one ('.')

git-svn-id: https://wikiteam.googlecode.com/svn/trunk@613 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
pull/117/head
emijrp 12 years ago
parent 83ff174888
commit 7cdf391b74

@ -39,6 +39,7 @@ for wiki in wikis:
if f.startswith(prefix) and f.endswith('.7z'):
compressed = True
zipfilename = f
break #stop searching, do not explore subdirectories
if compressed:
print 'Skipping... This wiki was downloaded and compressed before in', zipfilename
@ -60,6 +61,7 @@ for wiki in wikis:
if d.startswith(prefix):
wikidir = d
started = True
break #stop searching, do not explore subdirectories
if started and wikidir: #then resume
print 'Resuming download, using directory', wikidir
@ -72,6 +74,7 @@ for wiki in wikis:
for d in dirnames:
if d.startswith(prefix):
wikidir = d
break #stop searching, do not explore subdirectories
#compress
prefix = wikidir.split('-wikidump')[0]

@ -19,8 +19,12 @@
# https://wiki.archive.org/twiki/bin/view/Main/IAS3BulkUploader
# http://en.ecgpedia.org/api.php?action=query&meta=siteinfo&siprop=rightsinfo
import os
import re
import subprocess
import urllib
"""
log = subprocess.check_output(['curl', '--location',
'--header', "'x-amz-auto-make-bucket:1",
'--header', "'x-archive-queue-derive:0",
@ -29,7 +33,7 @@ log = subprocess.check_output(['curl', '--location',
'--header', "'x-archive-meta-mediatype:web'",
'--header', "'x-archive-meta-collection:opensource'",
'--header', "'x-archive-meta-title:Wiki - ECGpedia'",
'--header', """'x-archive-meta-description:<a href="http://en.ecgpedia.org/" rel="nofollow">ECGpedia,</a>: a free electrocardiography (ECG) tutorial and textbook to which anyone can contribute, designed for medical professionals such as cardiac care nurses and physicians. Dumped with <a href="http://code.google.com/p/wikiteam/" rel="nofollow">WikiTeam</a> tool.'"""
'--header', "'x-archive-meta-description:<a href=\"http://en.ecgpedia.org/\" rel=\"nofollow\">ECGpedia,</a>: a free electrocardiography (ECG) tutorial and textbook to which anyone can contribute, designed for medical professionals such as cardiac care nurses and physicians. Dumped with <a href=\"http://code.google.com/p/wikiteam/\" rel=\"nofollow\">WikiTeam</a> tool.'"
'--header', "'x-archive-meta-subject:ecg; ECGpedia; wiki; wikiteam; MediaWiki'",
'--header', "'x-archive-meta-licenseurl:http://creativecommons.org/licenses/by-nc-sa/3.0/'",
'--header', "'x-archive-meta-rights:http://en.ecgpedia.org/wiki/Frequently_Asked_Questions'",
@ -37,4 +41,19 @@ log = subprocess.check_output(['curl', '--location',
'--upload-file', "/home/.../ArchiveTeam/WikiTeam/enecgpediaorg-20120419-wikidump.7z",
"http://s3.us.archive.org/wiki-en.ecgpedia.org/enecgpediaorg-20120419-wikidump.7z"
])
"""
def upload(f):
    """Print the name of the archive selected for upload.

    f -- file name of the archive, as handed over by the caller.

    Placeholder: nothing is transferred here, the name is only echoed to
    standard output.
    """
    # The parenthesised form prints the same text under Python 2's print
    # statement and is also a valid function call under Python 3.
    print(f)
# NOTE(review): not referenced by any code visible in this view; presumably a
# placeholder for the wikis to upload -- confirm before relying on or removing it.
wikis = []
def main():
    """Pass every dump archive found directly in the current directory to upload().

    Only the entry that os.walk reports for '.' is inspected, and the walk is
    abandoned right after it, so subdirectories are not explored.
    """
    archive_suffixes = ('-wikidump.7z', '-history.xml.7z')
    for current, _subdirs, names in os.walk('.'):
        if current != '.':
            continue
        for name in names:
            if name.endswith(archive_suffixes):
                upload(name)
        break  # the current directory has been handled; stop walking
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
main()

Loading…
Cancel
Save