wikiapiary bots

pull/271/head
emijrp 8 years ago
parent 5db991bfbb
commit 6dabef5980

@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2016 WikiTeam
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import urllib
import urllib.parse
import urllib.request

import pywikibot
from pywikibot import pagegenerators
def _fetch_text(url):
    """Download *url* and return the response body decoded as UTF-8."""
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def _find_api_url(wtext):
    """Return the value of the ``API URL=`` parameter in *wtext*, or None."""
    # Matches up to the end of the line, so it also works when the
    # parameter is the last line of the page and has no trailing newline.
    match = re.search(r'(?i)API URL=(http[^\n]+)', wtext)
    if match is None:
        return None
    return match.group(1).strip()


def _find_item_identifier(search_html):
    """Return the first item identifier linked in *search_html*, or None."""
    match = re.search(r'<a href="/details/([^ ]+?)" title=', search_html)
    if match is None:
        return None
    return match.group(1)


def _find_dump_files(files_xml):
    """Return ``[date, size]`` integer pairs from *files_xml*, newest first.

    The date is the 8-digit group embedded in each original file's name and
    the size is the content of its ``<size>`` element.
    """
    found = re.findall(
        r'(?im)<file name="[^ ]+-(\d{8})-[^ ]+" source="original">'
        r'\s*<mtime>\d+</mtime>\s*<size>(\d+)</size>',
        files_xml)
    return sorted(([int(date), int(size)] for date, size in found),
                  reverse=True)


def _append_ia_params(text, iaparams):
    """Insert *iaparams* before each ``}}`` that is followed by a newline."""
    # A callable replacement keeps backslashes in iaparams literal instead
    # of having re.sub interpret them as escape sequences.
    # NOTE(review): this rewrites every '}}\n' in the page, not only the
    # one closing the template that holds the website data - confirm that
    # the target pages contain a single such template.
    return re.sub(r'(?im)\}\}\n', lambda _match: '%s\n}}\n' % (iaparams), text)


def main():
    """Add Internet Archive dump details to WikiApiary website pages.

    Walks the pages in ``Category:Website``, skips redirects and pages whose
    text already mentions "Internet Archive", searches archive.org for an
    item whose ``originalurl`` equals the page's ``API URL`` and, when one
    is found, appends the item identifier, URL, date and file size of the
    newest dump to the page and saves it.

    Pages that cannot be processed (no API URL, failed download, or
    unparseable results) are reported on stdout and skipped so that a
    single bad page does not abort the whole run.
    """
    site = pywikibot.Site('wikiapiary', 'wikiapiary')
    cat = pywikibot.Category(site, 'Category:Website')
    gen = pagegenerators.CategorizedPageGenerator(cat)
    for page in pagegenerators.PreloadingGenerator(gen):
        if page.isRedirectPage():
            continue

        wtitle = page.title()
        wtext = page.text
        if re.search('Internet Archive', wtext):
            # The page already has IA parameters.
            continue

        print('\n', '#'*50, '\n', wtitle, '\n', '#'*50)
        print('https://wikiapiary.com/wiki/%s' % (wtitle.replace(' ', '_')))
        print('Missing IA parameter')

        apiurl = _find_api_url(wtext)
        if apiurl is None:
            print('No API found in WikiApiary, skiping')
            continue
        print('API:', apiurl)

        # The API URL contains ':', '/', '?' and is wrapped in quotes, so
        # the whole query value has to be percent-encoded.
        query = urllib.parse.quote('originalurl:"%s"' % (apiurl), safe='')
        urliasearch = 'https://archive.org/search.php?query=%s' % (query)
        try:
            raw = _fetch_text(urliasearch)
        except OSError as error:  # URLError and HTTPError derive from it
            print('Error while searching Internet Archive:', error)
            continue

        if re.search(r'(?i)Your search did not match any items', raw):
            print('No dumps found at Internet Archive')
            continue

        itemidentifier = _find_item_identifier(raw)
        if itemidentifier is None:
            print('Could not find an item identifier in the search results')
            continue
        itemurl = 'https://archive.org/details/%s' % (itemidentifier)
        print('Item found:', itemurl)

        metaurl = 'https://archive.org/download/%s/%s_files.xml' % (
            itemidentifier, itemidentifier)
        try:
            raw2 = _fetch_text(metaurl)
        except OSError as error:
            print('Error while downloading the item file list:', error)
            continue

        itemfiles = _find_dump_files(raw2)
        print(itemfiles)
        if not itemfiles:
            print('No dated dump files found in the item')
            continue

        # The list is sorted newest first, so the first pair is the
        # latest dump.
        newestdate, itemsize = itemfiles[0]
        datestring = str(newestdate)
        itemdate = '%s/%s/%s' % (
            datestring[0:4], datestring[4:6], datestring[6:8])

        iaparams = """|Internet Archive identifier=%s
|Internet Archive URL=%s
|Internet Archive added date=%s 00:00:00
|Internet Archive file size=%s""" % (itemidentifier, itemurl, itemdate, itemsize)
        newtext = _append_ia_params(page.text, iaparams)
        if newtext == page.text:
            # Nothing matched, so saving would be an empty edit.
            print('Could not find where to insert the IA parameters')
            continue

        pywikibot.showDiff(page.text, newtext)
        page.text = newtext
        page.save('BOT - Adding dump details: %s, %s, %s bytes' % (
            itemidentifier, itemdate, itemsize))


if __name__ == "__main__":
    main()

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
"""Family module for WikiApiary wiki."""
from __future__ import unicode_literals
__version__ = '$Id: 8c9856dd7c0af8d400d0d95b00bf406002729008 $'
from pywikibot import family
# The MediaWiki family
# user-config.py: usernames['wikiapiary']['wikiapiary'] = 'User name'
class Family(family.WikimediaFamily):

    """Family module for WikiApiary wiki."""

    # NOTE(review): this derives from WikimediaFamily although the only
    # host configured below is wikiapiary.com - confirm whether the
    # generic family.Family base class was intended.

    def __init__(self):
        """Constructor."""
        super(Family, self).__init__()
        self.name = 'wikiapiary'
        # Maps the site code to the hostname serving it.
        self.langs = {
            'wikiapiary': 'wikiapiary.com',
        }
        # Wikimedia wikis all use "bodyContent" as the id of the <div>
        # element that contains the actual page content; change this for
        # wikis that use something else (e.g., mozilla family)
        self.content_id = "bodyContent"

    def scriptpath(self, code):
        """The prefix used to locate scripts on this wiki.

        This is the value displayed when you enter {{SCRIPTPATH}} on a
        wiki page (often displayed at [[Help:Variables]] if the wiki has
        copied the master help page correctly).

        The default value is the one used on Wikimedia Foundation wikis,
        but needs to be overridden in the family file for any wiki that
        uses a different value.
        """
        return '/w'

    # Which version of MediaWiki is used? REQUIRED
    def version(self, code):
        """Return the MediaWiki version string configured for this wiki."""
        # Replace with the actual version being run on your wiki
        return '1.25.3'

    def code2encoding(self, code):
        """Return the encoding for a specific language wiki"""
        # Most wikis nowadays use UTF-8, but change this if yours uses
        # a different encoding
        return 'utf-8'

    def path(self, code):
        """Return the path of index.php below the host root."""
        return '/w/index.php'

    def apipath(self, code):
        """Return the path of api.php below the host root."""
        return '/w/api.php'

    def protocol(self, code):
        """Return the URL scheme used to connect to the wiki."""
        # Lower case on purpose: pywikibot compares this value against
        # 'https' when building URLs, which the original 'HTTPS' never
        # matched.
        return 'https'
Loading…
Cancel
Save