Catch HTTP Error 405 and switch from POST to GET for API requests

Seen on http://wiki.ainigma.eu/index.php?title=Hlavn%C3%AD_strana:
HTTPError: HTTP Error 405: Method Not Allowed
pull/359/head
Federico Leva 4 years ago
parent 8b5378f991
commit 49017e3f20

@ -840,7 +840,12 @@ def getXMLRevisions(config={}, session=None, allpages=False):
print("Trying to get wikitext from the allrevisions API and to build the XML")
while True:
try:
arvrequest = site.api(**arvparams)
arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
continue
except requests.exceptions.ReadTimeout as err:
# Hopefully temporary, just wait a bit and continue with the same request.
# No point putting a limit to retries, we'd need to abort everything.
@ -865,7 +870,13 @@ def getXMLRevisions(config={}, session=None, allpages=False):
print("Trying to list the revisions and to export them one by one")
# We only need the revision ID, all the rest will come from the raw export
arvparams['arvprop'] = 'ids'
arvrequest = site.api(**arvparams)
try:
arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
continue
exportparams = {
'action': 'query',
'export': '1',
@ -888,7 +899,14 @@ def getXMLRevisions(config={}, session=None, allpages=False):
# chooses to give us only the latest for each page
for revid in revids:
exportparams['revids'] = revid
exportrequest = site.api(**exportparams)
try:
exportrequest = site.api(http_method=config['http_method'], **exportparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
exportrequest = site.api(http_method=config['http_method'], **exportparams)
# This gives us a self-standing <mediawiki> element
# but we only need the inner <page>: we can live with
# duplication and non-ordering of page titles, but the
@ -900,7 +918,12 @@ def getXMLRevisions(config={}, session=None, allpages=False):
# Get the new ones
arvparams['arvcontinue'] = arvrequest['continue']['arvcontinue']
try:
arvrequest = site.api(**arvparams)
arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.ReadTimeout as err:
# As above
print("ERROR: {}".format(str(err)))
@ -932,7 +955,14 @@ def getXMLRevisions(config={}, session=None, allpages=False):
'titles': title,
'export': '1',
}
exportrequest = site.api(**exportparams)
try:
exportrequest = site.api(http_method=config['http_method'], **exportparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
exportrequest = site.api(http_method=config['http_method'], **exportparams)
xml = exportrequest['query']['export']['*']
c += 1
if c % 10 == 0:
@ -959,7 +989,14 @@ def getXMLRevisions(config={}, session=None, allpages=False):
'rvlimit': 50,
'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content',
}
prequest = site.api(**pparams)
try:
prequest = site.api(http_method=config['http_method'], **pparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
prequest = site.api(http_method=config['http_method'], **pparams)
c += 1
if c % 10 == 0:
print('Downloaded {} pages'.format(c))
@ -987,7 +1024,13 @@ def getXMLRevisions(config={}, session=None, allpages=False):
if 'continue' in prequest.keys():
print("Getting more revisions for page {}".format(title))
pparams['rvcontinue'] = prequest['continue']['rvcontinue']
prequest = site.api(**pparams)
try:
prequest = site.api(http_method=config['http_method'], **pparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
prequest = site.api(http_method=config['http_method'], **pparams)
# mwclient seems to rewrite query-continue
#if 'query-continue' in prequest.keys():
# pparams['rvcontinue'] = prequest['query-continue']['revisions']['rvcontinue']
@ -1826,6 +1869,7 @@ def getParameters(params=[]):
'date': datetime.datetime.now().strftime('%Y%m%d'),
'api': api,
'failfast': args.failfast,
'http_method': "POST",
'index': index,
'images': args.images,
'logs': False,

Loading…
Cancel
Save