From 2c21eadf7c456b7ea1efe233d5eeaa45e29d3ee3 Mon Sep 17 00:00:00 2001
From: Federico Leva
Date: Mon, 10 Feb 2020 22:32:01 +0200
Subject: [PATCH] Wikia: make getXMLHeader() check more lenient

Otherwise we end up using Special:Export even though the export API
would work perfectly well with --xmlrevisions.

May also fix images on fandom.com:
https://github.com/WikiTeam/wikiteam/issues/330
---
 dumpgenerator.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/dumpgenerator.py b/dumpgenerator.py
index 924a02e..071a458 100755
--- a/dumpgenerator.py
+++ b/dumpgenerator.py
@@ -423,10 +423,24 @@ def getXMLHeader(config={}, session=None):
             # Export and exportnowrap exist from MediaWiki 1.15, allpages from 1.18
             r = session.get(config['api'] + '?action=query&export=1&exportnowrap=1&list=allpages&aplimit=1', timeout=10)
             xml = r.text
+            # Otherwise try without exportnowrap, e.g. Wikia returns a blank page on 1.19
+            if not xml:
+                r = session.get(config['api'] + '?action=query&export=1&list=allpages&aplimit=1&format=json', timeout=10)
+                try:
+                    xml = r.json()['query']['export']['*']
+                except KeyError:
+                    xml = None
             if not xml:
                 # Do without a generator, use our usual trick of a random page title
                 r = session.get(config['api'] + '?action=query&export=1&exportnowrap=1&titles=' + randomtitle, timeout=10)
                 xml = r.text
+                # Again try without exportnowrap
+                if not xml:
+                    r = session.get(config['api'] + '?action=query&export=1&format=json&titles=' + randomtitle, timeout=10)
+                    try:
+                        xml = r.json()['query']['export']['*']
+                    except KeyError:
+                        xml = None
         except requests.exceptions.RetryError:
             pass
@@ -1302,7 +1316,7 @@ def getImageNamesAPI(config={}, session=None):
                     url = curateImageURL(config=config, url=url)
                     # encoding to ascii is needed to work around this horrible bug:
                     # http://bugs.python.org/issue8136
-                    if 'api' in config and '.wikia.com' in config['api']:
+                    if 'api' in config and ('.wikia.' in config['api'] or '.fandom.com' in config['api']):
                         #to avoid latest?cb=20120816112532 in filenames
                         filename = unicode(urllib.unquote((re.sub('_', ' ', url.split('/')[-3])).encode('ascii', 'ignore')), 'utf-8')
                     else:
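
A minimal standalone sketch (not part of the patch) of the fallback the
first hunk adds: try exportnowrap first and, if the wiki answers with a
blank page, retry with format=json and unwrap the export payload. The
function name and example API URL are placeholders, not code from
dumpgenerator.py.

    import requests

    def get_export_xml(api, titles='Main_Page', timeout=10):
        session = requests.Session()
        # exportnowrap returns the <mediawiki> XML document directly
        r = session.get(api + '?action=query&export=1&exportnowrap=1&titles=' + titles,
                        timeout=timeout)
        xml = r.text
        if not xml:
            # Some wikis (e.g. Wikia on MediaWiki 1.19) answer exportnowrap
            # with a blank page; fall back to JSON and unwrap the export key.
            r = session.get(api + '?action=query&export=1&format=json&titles=' + titles,
                            timeout=timeout)
            try:
                xml = r.json()['query']['export']['*']
            except (KeyError, ValueError):
                xml = None
        return xml

    print(get_export_xml('https://example.fandom.com/api.php'))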
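
Likewise, an illustration (again not part of the patch) of why the second
hunk keeps url.split('/')[-3] as the filename for Wikia/Fandom image URLs:
the last path component is the cache-busting "latest?cb=..." suffix the
comment mentions, so the real filename sits two segments earlier. The
sample URL is made up.

    import re
    try:
        from urllib import unquote          # Python 2, as in dumpgenerator.py
    except ImportError:
        from urllib.parse import unquote    # Python 3

    url = ('https://vignette.wikia.nocookie.net/somewiki/images/a/ab/'
           'Some_file.png/revision/latest?cb=20120816112532')

    # split('/') ends with [..., 'Some_file.png', 'revision', 'latest?cb=...'],
    # so [-3] is the real filename; underscores are turned back into spaces.
    filename = unquote(re.sub('_', ' ', url.split('/')[-3]))
    print(filename)  # -> "Some file.png"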