no longer strip <sha1> tags

It's 2023, and wikia isn't wikia anymore.
Without the <sha1> tags, revisions cannot be deduplicated when importing a wiki dump.
pull/473/head
yzqzss 10 months ago
parent 54d9d8051e
commit d57e42fcc6

@@ -624,12 +624,6 @@ def getXMLPage(config={}, title='', verbose=True, session=None):
raise ExportAbortedError(config['index'])
if not "</page>" in xml:
raise PageMissingError(params['title'], xml)
else:
# strip these sha1s sums which keep showing up in the export and
# which are invalid for the XML schema (they only apply to
# revisions)
xml = re.sub(r'\n\s*<sha1>\w+</sha1>\s*\n', r'\n', xml)
xml = re.sub(r'\n\s*<sha1/>\s*\n', r'\n', xml)
yield xml.split("</page>")[0]

Loading…
Cancel
Save