diff --git a/dumpgenerator.py b/dumpgenerator.py
index 1c99c24..678272b 100644
--- a/dumpgenerator.py
+++ b/dumpgenerator.py
@@ -509,6 +509,12 @@ def getXMLPage(config={}, title='', verbose=True, session=None):
xml = getXMLPageCore(params=params, config=config, session=session)
if not xml:
raise PageMissingError
+ else:
+ # Strip the sha1 sums that keep showing up in the export and
+ # that are invalid for the XML schema (they only apply to
+ # revisions).
+ xml = re.sub(r'\n\s*\w+\s*\n', r'\n', xml)
+ xml = re.sub(r'\n\s*\s*\n', r'\n', xml)
yield xml.split("")[0]