diff --git a/dumpgenerator.py b/dumpgenerator.py index cd6a58e..ef3ba66 100644 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -1,1291 +1,1292 @@ -#!/usr/bin/env python2 # -*- coding: utf-8 -*- -# dumpgenerator.py A generator of dumps for wikis -# Copyright (C) 2011-2014 WikiTeam developers +# Copyright (C) 2013 Hydriz Scholz +# Copyright (C) 2014 WikiTeam +# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA, or visit +# + +####################################################################### +# dumpgenerator.py is a script to generate backups of MediaWiki wikis # +# To learn more, read the documentation: # +# http://code.google.com/p/wikiteam/wiki/NewTutorial # +####################################################################### + +# For developers: +# * All functions and classes are displayed in alphabetical order for easier accessibility. +# * Script exit codes reference: +# * 0 - Script ran well without problems +# * 1 - Script failed due to user's incorrect use +# * 2 - Script failed due to destination server issue +# * For testing purposes, add the --debug parameter and edit DumpGenerator.debug() accordingly. + +###### +# TODO LIST +# 0. Download index.html and Special:Version.html +# 1. Index.php support. +# 2. Special:Log pages support +# 3. GUI (Question and Answer if no parameters are given) +# 4. Resuming of dump +# 5. Place the images in various folders so as to avoid hitting the limit of number of files in a directory +# 6. Speed up the script. A run with --xml --images on test.wikidata.org came up with 9 min 23 sec on 2.0 and 3 min 58 sec on 1.0 + +# WHAT IS WORKING +# 1. XML dumping +# 2. Complete dumping using API (except for --logs) +# 3. Automatic updating +# 4. Dumping of XML based on a list of titles +# 5. Integrity check for XML dump -# To learn more, read the documentation: -# https://github.com/WikiTeam/wikiteam/wiki - -import cookielib -import cPickle import datetime -import sys -try: - import argparse -except ImportError: - print "Please install the argparse module." - sys.exit(1) +import getopt +import hashlib import json -try: - from hashlib import md5 -except ImportError: # Python 2.4 compatibility - from md5 import new as md5 import os import re -try: - import requests -except ImportError: - print "Please install or update the Requests module." - sys.exit(1) -import subprocess +import shutil +import sys import time import urllib - -__VERSION__ = '0.2.2' #major, minor, micro - -def getVersion(): - return(__VERSION__) - - -def truncateFilename(other={}, filename=''): - """ Truncate filenames when downloading images with large filenames """ - return filename[:other['filenamelimit']] + md5(filename).hexdigest() + '.' 
+ filename.split('.')[-1] - -def delay(config={}, session=None): - """ Add a delay if configured for that """ - if config['delay'] > 0: - print 'Sleeping... %d seconds...' % (config['delay']) - time.sleep(config['delay']) - -def cleanHTML(raw=''): - """ Extract only the real wiki content and remove rubbish """ - """ This function is ONLY used to retrieve page titles and file names when no API is available """ - """ DO NOT use this function to extract page content """ - #different "tags" used by different MediaWiki versions to mark where starts and ends content - if re.search('', raw): - raw = raw.split('')[1].split('')[0] - elif re.search('', raw): - raw = raw.split('')[1].split('')[0] - elif re.search('', raw): - raw = raw.split('')[1].split('')[0] - elif re.search('', raw): - raw = raw.split('')[1].split('')[0] - elif re.search('
', raw): - raw = raw.split('
')[1].split('
')[0] - else: - print raw[:250] - print 'This wiki doesn\'t use marks to split content' - sys.exit() - return raw - -def handleStatusCode(response): - statuscode = response.status_code - if statuscode >= 200 and statuscode < 300: - return - - print "HTTP Error %d." % statuscode - if statuscode >= 300 and statuscode < 400: - print "Redirect should happen automatically: please report this as a bug." - print response.url - - elif statuscode == 400: - print "Bad Request: The wiki may be malfunctioning." - print "Please try again later." - print response.url - sys.exit(1) - - elif statuscode == 401 or statuscode == 403: - print "Authentication required." - print "Please use --userpass." - print response.url - - elif statuscode == 404: - print "Not found. Is Special:Export enabled for this wiki?" - print response.url - sys.exit(1) - - elif statuscode == 429 or (statuscode >= 500 and statuscode < 600): - print "Server error, max retries exceeded." - print "Please resume the dump later." - print response.url - sys.exit(1) - -def getNamespacesScraper(config={}, session=None): - """ Hackishly gets the list of namespaces names and ids from the dropdown in the HTML of Special:AllPages """ - """ Function called if no API is available """ - namespaces = config['namespaces'] - namespacenames = {0:''} # main is 0, no prefix - if namespaces: - r = session.post(url=config['index'], data={'title': 'Special:Allpages'}) - raw = r.text - delay(config=config, session=session) - - m = re.compile(r'').finditer(raw) # [^>]*? to include selected="selected" - if 'all' in namespaces: - namespaces = [] - for i in m: - namespaces.append(int(i.group("namespaceid"))) - namespacenames[int(i.group("namespaceid"))] = i.group("namespacename") - else: - #check if those namespaces really exist in this wiki - namespaces2 = [] - for i in m: - if int(i.group("namespaceid")) in namespaces: - namespaces2.append(int(i.group("namespaceid"))) - namespacenames[int(i.group("namespaceid"))] = i.group("namespacename") - namespaces = namespaces2 - else: - namespaces = [0] - - namespaces = list(set(namespaces)) #uniques - print '%d namespaces found' % (len(namespaces)) - return namespaces, namespacenames - -def getNamespacesAPI(config={}, session=None): - """ Uses the API to get the list of namespaces names and ids """ - namespaces = config['namespaces'] - namespacenames = {0:''} # main is 0, no prefix - if namespaces: - r = session.post(url=config['api'], data={'action': 'query', 'meta': 'siteinfo', 'siprop': 'namespaces', 'format': 'json'}) - result = json.loads(r.text) - delay(config=config, session=session) - - if 'all' in namespaces: - namespaces = [] - for i in result['query']['namespaces'].keys(): - if int(i) < 0: # -1: Special, -2: Media, excluding - continue - namespaces.append(int(i)) - namespacenames[int(i)] = result['query']['namespaces'][i]['*'] - else: - #check if those namespaces really exist in this wiki - namespaces2 = [] - for i in result['query']['namespaces'].keys(): - if int(i) < 0: # -1: Special, -2: Media, excluding - continue - if int(i) in namespaces: - namespaces2.append(int(i)) - namespacenames[int(i)] = result['query']['namespaces'][i]['*'] - namespaces = namespaces2 - else: - namespaces = [0] - - namespaces = list(set(namespaces)) #uniques - print '%d namespaces found' % (len(namespaces)) - return namespaces, namespacenames - -def getPageTitlesAPI(config={}, session=None): - """ Uses the API to get the list of page titles """ - titles = [] - namespaces, namespacenames = getNamespacesAPI(config=config, 
session=session) - for namespace in namespaces: - if namespace in config['exnamespaces']: - print ' Skipping namespace = %d' % (namespace) - continue - - c = 0 - print ' Retrieving titles in the namespace %d' % (namespace) - apfrom = '!' - while apfrom: - sys.stderr.write('.') #progress - params = {'action': 'query', 'list': 'allpages', 'apnamespace': namespace, 'apfrom': apfrom.encode('utf-8'), 'format': 'json', 'aplimit': 500} - r = session.post(url=config['api'], data=params) - handleStatusCode(r) - #FIXME Handle HTTP errors here! - jsontitles = json.loads(r.text) - apfrom = '' - if jsontitles.has_key('query-continue') and jsontitles['query-continue'].has_key('allpages'): - if jsontitles['query-continue']['allpages'].has_key('apcontinue'): - apfrom = jsontitles['query-continue']['allpages']['apcontinue'] - elif jsontitles['query-continue']['allpages'].has_key('apfrom'): - apfrom = jsontitles['query-continue']['allpages']['apfrom'] - #print apfrom - #print jsontitles - titles += [page['title'] for page in jsontitles['query']['allpages']] - if len(titles) != len(set(titles)): - #probably we are in a loop, server returning dupe titles, stop it - print 'Probably a loop, finishing' - titles = list(set(titles)) - apfrom = '' - c += len(jsontitles['query']['allpages']) - delay(config=config, session=session) - print ' %d titles retrieved in the namespace %d' % (c, namespace) - return titles - -def getPageTitlesScraper(config={}, session=None): - """ """ - titles = [] - namespaces, namespacenames = getNamespacesScraper(config=config, session=session) - for namespace in namespaces: - print ' Retrieving titles in the namespace', namespace - url = '%s?title=Special:Allpages&namespace=%s' % (config['index'], namespace) - r = session.get(url=url) - raw = r.text - raw = cleanHTML(raw) - - r_title = r'title="(?P[^>]+)">' - r_suballpages = '' - r_suballpages1 = r'&from=(?P<from>[^>]+)&to=(?P<to>[^>]+)">' - r_suballpages2 = r'Special:Allpages/(?P<from>[^>]+)">' - if re.search(r_suballpages1, raw): - r_suballpages = r_suballpages1 - elif re.search(r_suballpages2, raw): - r_suballpages = r_suballpages2 - else: - pass #perhaps no subpages - - deep = 3 # 3 is the current deep of English Wikipedia for Special:Allpages, 3 levels - c = 0 - checked_suballpages = [] - rawacum = raw - while r_suballpages and re.search(r_suballpages, raw) and c < deep: - #load sub-Allpages - m = re.compile(r_suballpages).finditer(raw) - for i in m: - fr = i.group('from') - - if r_suballpages == r_suballpages1: - to = i.group('to') - name = '%s-%s' % (fr, to) - url = '%s?title=Special:Allpages&namespace=%s&from=%s&to=%s' % (config['index'], namespace, fr, to) #do not put urllib.quote in fr or to - elif r_suballpages == r_suballpages2: #fix, esta regexp no carga bien todas? o falla el r_title en este tipo de subpag? 
(wikiindex) - fr = fr.split('&namespace=')[0] #clean &namespace=\d, sometimes happens - name = fr - url = '%s?title=Special:Allpages/%s&namespace=%s' % (config['index'], name, namespace) - - if not name in checked_suballpages: - checked_suballpages.append(name) #to avoid reload dupe subpages links - delay(config=config, session=session) - r2 = session.get(url=url) - raw2 = r2.text - raw2 = cleanHTML(raw2) - rawacum += raw2 #merge it after removed junk - print ' Reading', name, len(raw2), 'bytes', len(re.findall(r_suballpages, raw2)), 'subpages', len(re.findall(r_title, raw2)), 'pages' - - delay(config=config, session=session) - c += 1 - - c = 0 - m = re.compile(r_title).finditer(rawacum) - for i in m: - t = undoHTMLEntities(text=i.group('title')) - if not t.startswith('Special:'): - if not t in titles: - titles.append(t) - c += 1 - print ' %d titles retrieved in the namespace %d' % (c, namespace) - return titles - -def getPageTitles(config={}, session=None): - """ Get list of page titles """ - #http://en.wikipedia.org/wiki/Special:AllPages - #http://archiveteam.org/index.php?title=Special:AllPages - #http://www.wikanda.es/wiki/Especial:Todas - print 'Loading page titles from namespaces = %s' % (config['namespaces'] and ','.join([str(i) for i in config['namespaces']]) or 'None') - print 'Excluding titles from namespaces = %s' % (config['exnamespaces'] and ','.join([str(i) for i in config['exnamespaces']]) or 'None') - - titles = [] - if config['api']: - titles = getPageTitlesAPI(config=config, session=session) - elif config['index']: - titles = getPageTitlesScraper(config=config, session=session) - - titles = list(set(titles)) #removing dupes (e.g. in CZ appears Widget:AddThis two times (main namespace and widget namespace)) - titles.sort() #sorting - - print '%d page titles loaded' % (len(titles)) - return titles - -def getXMLHeader(config={}, session=None): - """ Retrieve a random page to extract XML headers (namespace info, etc) """ - #get the header of a random page, to attach it in the complete XML backup - #similar to: <mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:x.... - randomtitle = 'Main_Page' #previously AMF5LKE43MNFGHKSDMRTJ - xml = getXMLPage(config=config, title=randomtitle, verbose=False, session=session) - header = xml.split('</mediawiki>')[0] - if not xml: - print 'XML export on this wiki is broken, quitting.' 
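# --- Illustrative aside (not part of the patch) --------------------------------
# A minimal sketch of the continuation loop that the removed getPageTitlesAPI()
# uses to page through list=allpages. It assumes only the 'requests' module and a
# placeholder API URL; parameter names mirror the code above, using the older
# 'query-continue' format and falling back from 'apcontinue' to 'apfrom'.
import requests

def list_all_titles(api_url, namespace=0, session=None):
    session = session or requests.Session()
    titles = []
    apfrom = '!'
    while apfrom:
        params = {'action': 'query', 'list': 'allpages', 'apnamespace': namespace,
                  'apfrom': apfrom, 'format': 'json', 'aplimit': 500}
        data = session.post(api_url, data=params).json()
        titles += [page['title'] for page in data['query']['allpages']]
        cont = data.get('query-continue', {}).get('allpages', {})
        apfrom = cont.get('apcontinue') or cont.get('apfrom') or ''
    return list(set(titles))
# --------------------------------------------------------------------------------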
- sys.exit() - return header - -def getXMLFileDesc(config={}, title='', session=None): - """ Get XML for image description page """ - config['curonly'] = 1 #tricky to get only the most recent desc - return getXMLPage(config=config, title=title, verbose=False, session=session) - -def getUserAgent(): - """ Return a cool user-agent to hide Python user-agent """ - useragents = [ - #firefox - 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0', - 'Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0', - ] - return useragents[0] - -def logerror(config={}, text=''): - """ Log error in file """ - if text: - with open('%s/errors.log' % (config['path']), 'a') as outfile: - output = u'%s: %s\n' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), text) - outfile.write(output.encode('utf-8')) - -def getXMLPageCore(headers={}, params={}, config={}, session=None): - """ """ - #returns a XML containing params['limit'] revisions (or current only), ending in </mediawiki> - #if retrieving params['limit'] revisions fails, returns a current only version - #if all fail, returns the empty string - xml = '' - c = 0 - maxseconds = 100 #max seconds to wait in a single sleeping - maxretries = 5 # x retries and skip - increment = 20 #increment every retry - while not re.search(r'</mediawiki>', xml): - if c > 0 and c < maxretries: - wait = increment * c < maxseconds and increment * c or maxseconds # incremental until maxseconds - print ' XML for "%s" is wrong. Waiting %d seconds and reloading...' % (params['pages'], wait) - time.sleep(wait) - if params['limit'] > 1: # reducing server load requesting smallest chunks (if curonly then limit = 1 from mother function) - params['limit'] = params['limit'] / 2 # half - if c >= maxretries: - print ' We have retried %d times' % (c) - print ' MediaWiki error for "%s", network error or whatever...' % (params['pages']) - # If it's not already what we tried: our last chance, preserve only the last revision... - # config['curonly'] means that the whole dump is configured to save nonly the last - # params['curonly'] should mean that we've already tried this fallback, because it's set by the following if and passed to getXMLPageCore - if not config['curonly']: - print ' Trying to save only the last revision for this page...' - params['curonly'] = 1 - logerror(config=config, text='Error while retrieving the full history of "%s". Trying to save only the last revision for this page' % (params['pages'])) - return getXMLPageCore(headers=headers, params=params, config=config) - else: - print ' Saving in the errors log, and skipping...' - logerror(config=config, text='Error while retrieving the last revision of "%s". Skipping.' 
% (params['pages'])) - return '' # empty xml - #FIXME HANDLE HTTP Errors HERE - r = session.post(url=config['index'], data=params, headers=headers) - handleStatusCode(r) - xml = r.text - c += 1 - - return xml - -def getXMLPage(config={}, title='', verbose=True, session=None): - """ Get the full history (or current only) of a page """ - - #if server errors occurs while retrieving the full page history, it may return [oldest OK versions] + last version, excluding middle revisions, so it would be partialy truncated - #http://www.mediawiki.org/wiki/Manual_talk:Parameters_to_Special:Export#Parameters_no_longer_in_use.3F - - limit = 1000 - truncated = False - title_ = title - title_ = re.sub(' ', '_', title_) - #do not convert & into %26, title_ = re.sub('&', '%26', title_) - params = {'title': 'Special:Export', 'pages': title_, 'action': 'submit'} - if config['curonly']: - params['curonly'] = 1 - params['limit'] = 1 - else: - params['offset'] = '1' # 1 always < 2000s - params['limit'] = limit - if config.has_key('templates') and config['templates']: #in other case, do not set params['templates'] - params['templates'] = 1 - - xml = getXMLPageCore(params=params, config=config, session=session) - - #if complete history, check if this page history has > limit edits, if so, retrieve all using offset if available - #else, warning about Special:Export truncating large page histories - r_timestamp = r'<timestamp>([^<]+)</timestamp>' - if not config['curonly'] and re.search(r_timestamp, xml): # search for timestamps in xml to avoid analysing empty pages like Special:Allpages and the random one - while not truncated and params['offset']: #next chunk - params['offset'] = re.findall(r_timestamp, xml)[-1] #get the last timestamp from the acum XML - xml2 = getXMLPageCore(params=params, config=config, session=session) - - if re.findall(r_timestamp, xml2): #are there more edits in this next XML chunk or no <page></page>? - if re.findall(r_timestamp, xml2)[-1] == params['offset']: - #again the same XML, this wiki does not support params in Special:Export, offer complete XML up to X edits (usually 1000) - print 'ATTENTION: This wiki does not allow some parameters in Special:Export, therefore pages with large histories may be truncated' - truncated = True - break - else: - """ </namespaces> - </siteinfo> - <page> - <title>Main Page - 15580374 - edit=sysop:move=sysop (?) 
- - 418009832 - 2011-03-09T19:57:06Z - - """ - #offset is OK in this wiki, merge with the previous chunk of this page history and continue - xml = xml.split('')[0] + ' ' + (''.join(xml2.split('')[1:])) - else: - params['offset'] = '' #no more edits in this page history - - if verbose: - numberofedits = len(re.findall(r_timestamp, xml)) - if (numberofedits == 1): - print ' %s, 1 edit' % (title) - else: - print ' %s, %d edits' % (title, numberofedits) - - return xml - -def cleanXML(xml=''): - """ Trim redundant info """ - #do not touch XML codification, leave AS IS - if re.search(r'\n', xml) and re.search(r'', xml): - xml = xml.split('\n')[1] - xml = xml.split('')[0] - return xml - -def generateXMLDump(config={}, titles=[], start='', session=None): - """ Generates a XML dump for a list of titles """ - - print 'Retrieving the XML for every page from "%s"' % (start and start or 'start') - header = getXMLHeader(config=config, session=session) - footer = '\n' #new line at the end - xmlfilename = '%s-%s-%s.xml' % (domain2prefix(config=config), config['date'], config['curonly'] and 'current' or 'history') - xmlfile = '' - lock = True - if start: - #remove the last chunk of xml dump (it is probably incomplete) - xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'r') - xmlfile2 = open('%s/%s2' % (config['path'], xmlfilename), 'w') - prev = '' - c = 0 - for l in xmlfile: - #removing \n until end of file - if c != 0: #lock to avoid write an empty line at the begining of file - if not re.search(r'%s' % (start), l): - xmlfile2.write(prev) - else: - break - c += 1 - prev = l - xmlfile.close() - xmlfile2.close() - #subst xml with xml2 - os.remove('%s/%s' % (config['path'], xmlfilename)) #remove previous xml dump - os.rename('%s/%s2' % (config['path'], xmlfilename), '%s/%s' % (config['path'], xmlfilename)) #move correctly truncated dump to its real name - else: - #requested complete xml dump - lock = False - xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'w') - xmlfile.write(header.encode('utf-8')) - xmlfile.close() - - xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a') - c = 1 - for title in titles: - if not title.strip(): - continue - if title == start: #start downloading from start, included - lock = False - if lock: - continue - delay(config=config, session=session) - if c % 10 == 0: - print 'Downloaded %d pages' % (c) - xml = getXMLPage(config=config, title=title, session=session) - xml = cleanXML(xml=xml) - if not xml: - logerror(config=config, text=u'The page "%s" was missing in the wiki (probably deleted)' % (title)) - #here, XML is a correct chunk or - #an empty string due to a deleted page (logged in errors log) or - #an empty string due to an error while retrieving the page from server (logged in errors log) - xmlfile.write(xml.encode('utf-8')) - c += 1 - xmlfile.write(footer) - xmlfile.close() - print 'XML dump saved at...', xmlfilename - -def saveTitles(config={}, titles=[]): - """ Save title list in a file """ - - titlesfilename = '%s-%s-titles.txt' % (domain2prefix(config=config), config['date']) - titlesfile = open('%s/%s' % (config['path'], titlesfilename), 'w') - output = u"%s\n--END--" % ('\n'.join(titles)) - titlesfile.write(output.encode('utf-8')) - titlesfile.close() - - print 'Titles saved at...', titlesfilename - -def saveImageFilenamesURL(config={}, images=[], session=None): - """ Save image list in a file, including filename, url and uploader """ - - imagesfilename = '%s-%s-images.txt' % (domain2prefix(config=config), config['date']) - imagesfile = 
open('%s/%s' % (config['path'], imagesfilename), 'w') - imagesfile.write(('\n'.join(['%s\t%s\t%s' % (filename, url, uploader) for filename, url, uploader in images]).encode('utf-8'))) - imagesfile.write('\n--END--') - imagesfile.close() - - print 'Image filenames and URLs saved at...', imagesfilename - -def getImageFilenamesURL(config={}, session=None): - """ Retrieve file list: filename, url, uploader """ - - print 'Retrieving image filenames' - r_next = r'(?\d+)&' # (? 10: - print 'Error: listing %d images in a chunk is not possible, trying tiny chunks' % (limit) - limit = limit/10 - continue - elif retries > 0: # waste retries, then exit - retries -= 1 - print 'Retrying...' - continue - else: - print 'No more retries, exit...' - break - - raw = cleanHTML(raw) - #archiveteam 1.15.1 Yahoovideo.jpg (file) - #wikanda 1.15.5 Fernandocg - r_images1 = r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+\s*]+>(?P[^<]+)' - #wikijuegos 1.9.5 http://softwarelibre.uca.es/wikijuegos/Especial:Imagelist old mediawiki version - r_images2 = r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+\s*[^<]+\s*[^<]+\s*]+>(?P[^<]+)' - #gentoowiki 1.18 18:15, 3 April 2011Asus eeepc-1201nl.png (file)37 KBYannails 1 - r_images3 = r'(?im)]+title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+]+>]+>[^<]+]+>(?P[^<]+)' - #http://www.memoryarchive.org/en/index.php?title=Special:Imagelist&sort=byname&limit=50&wpIlMatch= - #(desc) 109 0923.JPG . . 885,713 bytes . . Bfalconer . . 18:44, 17 November 2005
- r_images4 = r'(?im)]+ title="[^:>]+:(?P[^>]+)">[^<]+[^<]+[^<]+[^<]+]+>(?P[^<]+)' - m = [] - #different mediawiki versions - if re.search(r_images1, raw): - m = re.compile(r_images1).finditer(raw) - elif re.search(r_images2, raw): - m = re.compile(r_images2).finditer(raw) - elif re.search(r_images3, raw): - m = re.compile(r_images3).finditer(raw) - elif re.search(r_images4, raw): - m = re.compile(r_images4).finditer(raw) - - for i in m: - url = i.group('url') - if url[0] == '/' or (not url.startswith('http://') and not url.startswith('https://')): #is it a relative URL? - if url[0] == '/': #slash is added later - url = url[1:] - domainalone = config['index'].split('://')[1].split('/')[0] #remove from :// (http or https) until the first / after domain - url = u'%s://%s/%s' % (config['index'].split('://')[0], domainalone, url) # concat http(s) + domain + relative url - url = undoHTMLEntities(text=url) - #url = urllib.unquote(url) #do not use unquote with url, it break some urls with odd chars - url = re.sub(' ', '_', url) - filename = re.sub('_', ' ', i.group('filename')) - filename = undoHTMLEntities(text=filename) - filename = urllib.unquote(filename) - uploader = re.sub('_', ' ', i.group('uploader')) - uploader = undoHTMLEntities(text=uploader) - uploader = urllib.unquote(uploader) - images.append([filename, url, uploader]) - #print filename, url - - if re.search(r_next, raw): - offset = re.findall(r_next, raw)[0] - retries += 5 # add more retries if we got a page with offset - else: - offset = '' - - if (len(images) == 1): - print ' Found 1 image' - else: - print ' Found %d images' % (len(images)) - - images.sort() - return images - -def getImageFilenamesURLAPI(config={}, session=None): - """ Retrieve file list: filename, url, uploader """ - - print 'Retrieving image filenames' - aifrom = '!' - images = [] - while aifrom: - sys.stderr.write('.') #progress - params = {'action': 'query', 'list': 'allimages', 'aiprop': 'url|user', 'aifrom': aifrom, 'format': 'json', 'ailimit': 500} - #FIXME Handle HTTP Errors HERE - r = session.post(url=config['api'], data=params) - handleStatusCode(r) - jsonimages = json.loads(r.text) - delay(config=config, session=session) - aifrom = '' - if jsonimages.has_key('query-continue') and jsonimages['query-continue'].has_key('allimages'): - if jsonimages['query-continue']['allimages'].has_key('aicontinue'): - aifrom = jsonimages['query-continue']['allimages']['aicontinue'] - elif jsonimages['query-continue']['allimages'].has_key('aifrom'): - aifrom = jsonimages['query-continue']['allimages']['aifrom'] - #print aifrom - - for image in jsonimages['query']['allimages']: - url = image['url'] - if url[0] == '/' or (not url.startswith('http://') and not url.startswith('https://')): #is it a relative URL? 
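# --- Illustrative aside (not part of the patch) --------------------------------
# A sketch of the relative-URL fix-up performed in both image listers above:
# anything that is not an absolute http(s) URL gets the scheme and host taken
# from config['index'] prepended. 'index_url' is a stand-in for that setting.
def absolutize(url, index_url):
    if url.startswith('http://') or url.startswith('https://'):
        return url
    scheme, rest = index_url.split('://', 1)
    host = rest.split('/', 1)[0]
    return '%s://%s/%s' % (scheme, host, url.lstrip('/'))

# absolutize('/images/a/ab/Example.png', 'http://wiki.example.org/index.php')
# -> 'http://wiki.example.org/images/a/ab/Example.png'
# --------------------------------------------------------------------------------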
- if url[0] == '/': #slash is added later - url = url[1:] - domainalone = config['index'].split('://')[1].split('/')[0] #remove from :// (http or https) until the first / after domain - url = u'%s://%s/%s' % (config['index'].split('://')[0], domainalone, url) # concat http(s) + domain + relative url - url = re.sub(' ', '_', url) - # encoding to ascii is needed to work around this horrible bug: http://bugs.python.org/issue8136 - filename = unicode(urllib.unquote((re.sub('_', ' ', url.split('/')[-1])).encode('ascii','ignore')), 'utf-8') - uploader = re.sub('_', ' ', image['user']) - images.append([filename, url, uploader]) - - if (len(images) == 1): - print ' Found 1 image' - else: - print ' Found %d images' % (len(images)) - - images.sort() - return images - -def undoHTMLEntities(text=''): - """ Undo some HTML codes """ - - text = re.sub('<', '<', text) # i guess only < > & " ' need conversion http://www.w3schools.com/html/html_entities.asp - text = re.sub('>', '>', text) - text = re.sub('&', '&', text) - text = re.sub('"', '"', text) - text = re.sub(''', '\'', text) - - return text - -def generateImageDump(config={}, other={}, images=[], start='', session=None): - """ Save files and descriptions using a file list """ - - #fix use subdirectories md5 - print 'Retrieving images from "%s"' % (start and start or 'start') - imagepath = '%s/images' % (config['path']) - if not os.path.isdir(imagepath): - print 'Creating "%s" directory' % (imagepath) - os.makedirs(imagepath) - - c = 0 - lock = True - if not start: - lock = False - for filename, url, uploader in images: - if filename == start: #start downloading from start (included) - lock = False - if lock: - continue - delay(config=config, session=session) - - #saving file - #truncate filename if length > 100 (100 + 32 (md5) = 132 < 143 (crash limit). Later .desc is added to filename, so better 100 as max) - filename2 = urllib.unquote(filename) - if len(filename2) > other['filenamelimit']: - # split last . (extension) and then merge - filename2 = truncateFilename(other=other, filename=filename2) - print 'Filename is too long, truncating. Now it is:', filename2 - filename3 = u'%s/%s' % (imagepath, filename2) - imagefile = open(filename3, 'wb') - r = requests.get(url=url) - imagefile.write(r.content) - imagefile.close() - #saving description if any - xmlfiledesc = getXMLFileDesc(config=config, title=u'Image:%s' % (filename), session=session) # use Image: for backwards compatibility - f = open('%s/%s.desc' % (imagepath, filename2), 'w') - if not re.search(r'', xmlfiledesc): #Banner featuring SG1, SGA, SGU teams - #failure when retrieving desc? then save it as empty .desc - xmlfiledesc = '' - f.write(xmlfiledesc.encode('utf-8')) - f.close() - delay(config=config, session=session) - c += 1 - if c % 10 == 0: - print ' Downloaded %d images' % (c) - - print 'Downloaded %d images' % (c) - -def saveLogs(config={}, session=None): - """ Save Special:Log """ - #get all logs from Special:Log - """parse - - """ - delay(config=config, session=session) - -def domain2prefix(config={}, session=None): - """ Convert domain name to a valid prefix filename. 
""" - - # At this point, both api and index are supposed to be defined - domain = '' - if config['api']: - domain = config['api'] - elif config['index']: - domain = config['index'] - - domain = domain.lower() - domain = re.sub(r'(https?://|www\.|/index\.php|/api\.php)', '', domain) - domain = re.sub(r'/', '_', domain) - domain = re.sub(r'\.', '', domain) - domain = re.sub(r'[^A-Za-z0-9]', '_', domain) - - return domain - -def loadConfig(config={}, configfilename=''): - """ Load config file """ - - try: - with open('%s/%s' % (config['path'], configfilename), 'r') as infile: - config = cPickle.load(infile) - except: - print 'There is no config file. we can\'t resume. Start a new dump.' - sys.exit() - - return config - -def saveConfig(config={}, configfilename=''): - """ Save config file """ - - with open('%s/%s' % (config['path'], configfilename), 'w') as outfile: - cPickle.dump(config, outfile) - -def welcome(): - message = '' - """ Opening message """ - message += "#"*73 - message += """ -# Welcome to DumpGenerator %s by WikiTeam (GPL v3) # -# More info at: https://github.com/WikiTeam/wikiteam #""" % (getVersion()) - message += "\n" - message += "#"*73 - message += "\n" - message += '' - message += "\n" - message += "#"*73 - message += """ -# Copyright (C) 2011-2014 WikiTeam # -# This program is free software: you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation, either version 3 of the License, or # -# (at your option) any later version. # -# # -# This program is distributed in the hope that it will be useful, # -# but WITHOUT ANY WARRANTY; without even the implied warranty of # -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # -# GNU General Public License for more details. # -# # -# You should have received a copy of the GNU General Public License # -# along with this program. If not, see . #""" - message += "\n" - message += "#"*73 - message += "\n" - message += '' - - return message - -def bye(): - """ Closing message """ - print "---> Congratulations! Your dump is complete <---" - print "If you found any bug, report a new issue here: https://github.com/WikiTeam/wikiteam/issues" - print "If this is a public wiki, please, consider publishing this dump. Do it yourself as explained in https://github.com/WikiTeam/wikiteam/wiki/New-Tutorial#Publishing_the_dump or contact us at https://github.com/WikiTeam/wikiteam" - print "Good luck! Bye!" 
- - -def getParameters(params=[]): - if not params: - params = sys.argv - - parser = argparse.ArgumentParser(description='') - - parser.add_argument('-v', '--version', action='version', version=getVersion()) - parser.add_argument('--cookies', metavar="cookies.txt", help="path to a cookies.txt file") - parser.add_argument('--delay', metavar=5, default=0, help="adds a delay (in seconds)") - parser.add_argument('--retries', metavar=5, default=5, help="Maximum number of retries for ") - parser.add_argument('--get-wiki-engine', action='store_true', help="returns the wiki engine") - - groupWikiOrAPIOrIndex = parser.add_mutually_exclusive_group(required=True) - groupWikiOrAPIOrIndex.add_argument('wiki', default='', nargs='?', help="URL to wiki") - groupWikiOrAPIOrIndex.add_argument('--api', help="URL to api.php") - groupWikiOrAPIOrIndex.add_argument('--index', help="URL to index.php") - - groupXMLOrImages = parser.add_argument_group() - groupXMLOrImages.add_argument('--xml', action='store_true', help="generates a full history XML dump (--xml --curonly for current revisions only)") - parser.add_argument('--curonly', action='store_true', help='store only the current version of pages') - - groupXMLOrImages.add_argument('--images', action='store_true', help="generates an image dump") - - parser.add_argument('--path', help='path to store wiki dump at') - parser.add_argument('--resume', action='store_true', help='resumes previous incomplete dump (requires --path)') - parser.add_argument('--force', action='store_true', help='') - parser.add_argument('--namespaces', metavar="1,2,3", help='comma-separated value of namespaces to include (all by default)') - parser.add_argument('--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude') - - parser.add_argument('--user', help='Username if authentication is required.') - parser.add_argument('--pass', dest='password', help='Password if authentication is required.') - - args = parser.parse_args() - #print args - - # Execute excluding args - if args.get_wiki_engine and args.wiki and (args.wiki.startswith('http://') or args.wiki.startswith('https://')): - print getWikiEngine(url=args.wiki) - sys.exit() - # End execute excluding args - - # check API URL - if args.api and (not args.api.startswith('http://') and not args.api.startswith('https://')): - print args.api - print 'ERROR: URL to api.php must start with http:// or https://\n' - parser.print_usage() - sys.exit(1) - - # check index URL - if args.index and (not args.index.startswith('http://') and not args.index.startswith('https://')): - print 'ERROR: URL to index.php must start with http:// or https://\n' - parser.print_usage() - sys.exit(1) - - # check user and pass (one requires both) - if (args.user and not args.password) or (args.password and not args.user): - print 'Both --user and --pass are required for authentication.' - parser.print_usage() - sys.exit(1) - - namespaces = ['all'] - exnamespaces = [] - # Process namespace inclusions - if args.namespaces: - if re.search(r'[^\d, \-]', args.namespaces) and args.namespaces.lower() != 'all': #fix, why - ? and... --namespaces= all with a space works? 
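# --- Illustrative aside (not part of the patch) --------------------------------
# How the --namespaces / --exnamespaces values above are interpreted: either the
# keyword 'all' or a comma-separated list of integer namespace IDs. A typical
# invocation of this CLI (the URL is a placeholder) would be:
#   python dumpgenerator.py --api=http://wiki.example.org/w/api.php --xml --images --namespaces=0,4
import re

def parse_namespace_list(value):
    if re.search(r'[^\d, \-]', value) and value.lower() != 'all':
        raise ValueError('Valid format is integer(s) separated by commas, or "all"')
    value = value.replace(' ', '')
    return ['all'] if value.lower() == 'all' else [int(i) for i in value.split(',')]

# parse_namespace_list('0, 4,14') -> [0, 4, 14]
# --------------------------------------------------------------------------------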
- print "Invalid namespace values.\nValid format is integer(s) separated by commas" - sys.exit() - else: - ns = re.sub(' ', '', args.namespaces) - if ns.lower() == 'all': - namespaces = ['all'] - else: - namespaces = [int(i) for i in ns.split(',')] - - # Process namespace exclusions - if args.exnamespaces: - if re.search(r'[^\d, \-]', args.exnamespaces): - print "Invalid namespace values.\nValid format is integer(s) separated by commas" - sys.exit(1) - else: - ns = re.sub(' ', '', args.exnamespaces) - if ns.lower() == 'all': - print 'You cannot exclude all namespaces.' - sys.exit(1) - else: - exnamespaces = [int(i) for i in ns.split(',')] - - # --curonly requires --xml - if args.curonly and not args.xml: - print "--curonly requires --xml\n" - parser.print_usage() - sys.exit(1) - - #user chose --api, but --index it is necessary for special:export: we generate it - if args.api and not args.index: - index = args.api.split('api.php')[0] + 'index.php' - # WARNING: remove index.php here for misconfigured sites like editthis.info, or provide --index directly - print 'You didn\'t provide a path for index.php, using ', index - else: - index = args.index - - cj = cookielib.MozillaCookieJar() - if args.cookies: - cj.load(args.cookies) - print 'Using cookies from %s' % args.cookies - - session = requests.Session() - session.cookies = cj - session.headers = {'User-Agent': getUserAgent()} - if args.user and args.password: - session.auth = (args.user, args.password) - #session.mount(args.api.split('/api.php')[0], HTTPAdapter(max_retries=max_ret)) - - config = { - 'curonly': args.curonly, - 'date': datetime.datetime.now().strftime('%Y%m%d'), - 'api': args.api or '', - 'index': index, - 'images': args.images, - 'logs': False, - 'xml': args.xml, - 'namespaces': namespaces, - 'exnamespaces': exnamespaces, - 'path': args.path or '', - 'cookies': args.cookies or '', - 'delay': args.delay - } - other = { - 'resume': args.resume, - 'filenamelimit': 100, #do not change - 'force': args.force, - 'session': session - } - - if config['api']: - #check api.php - if checkAPI(config['api'], config, session=other['session']): - print 'api.php is OK' - else: - print 'Error in api.php, please, provide a correct path to api.php' - sys.exit() - - if config['index']: - #check index.php - if checkIndexphp(config['index'], config, session=other['session']): - print 'index.php is OK' - else: - print 'Error in index.php, please, provide a correct path to index.php' - sys.exit() - - #calculating path, if not defined by user with --path= - if not config['path']: - config['path'] = './%s-%s-wikidump' % (domain2prefix(config=config, session=session), config['date']) - - return config, other - -def checkAPI(api, config={}, session=None): - """ Checking API availability """ - global cj - r = session.post(url=api, data={'action': 'query', 'meta': 'siteinfo', 'format': 'json'}) - resultText = r.text - print 'Checking api.php...', api - if "MediaWiki API is not enabled for this site." 
in resultText: - return False - result = json.loads(resultText) - delay(config=config, session=session) - if result.has_key('query'): - return True - return False - -def checkIndexphp(indexphp, config={}, session=None): - """ Checking index.php availability """ - r = session.post(url=indexphp, data={'title': 'Special:Version'}) - raw = r.text - delay(config=config, session=session) - print 'Checking index.php...', indexphp - if re.search(r'(Special:Badtitle|class="permissions-errors"|"wgCanonicalSpecialPageName":"Badtitle"|Login Required)', raw) and not config['cookies']: # Workaround for issue 71 - print "ERROR: This wiki requires login and we are not authenticated" - return False - if re.search(r'(This wiki is powered by|
|meta name="generator" content="MediaWiki)', raw): - return True - return False - -def removeIP(raw=''): - """ Remove IP from HTML comments """ - - raw = re.sub(r'\d+\.\d+\.\d+\.\d+', '0.0.0.0', raw) - #http://www.juniper.net/techpubs/software/erx/erx50x/swconfig-routing-vol1/html/ipv6-config5.html - #weird cases as :: are not included - raw = re.sub(r'(?i)[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}', '0:0:0:0:0:0:0:0', raw) - - return raw - -def checkXMLIntegrity(config={}, session=None): - """ Check XML dump integrity, to detect broken XML chunks """ - return - - print 'Verifying dump...' - checktitles = 0 - checkpageopen = 0 - checkpageclose = 0 - checkrevisionopen = 0 - checkrevisionclose = 0 - for line in file('%s/%s-%s-%s.xml' % (config['path'], domain2prefix(config=config, session=session), config['date'], config['curonly'] and 'current' or 'history'), 'r').read().splitlines(): - if "" in line: - checkrevisionopen += 1 - elif "" in line: - checkrevisionclose += 1 - elif "" in line: - checkpageopen += 1 - elif "" in line: - checkpageclose += 1 - elif "" in line: - checktitles += 1 - else: - continue - if (checktitles == checkpageopen and checktitles == checkpageclose and checkrevisionopen == checkrevisionclose): - pass - else: - print 'XML dump seems to be corrupted.' - reply = '' - while reply.lower() not in ['yes', 'y', 'no', 'n']: - reply = raw_input('Regenerate a new dump ([yes, y], [no, n])? ') - if reply.lower() in ['yes', 'y']: - generateXMLDump(config=config, titles=titles) - elif reply.lower() in ['no', 'n']: - print 'Not generating a new dump.' - - -def createNewDump(config={}, other={}): - titles = [] - images = [] - print 'Trying generating a new dump into a new directory...' - if config['xml']: - titles += getPageTitles(config=config, session=other['session']) - saveTitles(config=config, titles=titles) - generateXMLDump(config=config, titles=titles, session=other['session']) - checkXMLIntegrity(config=config) - if config['images']: - if config['api']: - images += getImageFilenamesURLAPI(config=config, session=other['session']) - else: - images += getImageFilenamesURL(config=config, session=other['session']) - saveImageFilenamesURL(config=config, images=images, session=other['session']) - generateImageDump(config=config, other=other, images=images, session=other['session']) - if config['logs']: - saveLogs(config=config, session=session) - -def resumePreviousDump(config={}, other={}): - titles = [] - images = [] - print 'Resuming previous dump process...' - if config['xml']: - #load titles - lasttitle = '' - try: - f = open('%s/%s-%s-titles.txt' % (config['path'], domain2prefix(config=config, session=other['session']), config['date']), 'r') - raw = unicode(f.read(), 'utf-8') - titles = raw.split('\n') - lasttitle = titles[-1] - if not lasttitle: #empty line at EOF ? - lasttitle = titles[-2] - f.close() - except: - pass #probably file doesnot exists - if lasttitle == '--END--': - #titles list is complete - print 'Title list was completed in the previous session' - else: - print 'Title list is incomplete. Reloading...' 
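# --- Illustrative aside (not part of the patch) --------------------------------
# The IP scrubbing that removeIP() above applies to the saved HTML copies
# (index.html and Special:Version.html); regexes copied from the removed function,
# with a made-up example string.
import re

def scrub_ips(raw):
    raw = re.sub(r'\d+\.\d+\.\d+\.\d+', '0.0.0.0', raw)
    raw = re.sub(r'(?i)[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:'
                 r'[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}',
                 '0:0:0:0:0:0:0:0', raw)
    return raw

# scrub_ips('last edited by 203.0.113.7') -> 'last edited by 0.0.0.0'
# --------------------------------------------------------------------------------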
- #do not resume, reload, to avoid inconsistences, deleted pages or so - titles = getPageTitles(config=config, session=other['session']) - saveTitles(config=config, titles=titles) - #checking xml dump - xmliscomplete = False - lastxmltitle = '' - try: - f = open('%s/%s-%s-%s.xml' % (config['path'], domain2prefix(config=config, session=other['session']), config['date'], config['curonly'] and 'current' or 'history'), 'r') - for l in f: - if re.findall('</mediawiki>', l): - #xml dump is complete - xmliscomplete = True - break - xmltitles = re.findall(r'<title>([^<]+)', l) #weird if found more than 1, but maybe - if xmltitles: - lastxmltitle = undoHTMLEntities(text=xmltitles[-1]) - f.close() - except: - pass #probably file doesnot exists - #removing --END-- before getXMLs - while titles and titles[-1] in ['', '--END--']: - titles = titles[:-1] - if xmliscomplete: - print 'XML dump was completed in the previous session' - elif lastxmltitle: - #resuming... - print 'Resuming XML dump from "%s"' % (lastxmltitle) - generateXMLDump(config=config, titles=titles, start=lastxmltitle, session=other['session']) - else: - #corrupt? only has XML header? - print 'XML is corrupt? Regenerating...' - generateXMLDump(config=config, titles=titles, session=other['session']) - - if config['images']: - #load images - lastimage = '' - try: - f = open('%s/%s-%s-images.txt' % (config['path'], domain2prefix(config=config), config['date']), 'r') - raw = unicode(f.read(), 'utf-8').strip() - lines = raw.split('\n') - for l in lines: - if re.search(r'\t', l): - images.append(l.split('\t')) - lastimage = lines[-1] - f.close() - except: - pass #probably file doesnot exists - if lastimage == u'--END--': - print 'Image list was completed in the previous session' - else: - print 'Image list is incomplete. Reloading...' 
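# --- Illustrative aside (not part of the patch) --------------------------------
# A sketch of how resumePreviousDump() above picks the restart point for the XML
# dump: scan the partial file for a closing </mediawiki> (dump already complete)
# and otherwise remember the last <title> seen. 'path' is a placeholder.
import re

def xml_resume_point(path):
    complete, last_title = False, ''
    with open(path, 'r') as f:
        for line in f:
            if '</mediawiki>' in line:
                complete = True
                break
            titles = re.findall(r'<title>([^<]+)</title>', line)
            if titles:
                last_title = titles[-1]
    return complete, last_title
# --------------------------------------------------------------------------------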
- #do not resume, reload, to avoid inconsistences, deleted images or so - if config['api']: - images=getImageFilenamesURLAPI(config=config, session=other['session']) - else: - images = getImageFilenamesURL(config=config, session=other['session']) - saveImageFilenamesURL(config=config, images=images) - #checking images directory - listdir = [] - try: - listdir = os.listdir('%s/images' % (config['path'])) - except: - pass #probably directory does not exist - listdir.sort() - complete = True - lastfilename = '' - lastfilename2 = '' - c = 0 - for filename, url, uploader in images: - lastfilename2 = lastfilename - lastfilename = filename #return always the complete filename, not the truncated - filename2 = filename - if len(filename2) > other['filenamelimit']: - filename2 = truncateFilename(other=other, filename=filename2) - if filename2 not in listdir: - complete = False - break - c +=1 - print '%d images were found in the directory from a previous session' % (c) - if complete: - #image dump is complete - print 'Image dump was completed in the previous session' - else: - generateImageDump(config=config, other=other, images=images, start=lastfilename2, session=other['session']) # we resume from previous image, which may be corrupted (or missing .desc) by the previous session ctrl-c or abort - - if config['logs']: - #fix - pass - -def saveSpecialVersion(config={}, session=None): - """ Save Special:Version as .html, to preserve extensions details """ - - if os.path.exists('%s/Special:Version.html' % (config['path'])): - print 'Special:Version.html exists, do not overwrite' - else: - print 'Downloading Special:Version with extensions and other related info' - r = session.post(url=config['index'], data={'title': 'Special:Version'}) - raw = r.text - delay(config=config, session=session) - raw = removeIP(raw=raw) - with open('%s/Special:Version.html' % (config['path']), 'w') as outfile: - outfile.write(raw.encode('utf-8')) - -def saveIndexPHP(config={}, session=None): - """ Save index.php as .html, to preserve license details available at the botom of the page """ - - if os.path.exists('%s/index.html' % (config['path'])): - print 'index.html exists, do not overwrite' - else: - print 'Downloading index.php (Main Page) as index.html' - r = session.post(url=config['index'], data={}) - raw = r.text - delay(config=config, session=session) - raw = removeIP(raw=raw) - with open('%s/index.html' % (config['path']), 'w') as outfile: - outfile.write(raw.encode('utf-8')) - -def saveSiteInfo(config={}, session=None): - """ Save a file with site info """ - - if config['api']: - if os.path.exists('%s/siteinfo.json' % (config['path'])): - print 'siteinfo.json exists, do not overwrite' - else: - print 'Downloading site info as siteinfo.json' - r = session.post(url=config['api'], data = {'action': 'query', 'meta': 'siteinfo', 'format': 'json'}) - result = json.loads(r.text) - delay(config=config, session=session) - with open('%s/siteinfo.json' % (config['path']), 'w') as outfile: - outfile.write(json.dumps(result, indent=4, sort_keys=True)) - -def avoidWikimediaProjects(config={}, other={}): - """ Skip Wikimedia projects and redirect to the dumps website """ - - #notice about wikipedia dumps - if re.findall(r'(?i)(wikipedia|wikisource|wiktionary|wikibooks|wikiversity|wikimedia|wikispecies|wikiquote|wikinews|wikidata|wikivoyage)\.org', config['api']+config['index']): - print 'PLEASE, DO NOT USE THIS SCRIPT TO DOWNLOAD WIKIMEDIA PROJECTS!' 
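# --- Illustrative aside (not part of the patch) --------------------------------
# The siteinfo request that saveSiteInfo() above stores as siteinfo.json, shown as
# a stand-alone call; 'api_url' is a placeholder for config['api'].
import json
import requests

def fetch_siteinfo(api_url):
    r = requests.post(api_url, data={'action': 'query', 'meta': 'siteinfo', 'format': 'json'})
    return json.dumps(r.json(), indent=4, sort_keys=True)
# --------------------------------------------------------------------------------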
- print 'Download the dumps from http://dumps.wikimedia.org' - if not other['force']: - print 'Thanks!' - sys.exit() - -def getWikiEngine(url=''): - """ Returns the wiki engine of a URL, if known """ - - session = requests.Session() - session.headers = {'User-Agent': getUserAgent()} - r = session.post(url=url) - result = r.text - - wikiengine = 'Unknown' - if re.search(ur'(?im)( Congratulations! Your dump is complete <--- +If you have suggestions, file a new issue here (Google account required): http://code.google.com/p/wikiteam/issues/list +If this is a public wiki, do consider publishing this dump so others can benefit from it. Follow the steps as explained in http://code.google.com/p/wikiteam/wiki/NewTutorial#Publishing_the_dump or contact us at http://code.google.com/p/wikiteam. +Thank you for using DumpGenerator %s by WikiTeam, good bye!""" % ( self.Version ) + return message + + def checkAPI(self): + """ + Checks the validity of the api.php. + """ + query = { + "meta": "siteinfo", + "siprop": "general" } + sitestats = json.loads( RequestAPI.query( query ) ) + try: + if ( sitestats[ "query" ][ "general" ][ "server" ] in self.urltoapi ): + return True + except: + try: + if ( sitestats[ "error" ][ "code" ] == "readapidenied" ) and ( self.cookies == "" ): + Output.warn( "The wiki is private and we do not have proper authentication information!" ) + return False + except: + Output.warn( "This api.php seems weird or is not valid." ) + return False + + def checkIndex(self): + """ + Checks the validity of the index.php. + """ + # TODO: Screen scraping is involved here, need backward compact for older version of MediaWiki. + parameters = { "title": "Special:Version" } + request = RequestIndex.query( parameters ) + # Since we are at Special:Version, we should not be getting Special:BadTitle unless we are not logged in + if ( re.search( r'(Special:Badtitle)', request ) ) and ( self.cookies == "" ): + Output.error( "The wiki is private and we do not have proper authentication information!" ) + sys.exit(1) + + # Check for some tags within the Special:Version page, must be language-independent + if ( re.search( r'(
|meta name="generator" content="MediaWiki)', request ) ): + return True + + def debug(self): + """ + A temporary debug mode for testing purposes. + REMOVE WHEN COMPLETE! + """ + print "DEBUG MODE ON" + print "Date: %s" % (self.date) + print "URL to api.php: %s" % (self.urltoapi) + print "URL to index.php: %s" % (self.urltoindex) + print "Current revision only: %s" % (self.curonly) + print "Image dump: %s" % (self.images) + print "Log dump: %s" % (self.logs) + print "XML dump: %s" % (self.xml) + print "Resume: %s" % (self.resume) + print "Path for resuming: %s" % (self.path) + print "Delay: %s" % (self.delay) + print "Cookies file: %s" % (self.cookies) + print "Excluded namespaces: %s" % (self.exnamespaces) + print "Debug mode on: %s" % (self.debugmode) + self.tasklist = sorted( self.tasklist ) + for task in self.tasklist: + if ( task == "axml" ): + DumpXML.run() + elif ( task == "bimages" ): + DumpImages.run() + elif ( task == "clogs" ): + DumpLogs.run() + sys.exit(0) + + def downloadHtmlPages(self): + """ + Downloads the HTML pages such as the main page and Special:Version. + """ + # Download the main page + Output.message( "Downloading index.php (Main Page) as index.html." ) + query = {} + index = RequestIndex.query( query ) + index = RequestIndex.removeIP( index ) + if ( os.path.exists( "Special:Version.html" ) ): + os.remove( "index.html" ) + else: + pass + for line in index: + Output.appendToFile( "index.html", line ) + + # Download Special:Version or its respective localized version + Output.message( "Downloading Special:Version with extensions and other related info." ) + query = { "title": "Special:Version" } + SpecialVersion = RequestIndex.query( query ) + SpecialVersion = RequestIndex.removeIP( SpecialVersion ) + if ( os.path.exists( "Special:Version.html" ) ): + os.remove( "Special:Version.html" ) + else: + pass + for line in SpecialVersion: + Output.appendToFile( "Special:Version.html", line ) + + def fixHTMLEntities(self, text): + """ + Convert some HTML entities to their regular characters. + """ + text = re.sub('<', '<', text) + text = re.sub('>', '>', text) + text = re.sub('&', '&', text) + text = re.sub('"', '"', text) + text = re.sub(''', '\'', text) + return text + + def help(self): + """ + Provides vital help information to the user. This function + directly uses the "print" function because it is harmless and + what needs to be logged has already been done so. + + Returns: Help message text + """ + message = """DumpGenerator %s, a script to generate backups of MediaWiki wikis. +For more information, please see: http://code.google.com/p/wikiteam/wiki/NewTutorial + +Startup: + -h, --help Displays this help information and exits. + -v, --version Displays the version of this script, with additional credits. + +Wiki information: + --api=URL The URL to the wiki's api.php, not to be used with --index. + --index=URL The URL to the wiki's index.php, not to be used with --api. + +Options: + --xml Creates an XML dump. + --images Creates an image dump. + --logs Creates a dump of all log pages (not yet supported). + +XML dump (only if --xml is used): + --curonly Download only the current revision. + --exnamespaces The unique system number(s) for namespaces to exclude, separated by commas. + --titlesonly Download only the page titles without the actual content. + --titles Path to a file containing list of titles, requires "--END--" to be on the last line. + +Other: + --auto Enable auto pilot mode (select options that ensures that the script creates a new dump). 
+ --resume Resume an incomplete dump (requires --path to be given). + --path=PATH Path to the incomplete dump. + --delay=SECONDS Adds a delay (in seconds) between requests. + --cookies=PATH Path to a Mozilla cookies.txt file for authentication cookies. + --nolog Disable logging to dumpgenerator.log (does not affect output in terminal). + +Report any issues to our issue tracker: https://code.google.com/p/wikiteam.""" % (self.Version) + return message + + def loadConfig(self): + """ + Load a config file from a partially-made dump. + """ + config = json.loads( self.configfile ) + self.date = config[ "date" ] + self.useAPI = config[ "useAPI" ] + self.useIndex = config[ "useIndex" ] + self.urltoapi = config[ "urltoapi" ] + self.urltoindex = config[ "urltoindex" ] + self.images = config[ "images" ] + self.logs = config[ "logs" ] + self.xml = config[ "xml" ] + self.curonly = config[ "curonly" ] + self.exnamespaces = config[ "exnamespaces" ] + self.titlesonly = config[ "titlesonly" ] + + if ( self.images == True ): + self.tasklist.append( "bimage" ) + if ( self.logs == True ): + self.tasklist.append( "clogs" ) + if ( self.xml == True ): + self.tasklist.append( "axml" ) + + if ( self.useAPI == True ): + domain = self.urltoapi + elif ( self.useIndex == True ): + domain = self.urltoindex + + def makePrefix(self, domain): + """ + Converts a domain to a prefix. + + Inputs: + - domain: The domain to change, may contain api.php or index.php as suffix. + + Returns: + - string with slashes and stray characters changed to underscores, suffix + removed and URL protocol removed. + """ + domain = domain.lower() + # Remove unnecessary prefixes and suffixes + domain = re.sub(r'(https?://|www\.|/index\.php|/api\.php)', '', domain) + # Substitute directory slashes with underscores + domain = re.sub(r'/', '_', domain) + # Convert any stray character that is not in the alphabet to underscores + domain = re.sub(r'[^-.A-Za-z0-9]', '_', domain) + return domain + + def makeNiceURL(self, domain): + """ + Converts a domain to a more human-readable format (used for uploading). + + Inputs: + - domain: The domain to change, may contain api.php or index.php as suffix. + + Returns: + - string with suffix removed. + """ + domain = domain.lower() + # Remove the suffixes + domain = re.sub(r'(/index\.php|/api\.php)', '', domain) + return domain + + def processargs(self): + """ + Processing arguments and options provided by the user. + """ + try: + options, answers = getopt.getopt( sys.argv[1:], self.shortoptions, self.longoptions ) + except getopt.GetoptError: + Output.error( "An unknown option has been specified, please check your arguments before re-running!" 
) + sys.exit(1) + + # First accept all arguments and store them in a variable + for option, answer in options: + # Startup + if ( option in ( "-h", "--help" ) ): + # Display the help guide and exit + print self.help() + os.remove( Output.logfile ) + sys.exit(0) + elif ( option in ( "-v", "--version" ) ): + # Display the version of this script + print self.version() + os.remove( Output.logfile ) + sys.exit(0) + + # Wiki information + elif ( option in "--api" ): + self.urltoapi = answer + self.configoptions[ "urltoapi" ] = self.urltoapi + elif ( option in "--index" ): + self.urltoindex = answer + self.configoptions[ "urltoindex" ] = self.urltoindex + + # Dump options + elif ( option == "--images" ): + self.images = True + self.configoptions[ "images" ] = True + self.tasklist.append( "bimages" ) + elif ( option == "--logs" ): + self.logs = True + self.configoptions[ "logs" ] = True + self.tasklist.append( "clogs" ) + elif ( option == "--xml" ): + self.xml = True + self.configoptions[ "xml" ] = True + self.tasklist.append( "axml" ) + + # XML dump options + elif ( option == "--curonly" ): + self.curonly = True + self.configoptions[ "curonly" ] = True + elif ( option in "--exnamespaces" ): + self.exnamespaces = answer + self.configoptions[ "exnamespaces" ] = self.exnamespaces + elif ( option == "--titlesonly" ): + self.titlesonly = True + self.configoptions[ "titlesonly" ] = True + elif ( option in "--titles" ): + self.titles = os.path.abspath( answer ) + + # Other options + elif ( option == "--auto" ): + self.autonomous = True + elif ( option in "--cookies" ): + self.cookies = answer + elif ( option in "--delay" ): + self.delay = answer + elif ( option == "--nolog" ): + self.nolog = True + elif ( option in "--path" ): + self.path = answer + elif ( option == "--resume" ): + self.resume = True + + # Private options (i.e. usable but not documented in --help) + elif ( option == "--debug" ): + self.debugmode = True + else: + Output.error( "An unknown option has been specified, please check your arguments before re-running!" ) + sys.exit(1) + + # Now to verify that the user is not messing around + if ( self.urltoapi == "" and self.urltoindex == "" ): + # User did not specify either --api= or --index= + if ( self.resume == True and self.path != "" ): + # ...but specified --resume and --path= accordingly + self.resumeDump() + elif ( self.resume == True and self.path == "" ): + # ...and specified --resume without --path= + Output.error( "--resume was provided, but you still need to tell me the path to the incomplete dump!" ) + sys.exit(1) + else: + Output.error( "You need to tell me the URL to either the api.php or to index.php!" ) + sys.exit(1) + elif ( self.resume == True ) and ( self.path == "" ): + # User specified --resume, but no --path= was given + Output.error( "--resume was provided, but you still need to tell me the path to the incomplete dump!" ) + sys.exit(1) + elif ( self.urltoapi != "" and self.urltoindex != "" ): + # User specified both --api= and --index= + self.useAPI = True + elif ( self.xml == False and ( self.curonly == True or self.exnamespaces != "" ) ): + # User specified --curonly and --exnamespaces without --xml + Output.error( "You did not specify to make an XML dump using --xml, so why write --curonly or --exnamespaces? Remove them before re-running!" ) + sys.exit(1) + + if ( self.urltoapi != "" ): + self.useAPI = True + elif ( self.urltoindex != "" ): + self.useIndex = True + + if ( self.useAPI == True ): + Output.message( "Checking api.php..." 
) + if not ( self.urltoapi.startswith( "http://" ) ) and not ( self.urltoapi.startswith( "https://" ) ): + Output.error( "The URL to api.php must start with either http:// or https://!" ) + sys.exit(1) + elif ( self.checkAPI() ): + Output.message( "api.php is okay" ) + else: + Output.error( "There is an error with api.php, please provide a correct path to it." ) + sys.exit(1) + elif ( self.useIndex == True ): + Output.message( "Checking index.php..." ) + if not ( self.urltoindex.startswith( "http://" ) ) and not ( self.urltoindex.startswith( "https://" ) ): + Output.error( "The URL to index.php must start with either http:// or https://!" ) + sys.exit(1) + elif ( self.checkIndex() ): + Output.message( "index.php is okay" ) + else: + Output.error( "There is an error with index.php, please provide a correct path to it." ) + sys.exit(1) + + def resumeDump(self): + """ + Resume an incomplete dump defined in self.path. + """ + # TODO: Add support for resuming dumps. + os.chdir( self.path ) + self.loadConfig() + self.prefix = "%s-%s" % ( self.makePrefix( domain ), self.date ) + self.domain = self.makeNiceURL( domain ) + if ( self.useAPI == True ): + self.urltoindex = "%s/index.php" % ( self.domain ) + self.tasklist = sorted( self.tasklist ) + for task in self.tasklist: + if ( task == "axml" ): + DumpXML.run() + elif ( task == "bimages" ): + DumpImages.run() + elif ( task == "clogs" ): + DumpLogs.run() + + def run(self): + """ + Run the whole script itself and excute important functions. + """ + print self.welcome() + Updater.checkRevision() + # Check if previously there was a log file in the working directory and remove it if exists + # This is followed by the equivalent of "touch" in Unix to create an empty file + if ( os.path.exists( Output.logfile ) ): + os.remove( Output.logfile ) + open( Output.logfile, "a" ).close() + else: + open( Output.logfile, "a" ).close() + self.processargs() + if ( DumpGenerator.nolog or DumpGenerator.debugmode): + # Remove the dumpgenerator.log file + os.remove( Output.logfile ) + if ( self.useAPI == True ): + domain = self.urltoapi + elif ( self.useIndex == True ): + domain = self.urltoindex + directories = os.walk( "." ).next()[1] + for directory in directories: + # Check if there is a dump that already exists in the current working directory + if ( directory.startswith( self.makePrefix( domain ) ) and directory.endswith( "-wikidump" ) ): + print "" # Create a blank line + Output.warn( "There seems to be a similar dump at %s which might be incomplete." % ( directory ) ) + if ( self.autonomous == True ): + Output.message( "Since auto pilot mode is enabled, that dump will not be resumed." ) + self.resume = False + else: + Output.warn( "Do you wish to resume using configuration from that dump? [yes, y], [no, n]" ) + reply = "" + while reply.lower() not in [ "yes", "y", "no", "n" ]: + reply = raw_input( "Answer: " ) + if ( reply.lower() in [ "yes", "y" ] ): + if not ( os.path.isfile( "%s/%s" % ( directory, self.configfile ) ) ): + Output.error( "I cannot find a %s in the directory! Please delete that directory before re-running!" % ( self.configfile ) ) + sys.exit(1) + else: + Output.warn( "Resuming dump and ignoring configuration given in this session..." ) + self.resume = True + self.path = directory + break + elif ( reply.lower() in [ "no", "n" ] ): + Output.message( "Not resuming..." 
) + self.resume = False + else: + continue + if ( self.resume == True ): + self.resumeDump() + else: + self.prefix = "%s-%s" % ( self.makePrefix( domain ), self.date ) + self.domain = self.makeNiceURL( domain ) + workingdir = "%s-wikidump" % ( self.prefix ) + if ( os.path.exists( workingdir ) ): + if ( self.autonomous == True ): + Output.message( "Since auto pilot mode is enabled, the directory with the same name will be deleted." ) + reply = "yes" + else: + Output.warn( "\nThere seems to be a directory with the same name, delete the old one? [yes, y], [no, n]" ) + reply = "" + while reply.lower() not in [ "yes", "y", "no", "n" ]: + reply = raw_input( "Answer: " ) + if ( reply.lower() in [ "yes", "y" ] ): + try: + shutil.rmtree( workingdir ) + except: + Output.error( "There was a problem deleting the directory, please manually delete it before re-running!" ) + sys.exit(1) + print "" # Create a blank line + elif ( reply.lower() in [ "no", "n" ] ): + Output.error( "Existing directory exists, either delete that directory or rename it before re-running!" ) + sys.exit(1) + else: + pass + Output.message( "Generating a new dump into a new directory..." ) + os.mkdir( workingdir ) + os.rename( Output.logfile, "%s/%s" % ( workingdir, Output.logfile ) ) + os.chdir( workingdir ) + self.saveConfig() + # Guess the URL to index.php + if ( self.useAPI == True ): + self.urltoindex = "%s/index.php" % ( self.domain ) + if ( self.debugmode == True ): + self.debug() + else: + # Run every single task that we are assigned to do in order: xml, images, logs + # The "a", "b" and "c" prefix is just to force the order. + self.tasklist = sorted( self.tasklist ) + if ( self.tasklist == [] ): + Output.error( "You did not tell me what dump to create!" ) + else: + for task in self.tasklist: + if ( task == "axml" ): + DumpXML.run() + elif ( task == "bimages" ): + DumpImages.run() + elif ( task == "clogs" ): + DumpLogs.run() + self.downloadHtmlPages() + print self.bye() + + def saveConfig(self): + """ + Save the configuration settings provided. + """ + self.configoptions[ "date" ] = self.date + output = open( self.configfile, "w" ) + json.dump( self.configoptions, output, indent=4 ) + + def version(self): + """ + Displays the version information and credits of the script. + + Returns: Version information and credits + """ + message = """DumpGenerator %s by WikiTeam + +Copyright (C) 2013 Hydriz Scholz +Copyright (C) 2014 WikiTeam + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program. If not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA, or visit + +""" % (self.Version) + return message + + def welcome(self): + """ + Welcomes the user at the very beginning of the script running process. + + Returns: Welcome message. + """ + message = """########## Welcome to DumpGenerator %s by WikiTeam ##########\n""" % (self.Version) + return message + +class DumpImages: + """ + The class for generating an image dump. 
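+
+    Rough flow: getFileListAPI() (or getFileListIndex(), not yet implemented) fills
+    self.files from the API's list=allimages results, then dumpImages() downloads
+    each file into the "images" directory together with its XML description page.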
+    """
+    def __init__(self):
+        """
+        The constructor function.
+        """
+        self.files = []
+
+    def dumpImages(self):
+        """
+        Download all the images on the wiki with their corresponding XML.
+        """
+        if ( DumpGenerator.useAPI == True ):
+            self.getFileListAPI()
+        else:
+            self.getFileListIndex()
+        filecount = 0
+        if ( self.files == [] ):
+            pass
+        else:
+            Output.message( "Downloading files and their descriptions into \"images\" directory..." )
+            for media in self.files:
+                time.sleep( DumpGenerator.delay ) # Delay between requests
+                urllib.urlretrieve( media[ "url" ], "images/%s" % ( media[ "name" ] ) )
+                title = DumpGenerator.fixHTMLEntities( media[ "title" ].encode( "utf-8" ) )
+                contentsfile = DumpXML.getXMLPage( title, siteinfo=True )
+                destfile = "images/%s.xml" % ( media[ "name" ] )
+                shutil.move( contentsfile, destfile )
+                # Close the XML description, since getXMLPage() strips the closing tag
+                Output.appendToFile( destfile, "</mediawiki>\n" )
+                filecount += 1
+                if ( filecount % 10 == 0 ):
+                    # Give the user a regular status report so that it does not look stuck
+                    Output.message( " Downloaded %d files." % ( filecount ) )
+            if ( filecount == 1 ):
+                Output.message( "Downloaded 1 file." )
+            else:
+                Output.message( "Downloaded %d files." % ( filecount ) )
+
+    def getFileListAPI(self):
+        """
+        Download the list of files on the wiki via the API.
+        """
+        files = []
+        dumpfile = "%s-images.txt" % ( DumpGenerator.prefix )
+        filecount = 0
+        Output.message( "Getting list of files on the wiki..." )
+        aifrom = "!" # Very first page of a wiki
+        while aifrom:
+            sys.stderr.write('.') # Tell the user that downloading is in progress
+            query = {
+                "list": "allimages",
+                "aifrom": aifrom,
+                "ailimit": 500 } # The default limit for anonymous users of the API is 500 pages per request
+            time.sleep( DumpGenerator.delay ) # Delay between requests
+            filesmeta = json.loads( RequestAPI.query( query ) )
+            # Store what the server tells us to continue from
+            try:
+                serveraifrom = filesmeta[ "query-continue" ][ "allimages" ][ "aicontinue" ]
+                aifrom = DumpGenerator.fixHTMLEntities( serveraifrom )
+            except:
+                # Reached the end of having to keep continuing, exit the while condition
+                aifrom = ""
+            # TODO: On a wiki with a lot of files, this can cause huge memory problems
+            files.extend( filesmeta[ "query" ][ "allimages" ] )
+            for media in filesmeta[ "query" ][ "allimages" ]:
+                outputline = "%s\t%s\n" % ( media[ "title" ], media[ "url" ] )
+                Output.appendToFile( dumpfile, outputline )
+            # Add only the new batch to the file count, not the whole list so far
+            filecount += len( filesmeta[ "query" ][ "allimages" ] )
+        Output.appendToFile( dumpfile, "--END--" )
+        if ( filecount == 1 ):
+            Output.message( " Got 1 file" )
+        else:
+            Output.message( " Got %d files" % ( filecount ) )
+
+        if ( filecount == 0 ):
+            Output.warn( "There are no files on the wiki to download!" )
+        else:
+            Output.message( "File names and URLs saved at %s." % ( dumpfile ) )
+        self.files = files
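+
+    # Note on output format: the <prefix>-images.txt file written by
+    # getFileListAPI() above holds one "<title>\t<url>" line per file,
+    # e.g. "File:Example.png\thttp://wiki.example.org/images/a/ab/Example.png"
+    # (hypothetical name and URL), followed by a final "--END--" marker.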
+
+    def getFileListIndex(self):
+        """
+        Download the list of files on the wiki via index.php.
+        """
+        # TODO: Add code here
+
+    def run(self):
+        """
+        Execute the process of producing an image dump.
+        """
+        if ( os.path.isdir( "images" ) == False ):
+            os.mkdir( "images" )
+        self.dumpImages()
+
+class DumpLogs:
+    """
+    The class for generating a log pages dump (pages in Special:Log).
+    """
+    def __init__(self):
+        """
+        The constructor function.
+        """
+
+    def run(self):
+        """
+        Execute the process of producing a log pages dump.
+        """
+        # TODO: Support downloading of log pages
+        Output.warn( "Sorry, downloading of log pages is not yet supported!" )
+
+class DumpXML:
+    """
+    The class for generating an XML dump.
+    """
+    def __init__(self):
+        """
+        The constructor function.
+        """
+        self.lennamespaces = 0
+        self.namespaces = {}
+        self.pagetitles = []
+        self.titlesdumpfile = ""
+        self.dumpretrycount = 0
+
+    def dumpPageTitlesAPI(self):
+        """
+        Get a list of page titles and output it to a file.
+        """
+        self.getNamespacesAPI()
+        self.getPageTitlesAPI()
+        Output.message( "Saving list of page titles..." )
+        Output.appendToFile( self.titlesdumpfile, "--END--" )
+        Output.message( "List of page titles saved at %s." % ( self.titlesdumpfile ) )
+
+    def dumpXML(self):
+        """
+        Get the whole wiki in an XML file.
+        """
+        Output.message( "Downloading the XML of every page..." )
+        if ( DumpGenerator.curonly == True ):
+            dumpfile = "%s-curonly.xml" % ( DumpGenerator.prefix )
+        else:
+            dumpfile = "%s-history.xml" % ( DumpGenerator.prefix )
+        pagecount = 0
+        # To reduce memory usage, we are storing the title into memory only when we need it
+        for title in file( self.titlesdumpfile, "r" ).read().splitlines():
+            pagecount += 1
+            numberofedits = 0
+            # Add the initial siteinfo and header tags for the first page
+            if ( pagecount == 1 ):
+                contentsfile = self.getXMLPage( title, siteinfo=True )
+                contents = file( contentsfile, "r" ).readlines()
+                open( dumpfile, "a" ).close() # "touch" the file
+                os.remove( contentsfile )
+            elif ( title == "--END--" ):
+                # Close the root element once the end marker is reached
+                contents = [ "</mediawiki>\n" ]
+            else:
+                contentsfile = self.getXMLPage( title )
+                contents = file( contentsfile, "r" ).readlines()
+                os.remove( contentsfile )
+
+            for content in contents:
+                # Count the number of occurrences of "</revision>" to determine the number of revisions
+                if ( "</revision>" in content ):
+                    numberofedits += 1
+                Output.appendToFile( dumpfile, content )
+            if ( title == "--END--" ):
+                pass
+            else:
+                if ( numberofedits == 1 ):
+                    Output.message( " %s, 1 edit" % ( title ) )
+                else:
+                    Output.message( " %s, %s edits" % ( title, numberofedits ) )
+            if ( pagecount % 10 == 0 ):
+                Output.message( "Downloaded %d pages" % ( pagecount ) )
+        Output.message( "XML dump saved at %s." % ( dumpfile ) )
+        self.integrityCheck( dumpfile )
+
+    def getNamespacesAPI(self):
+        """
+        Download the list of namespaces with their names and IDs
+        via the API.
+        """
+        query = {
+            "meta": "siteinfo",
+            "siprop": "namespaces" }
+        namespacedetails = json.loads( RequestAPI.query( query ) )
+        namespacenums = namespacedetails[ "query" ][ "namespaces" ].keys()
+        # Remove the system namespaces ("Media" and "Special"), which cannot be exported
+        namespacenums.remove( "-2" )
+        namespacenums.remove( "-1" )
+        namespaces = {}
+        for namespacenum in namespacenums:
+            namespacename = namespacedetails[ "query" ][ "namespaces" ][ namespacenum ][ "*" ]
+            namespaces[ namespacenum ] = namespacename
+        self.lennamespaces = len( namespacenums )
+        Output.message( "%d namespaces found." % ( self.lennamespaces ) )
+        self.namespaces = namespaces
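+
+    # Illustrative sketch (comments only) of the JSON parsed by getNamespacesAPI(),
+    # trimmed to the relevant keys; the "*" entry holds the localised namespace name:
+    #   { "query": { "namespaces": {
+    #       "-2": { "id": -2, "*": "Media" },
+    #       "-1": { "id": -1, "*": "Special" },
+    #       "0":  { "id": 0,  "*": "" },
+    #       "1":  { "id": 1,  "*": "Talk" } } } }
+    # "-2" and "-1" are dropped because Special:Export cannot export those namespaces.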
+
+    def getPageTitlesAPI(self):
+        """
+        Grab a list of page titles in each namespace via the API.
+
+        There are leading spaces in the outputs so as to make things neater on the terminal.
+        """
+        titles = []
+        self.titlesdumpfile = "%s-titles.txt" % ( DumpGenerator.prefix )
+        totalpagecount = 0
+        for namespace in self.namespaces:
+            if namespace in DumpGenerator.exnamespaces:
+                Output.warn( " Skipping namespace %s" % ( namespace ) )
+            else:
+                pagecount = 0
+                Output.message( " Getting titles in namespace %s" % ( namespace ) )
+                apfrom = "!" # Very first page of a wiki
+                while apfrom:
+                    sys.stderr.write( "." ) # Tell the user that downloading is in progress
+                    query = {
+                        "list": "allpages",
+                        "apnamespace": namespace,
+                        "apfrom": apfrom,
+                        "aplimit": 500 } # The default limit for anonymous users of the API is 500 pages per request
+                    time.sleep( DumpGenerator.delay ) # Delay between requests
+                    pagetitles = json.loads( RequestAPI.query( query ) )
+                    # Store what the server tells us to continue from
+                    try:
+                        serverapfrom = pagetitles[ "query-continue" ][ "allpages" ][ "apcontinue" ]
+                        apfrom = DumpGenerator.fixHTMLEntities( serverapfrom )
+                    except:
+                        try:
+                            serverapfrom = pagetitles[ "query-continue" ][ "allpages" ][ "apfrom" ]
+                            apfrom = DumpGenerator.fixHTMLEntities( serverapfrom )
+                        except:
+                            # Reached the end of having to keep continuing, exit the while condition
+                            apfrom = ""
+                    pages = pagetitles[ "query" ][ "allpages" ]
+                    # Add to namespace page count
+                    pagecount += len( pages )
+                    for page in pages:
+                        title = "%s\n" % ( page[ "title" ] )
+                        Output.appendToFile( self.titlesdumpfile, title )
+                if ( pagecount == 1 ):
+                    Output.message( " Got 1 page title in namespace %s" % ( namespace ) )
+                else:
+                    Output.message( " Got %d page titles in namespace %s" % ( pagecount, namespace ) )
+                # Add to total page count
+                totalpagecount += pagecount
+        if ( totalpagecount == 1 ):
+            Output.message( "Got 1 page title in total." )
+        else:
+            Output.message( "Got %d page titles in total." % ( totalpagecount ) )
+
+    def getXMLPage(self, page, siteinfo=False):
+        """
+        Get the XML of one page.
+
+        Input:
+        - page: The title of the page to download.
+        - siteinfo: Whether to include the siteinfo header in the XML.
+        """
+        parameters = {
+            "title": "Special:Export",
+            "pages": page,
+            "action": "submit" }
+        if ( DumpGenerator.curonly == True ):
+            parameters[ "curonly" ] = 1
+            parameters[ "limit" ] = 1
+        else:
+            # Make the wiki download the actual full history
+            parameters[ "history" ] = "1"
+        # TODO: Can cause memory problems if the page has a huge history
+        result = RequestIndex.query( parameters )
+        pagehash = hashlib.sha256( page ).hexdigest()[:8]
+        tempfile = "%s.xml.tmp" % ( pagehash )
+        tempfile2 = "%s.xml" % ( pagehash )
+        Output.appendToFile( tempfile, result )
+        result = "" # Free up memory
+        # Warning: The following is NOT compatible with MediaWiki XML Schema Description version 0.3 and below!
+        # See http://wikiteam.googlecode.com/svn/trunk/schema/README.md for more information about MediaWiki versions
+        # this will affect and ways to overcome it.
+        if ( siteinfo == False ):
+            linecount = 0
+            # The 11 comes from the <siteinfo> header lines (<sitename>, <base>, <generator> and so on),
+            # the two "special" namespaces and the very first <mediawiki> line
+            # TODO: Hacky way of removing the siteinfo, check for backward compatibility!
+            linestoskip = 11 + self.lennamespaces
+            for line in open( tempfile, "r" ).read().splitlines():
+                linecount += 1
+                if linecount > linestoskip:
+                    if ( "</mediawiki>" in line ):
+                        # Skip the closing tag; it is added once at the very end of the dump
+                        pass
+                    else:
+                        line = "%s\n" % ( line )
+                        Output.appendToFile( tempfile2, line )
+                else:
+                    continue
+        else:
+            for line in open( tempfile, "r" ).read().splitlines():
+                if ( "</mediawiki>" in line ):
+                    pass
+                else:
+                    line = "%s\n" % ( line )
+                    Output.appendToFile( tempfile2, line )
+        os.remove( tempfile )
+        return tempfile2
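+
+    # For reference (comments only), a Special:Export dump is shaped roughly like this
+    # (attributes omitted), which is what integrityCheck() below relies on:
+    #   <mediawiki>
+    #     <siteinfo> ... </siteinfo>
+    #     <page>
+    #       <title>...</title>
+    #       <revision> ... </revision>
+    #     </page>
+    #     ...
+    #   </mediawiki>
+    # so <title>, <page> and </page> counts should match, as should <revision> and </revision>.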
+
+    def integrityCheck(self, dumpfile):
+        """
+        Checks the integrity of the XML dump and ensures that it is not corrupted.
+        """
+        Output.message( "Checking the integrity of the XML dump..." )
+        checktitles = 0
+        checkpageopen = 0
+        checkpageclose = 0
+        checkrevisionopen = 0
+        checkrevisionclose = 0
+        # Check the number of instances of the following tags
+        # By logic they should be the same number
+        for line in file( dumpfile, "r" ).read().splitlines():
+            if "<title>" in line:
+                checktitles += 1
+            elif "<page>" in line:
+                checkpageopen += 1
+            elif "</page>" in line:
+                checkpageclose += 1
+            elif "<revision>" in line:
+                checkrevisionopen += 1
+            elif "</revision>" in line:
+                checkrevisionclose += 1
+            else:
+                continue
+
+        if ( checktitles == checkpageopen and checktitles == checkpageclose and checkrevisionopen == checkrevisionclose ):
+            Output.message( "Excellent, the XML dump is not corrupted." )
+        else:
+            Output.warn( "WARNING: XML dump seems to be corrupted." )
+            if ( DumpGenerator.autonomous == True ):
+                reply = "yes"
+            else:
+                reply = ""
+                while reply.lower() not in [ "yes", "y", "no", "n" ]:
+                    reply = raw_input( 'Regenerate a new dump ([yes, y], [no, n])? ' )
+            if reply.lower() in [ "yes", "y" ]:
+                self.dumpretrycount += 1
+                if ( self.dumpretrycount < 3 ):
+                    Output.warn( "Generating a new dump..." )
+                    os.remove( dumpfile )
+                    self.dumpXML()
+                else:
+                    Output.warn( "We have tried dumping the wiki 3 times, but the dump is still corrupted. Not going to carry on since it is probably a problem on the wiki." )
+                    # Encourage the user to tell us about this faulty wiki
+                    print "Please tell us about this by reporting an issue here: https://code.google.com/p/wikiteam/issues/list. Thank you!"
+                    print "Giving you a little time to see this message..."
+                    time.sleep(3) # Give time for the user to see the message
+            elif reply.lower() in [ "no", "n" ]:
+                Output.warn( "Not generating a new dump. Note: Your dump is corrupted and might not work with MediaWiki!" )
+
+    def run(self):
+        """
+        Execute the process of producing an XML dump.
+        """
+        if ( DumpGenerator.useAPI == True ):
+            if ( DumpGenerator.titlesonly == True ):
+                self.dumpPageTitlesAPI()
+            else:
+                if ( DumpGenerator.titles != "" ):
+                    Output.message( "Using the list of page titles provided at %s." % ( DumpGenerator.titles ) )
+                    self.titlesdumpfile = DumpGenerator.titles
+                else:
+                    self.dumpPageTitlesAPI()
+                self.dumpXML()
+        else:
+            # TODO: dumpPageTitlesIndex() is not implemented yet
+            if ( DumpGenerator.titlesonly == True ):
+                self.dumpPageTitlesIndex()
+            else:
+                if ( DumpGenerator.titles != "" ):
+                    self.titlesdumpfile = DumpGenerator.titles
+                else:
+                    self.dumpPageTitlesIndex()
+                self.dumpXML()
+
+class Output:
+    """
+    The class to output anything to the user or to a place not within the script.
+
+    For doing outputs to the user:
+    This is used instead of the "print" function directly because it is intended
+    to log everything that is told to the user, so that it is possible to check
+    when and where things went wrong.
+
+    For doing outputs to elsewhere:
+    This is to reduce memory usage by storing large chunks of data on disk,
+    reducing the risk of getting a MemoryError.
+    """
+    def __init__(self):
+        self.logfile = "dumpgenerator.log"
+
+    # Output to disk
+    def appendToFile(self, outputfile, contents):
+        """
+        Output contents to file.
+
+        Inputs:
+        - outputfile: The file to output to.
+        - contents: The content to add for each line.
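+
+        Example (illustrative file name): Output.appendToFile( "wiki-titles.txt", "Main Page\n" )
+        creates wiki-titles.txt if it does not exist yet and appends the line to it.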
+ """ + if ( os.path.exists( outputfile ) == False ): + open( outputfile, "a" ).close() # "touch" the file + else: + pass + thefile = open( outputfile, "a" ) + try: + contents = contents.encode( "utf-8", "ignore" ) + # TODO: During a test phase, this error kept coming up, though the final output was no different from + # what was produced using dumpBackup.php and using Special:Export itself. + except UnicodeDecodeError: + pass + thefile.write( contents ) + thefile.close() + + # Output to user + def error(self, message): + print message + print "Write --help for more information." + self.log( "An error occurred: %s" % (message) ) + + def log(self, message): + if ( DumpGenerator.nolog or DumpGenerator.debugmode): + # Skip logging + time.sleep(0) + else: + timestamp = datetime.datetime.fromtimestamp( time.time() ).strftime( "%Y-%m-%d %H:%M:%S" ) + logline = "%s: %s\n" % (timestamp, message) + self.appendToFile( self.logfile, logline ) + + def message(self, message): + print message + self.log( "Told the user: %s" % (message) ) + + def warn(self, message): + print message + self.log( "Warned the user: %s" % (message) ) + +class RequestAPI: + """ + The RequestAPI class, to submit APi request calls to the server. + """ + def __init__(self): + """ + The constructor function. + """ + + def query(self, params, url=""): + """ + The function to send an API call to the server given in the "url" + parameter using the parameters found in params. If url is empty, + DumpGenerator.urltoapi is used instead. + + Note: This function will assume action=query, other functions provides + the other query forms, but not this one. + + Input: + - params: Parameters to API call as an array (excluding action=query and format=json) + + Returns + - Result of API call in JSON format. + """ + if ( url == "" ): + url = DumpGenerator.urltoapi + else: + url = url + queryurl = "%s?action=query&format=json" % ( url ) + headers = { "User-Agent": DumpGenerator.UserAgent } + # Convert the array to a proper URL + paras = urllib.urlencode( params ) + # POST the parameters to the server + request = urllib2.Request( queryurl, paras, headers ) + try: + result = urllib2.urlopen( request ) + except: + try: + # Add a little delay between requests if server is slow + sleeptime = DumpGenerator.delay + 10 + Output.warn( "Failed to get a response from the server, retrying in %d seconds..." % (sleeptime) ) + time.sleep( sleeptime ) + result = urllib2.urlopen( request ) + except: + Output.error( "An error occurred when trying to get a response from the server. Please resume the dump with --resume." ) + sys.exit(2) + output = result.read() + result.close() + return output + +class RequestIndex: + def __init__(self): + """ + The constructor function. + """ + + def query(self, params, url=""): + """ + The function to send an request to the server given in the "url" + parameter using the parameters found in params. If url is empty, + DumpGenerator.urltoindex is used instead. + + Input: + - params: Parameters to the request to send, appended to url as + a GET request. + + Returns + - Result of GET request. 
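+
+        Example (illustrative): query( { "title": "Special:Export", "pages": "Main Page", "action": "submit" } )
+        is fetched from DumpGenerator.urltoindex?title=Special%3AExport&pages=Main+Page&action=submit
+        (urllib.urlencode percent-encodes the values; parameter order may differ).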
+ """ + if ( url == "" ): + url = DumpGenerator.urltoindex + else: + url = url + headers = { "User-Agent": DumpGenerator.UserAgent } + paras = urllib.urlencode( params ) + # index.php does not support POST request, formulating a correct GET URL here + queryurl = "%s?%s" % ( url, paras ) + request = urllib2.Request( queryurl, headers=headers ) + # TODO: Make urlopen follow redirects + try: + result = urllib2.urlopen( request ) + except: + try: + # Add a little delay between requests if server is slow + sleeptime = DumpGenerator.delay + 10 + Output.warn( "Failed to get a response from the server, retrying in %d seconds..." % (sleeptime) ) + time.sleep( sleeptime ) + result = urllib2.urlopen( request ) + except: + Output.error( "An error occurred when trying to get a response from the server. Please resume the dump with --resume." ) + sys.exit(2) + output = result.read() + result.close() + return output + + def removeIP(self, content): + """ + Remove the user's IP address while fetching HTML pages. + """ + # Remove IPv4 addresses + content = re.sub( r"\d+\.\d+\.\d+\.\d+", "0.0.0.0", content ) + # Remove IPv6 addresses + content = re.sub( r"(?i)[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}", "0:0:0:0:0:0:0:0", content ) + return content + +class Updater: + """ + The class to auto-update the user's script to the latest version of DumpGenerator. + """ + # TODO: Get the script to check only occasionally, this is a performance concern + def __init__(self): + """ + The constructor function. + """ + self.controlUrl = "http://wikiteam.googlecode.com/svn/trunk/revnum.json" + self.controlUrl2 = "https://raw.github.com/dumps/DumpGenerator/master/revnum.json" + self.result = {} + + def checkRevision(self): + """ + Check the current revision and ensure that it is up-to-date. + """ + jsonresult = self.getRevisionJson() + if ( jsonresult == False ): + pass + else: + result = json.loads( jsonresult ) + self.result = result + if ( result[ "latest" ] == DumpGenerator.Version ): + if ( result[ "releases" ][ DumpGenerator.Version ][ "revision" ] == DumpGenerator.revision ): + pass + else: + self.update() + else: + self.update() + + def getRevisionJson(self): + """ + Download the controlling JSON file. + """ + headers = {'User-Agent': DumpGenerator.UserAgent} + skip = False + # TODO: Handle 404 errors + try: + revjson = urllib2.urlopen( urllib2.Request( self.controlUrl, headers=headers ) ) + except: + try: + revjson = urllib2.urlopen( urllib2.Request( self.controlUrl2, headers=headers ) ) + except: + Output.warn( "Unable to check if a new version of dumpgenerator.py is available, continuing..." ) + skip = True + if ( skip == False ): + output = revjson.read() + revjson.close() + return output + else: + return False + + def update(self): + """ + Update DumpGenerator.py to the current latest version + """ + currentfile = sys.argv[0] + latestver = self.result[ "latest" ] + latestrev = self.result[ "releases" ][ latestver ][ "revision" ] + latesturl = self.result[ "releases" ][ latestver ][ "downloadurl" ] + latesturl2 = self.result[ "releases" ][ latestver ][ "downloadurl2" ] + updated = True + # TODO: Handle 404 errors + try: + urllib.urlretrieve( latesturl, currentfile ) + except: + try: + urllib.urlretrieve( latesturl2, currentfile ) + except: + updated = False + if ( updated == False ): + Output.warn( "Unable to update DumpGenerator, skipping update for now..." ) + else: + Output.message( "DumpGenerator was updated to %s (revision %s)! 
Changes will take effect on next run." % ( latestver, latestrev ) ) if __name__ == "__main__": - main() + # Class registry, for use throughout the whole script + RequestAPI = RequestAPI() + RequestIndex = RequestIndex() + DumpGenerator = DumpGenerator() + DumpImages = DumpImages() + DumpLogs = DumpLogs() + DumpXML = DumpXML() + Output = Output() + Updater = Updater() + + # Start everything up + DumpGenerator.run()
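+
+    # Example invocations (illustrative; the wiki URL is hypothetical):
+    #   python dumpgenerator.py --api=http://wiki.example.org/w/api.php --xml --images
+    #     creates a directory such as wiki.example.org_w-<date>-wikidump/ with the XML and image dumps
+    #   python dumpgenerator.py --resume --path=wiki.example.org_w-<date>-wikidump
+    #     resumes the incomplete dump found in that directory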