From 6668999658a97139f78081f745963f3182c277f8 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 26 Nov 2022 21:32:03 -0800 Subject: [PATCH 1/2] Update User-Agent to latest Firefox --- dumpgenerator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index fd09b4c..53f568a 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -509,7 +509,8 @@ def getUserAgent(): # firefox #'Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0', #'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0', - 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0' + #'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0' + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0' ] return useragents[0] From 97146c6f01608d18799f5f44700ca262e9c8ea25 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 26 Nov 2022 21:28:03 -0800 Subject: [PATCH 2/2] Use the same requests session for getting the wiki engine and checking API/index --- dumpgenerator.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index 53f568a..1e0dabb 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -1762,7 +1762,7 @@ def getParameters(params=[]): # Execute meta info params if args.wiki: if args.get_wiki_engine: - print getWikiEngine(url=args.wiki) + print getWikiEngine(session, url=args.wiki) sys.exit() # Create session @@ -1802,8 +1802,8 @@ def getParameters(params=[]): index = args.index and args.index or '' if api == '' or index == '': if args.wiki: - if getWikiEngine(args.wiki) == 'MediaWiki': - api2, index2 = mwGetAPIAndIndex(args.wiki) + if getWikiEngine(session, args.wiki) == 'MediaWiki': + api2, index2 = mwGetAPIAndIndex(session, args.wiki) if not api: api = api2 if not index: @@ -2379,11 +2379,9 @@ def avoidWikimediaProjects(config={}, other={}): sys.exit() -def getWikiEngine(url=''): +def getWikiEngine(session, url): """ Returns the wiki engine of a URL, if known """ - session = requests.Session() - session.headers.update({'User-Agent': getUserAgent()}) r = session.post(url=url, timeout=30) if r.status_code == 405 or r.text == '': r = session.get(url=url, timeout=120) @@ -2462,13 +2460,11 @@ def getWikiEngine(url=''): return wikiengine -def mwGetAPIAndIndex(url=''): +def mwGetAPIAndIndex(session, url): """ Returns the MediaWiki API and Index.php """ api = '' index = '' - session = requests.Session() - session.headers.update({'User-Agent': getUserAgent()}) r = session.post(url=url, timeout=120) result = r.text