From daab40aa6e10194d30b3257fd6f7efb943a43625 Mon Sep 17 00:00:00 2001 From: arkiver Date: Fri, 16 Feb 2024 12:19:43 +0100 Subject: [PATCH] Version 20240216.01. Use fixed minimum Wget version 1.21.3-at.20231213.03. Use TLSv1.2. Fix check on svc comment content check. --- pipeline.py | 6 +++--- reddit.lua | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pipeline.py b/pipeline.py index daf220c..764deb3 100644 --- a/pipeline.py +++ b/pipeline.py @@ -55,7 +55,7 @@ WGET_AT = find_executable( 'Wget+AT', HigherVersion( r'(GNU Wget 1\.[0-9]{2}\.[0-9]{1}-at\.[0-9]{8}\.[0-9]{2})[^0-9a-zA-Z\.-_]', - 'GNU Wget 1.21.3-at.20230623.01' + 'GNU Wget 1.21.3-at.20231213.03' ), [ './wget-at', @@ -72,7 +72,7 @@ if not WGET_AT: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20231201.01' +VERSION = '20240216.01' TRACKER_ID = 'reddit' TRACKER_HOST = 'legacy-api.arpa.li' MULTI_ITEM_SIZE = 100 @@ -281,7 +281,7 @@ class WgetArgs(object): '--warc-compression-use-zstd', '--warc-zstd-dict-no-include', '--header', 'Accept-Language: en-US;q=0.9, en;q=0.8', - '--secure-protocol', 'PFS', + '--secure-protocol', 'TLSv1_2', #'--ciphers', '+ECDHE-RSA:+AES-256-CBC:+SHA384' ] dict_data = ZstdDict.get_dict() diff --git a/reddit.lua b/reddit.lua index ef257a5..45f4301 100644 --- a/reddit.lua +++ b/reddit.lua @@ -739,6 +739,8 @@ wget.callbacks.write_to_warc = function(url, http_stat) if ( string.match(url["url"], "^https?://[^/]+/svc/") and string.match(html, 'level%s*=%s*"') + and not string.match(html, '