Version 20231017.01. Use --secure-protocol=auto. Use new minimum Wget version checker.

master
arkiver 8 months ago
parent a0e35bb72d
commit b1bf682030

@ -4,6 +4,7 @@ from distutils.version import StrictVersion
import hashlib
import os.path
import random
import re
from seesaw.config import realize, NumberConfigValue
from seesaw.externalprocess import ExternalProcess
from seesaw.item import ItemInterpolation, ItemValue
@ -39,11 +40,23 @@ if StrictVersion(seesaw.__version__) < StrictVersion('0.8.5'):
# 1. does not crash with --version, and
# 2. prints the required version string
class HigherVersion:
    """Version check that accepts any version at or above a minimum.

    Instances expose a ``search(text)`` method, the same method name a
    compiled regular expression has. In this file an instance is handed to
    ``find_executable`` in place of a fixed version string.
    NOTE(review): presumably ``find_executable`` calls ``search`` on the
    output of the executable's version command -- confirm against seesaw.
    """

    def __init__(self, expression, min_version):
        """Store the compiled pattern and the minimum acceptable version.

        Args:
            expression: Regular expression source used to extract candidate
                version strings from text. It should contain at most one
                capturing group so that ``findall`` yields plain strings.
            min_version: Lowest version string that is still accepted.
        """
        self._expression = re.compile(expression)
        self._min_version = min_version

    def search(self, text):
        """Report whether ``text`` contains a version >= the minimum.

        Every match of the pattern in ``text`` is compared against the
        minimum version, and the first acceptable one is printed.

        Args:
            text: Text to scan for version strings.

        Returns:
            True if an acceptable version was found, otherwise False.
        """
        for result in self._expression.findall(text):
            # Plain string comparison: the ordering is lexicographic, so it
            # is only meaningful when the pattern yields version strings
            # whose fields have a fixed width.
            if result >= self._min_version:
                print('Found version {}.'.format(result))
                return True
        # Explicit negative result instead of falling off the end (None).
        return False
WGET_AT = find_executable(
'Wget+AT',
[
HigherVersion(
r'(GNU Wget 1.[0-9]{2}.[0-9]{1}-at.[0-9]{8}\.[0-9]{2})[^0-9a-zA-Z\.-_]',
'GNU Wget 1.21.3-at.20230623.01'
],
),
[
'./wget-at',
'/home/warrior/data/wget-at'
@ -59,7 +72,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = '20230910.05'
VERSION = '20231017.01'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 100
@ -268,7 +281,7 @@ class WgetArgs(object):
'--warc-compression-use-zstd',
'--warc-zstd-dict-no-include',
'--header', 'Accept-Language: en-US;q=0.9, en;q=0.8',
'--secure-protocol', 'TLSv1_3'
'--secure-protocol', 'auto'
]
dict_data = ZstdDict.get_dict()
with open(os.path.join(item['item_dir'], 'zstdict'), 'wb') as f:

Loading…
Cancel
Save