Version 20230607.05. Better checking for video. Abort the item if no post is found (for example, during a blackout).

pull/17/head
arkiver 12 months ago
parent 7bb5c39419
commit fe17191306

@@ -59,7 +59,7 @@ if not WGET_AT:
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = '20230607.04'
VERSION = '20230607.05'
TRACKER_ID = 'reddit'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 20

@@ -560,11 +560,6 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
end
selftext = child["data"]["selftext"]
checknewurl(child["data"]["permalink"])
-- temp
if child["data"]["is_video"] then
error()
end
--
if child["data"]["is_video"] and not child["data"]["secure_media"] then
io.stdout:write("Video still being processed.\n")
io.stdout:flush()
@@ -669,7 +664,18 @@ wget.callbacks.write_to_warc = function(url, http_stat)
end
if string.match(url["url"], "/api/info%.json") then
local html = read_file(http_stat["local_file"])
if string.match(html, "v%.redd%.it")
local json = load_json_file(html)
local child_count = 0
local has_video = false
for _, child in pairs(json["data"]["children"]) do
child_count = child_count + 1
if child["data"]["is_video"] then
has_video = true
end
end
if child_count ~= 1
or has_video
or string.match(html, "v%.redd%.it")
or string.match(html, "reddit_video") then
abort_item()
return false

Loading…
Cancel
Save