NewsDownloader: add date time to filename (#3863)

* Download all files from ftp server(Not only epub). Show failed download number

* add date and time to filename

* optimization - decrease max redirect number

* remove new external lib from luacheck

* add new lib licence info
pull/3867/head
mwoz123 6 years ago committed by Frans de Jonge
parent e66b4882dc
commit a6b824ed90

@ -98,8 +98,10 @@ read_globals = {
-- Paths listed here are skipped by luacheck.
-- NOTE(review): the newsdownloader.koplugin/lib entries look like bundled
-- third-party sources together with their license files — confirm.
exclude_files = {
"frontend/luxl.lua",
"plugins/newsdownloader.koplugin/lib/handler.lua",
"plugins/newsdownloader.koplugin/lib/LICENSE",
"plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML",
"plugins/newsdownloader.koplugin/lib/xml.lua",
"plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser",
"plugins/newsdownloader.koplugin/lib/dateparser.lua",
}
-- don't balk on busted stuff in spec

@ -6,7 +6,7 @@ local socket = require('socket')
local socket_url = require("socket.url")
local InternalDownloadBackend = {}
local max_redirects = 10; --prevent infinite redirects
local max_redirects = 5; --prevent infinite redirects
function InternalDownloadBackend:getResponseAsString(url, redirectCount)
if not redirectCount then

@ -0,0 +1,28 @@
feedparser is available under the (new) BSD license. it uses a
portion of LuaSocket code (copyright 2007 Diego Nehab)
(http://www.keplerproject.org/luaexpat/), which is under the MIT license.
Copyright (c) 2009 Leo Ponomarev.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the <organization> nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY <copyright holder> ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,192 @@
local difftime, time, date = os.difftime, os.time, os.date
local format = string.format
local tremove, tinsert = table.remove, table.insert
local pcall, pairs, ipairs, tostring, tonumber, type, setmetatable = pcall, pairs, ipairs, tostring, tonumber, type, setmetatable
local dateparser={}
--we shall use the host OS's time conversion facilities. Dealing with all those leap seconds by hand can be such a bore.

--- Convert a broken-down date table into a Unix timestamp.
-- os.time interprets its argument as local time, so the result is corrected
-- by the host's offset from UTC (sampled once, when this chunk is loaded) and
-- then by the offset the parsed date itself carries.
-- @param t date table as accepted by os.time (year, month, day, hour, ...)
-- @param offset_sec offset of `t` from UTC in seconds; treated as 0 when omitted
-- @return the timestamp on success; nil plus an error message when os.time
--         rejects `t`
local unix_timestamp
do
    local now = time()
    -- "now" rendered as UTC fields and as local fields, both fed back through
    -- os.time: the difference is how far os.time's local reading is off.
    local local_UTC_offset_sec = difftime(time(date("!*t", now)), time(date("*t", now)))
    unix_timestamp = function(t, offset_sec)
        -- os.time raises on malformed tables, hence the pcall
        local success, improper_time = pcall(time, t)
        if not success or not improper_time then
            return nil, "invalid date. os.time says: " .. tostring(improper_time or "nothing")
        end
        -- a missing offset used to raise an arithmetic error; treat it as UTC
        return improper_time - local_UTC_offset_sec - (offset_sec or 0)
    end
end
-- format name -> parsing function
local formats = {} -- format names
-- parsing functions in registration order; dateparser.parse tries them in turn
-- when no format name is given. Values are weak (__mode='v'), so an entry only
-- survives while something else (e.g. `formats`) still references the function.
local format_func = setmetatable({}, {__mode='v'}) --format functions
---register a date format parsing function
-- The function is stored under `format_name` and, unless it is already
-- present, appended to the ordered list of parsers.
--@param format_name name to register the parser under (string)
--@param format_function function taking a date string
--@return true on success; nil, error on bad arguments
function dateparser.register_format(format_name, format_function)
    local args_ok = type(format_name) == "string" and type(format_function) == 'function'
    if not args_ok then
        return nil, "improper arguments, can't register format handler"
    end

    -- the ordered list holds each function once, even if it has several names
    local already_listed = false
    for _, registered in ipairs(format_func) do
        if registered == format_function then
            already_listed = true
            break
        end
    end
    if not already_listed then
        tinsert(format_func, format_function)
    end

    formats[format_name] = format_function
    return true
end
---unregister a date format parsing function
-- Removes the name. If no other name still maps to the same function, the
-- function is also taken out of the ordered parser list, so it stops being
-- tried right away and no gap is left in that list.
--@param format_name name the format was registered under
--@return true on success (also when the name was not registered);
--        nil, error if format_name is not a string
function dateparser.unregister_format(format_name)
    if type(format_name)~="string" then return nil, "format name must be a string" end
    local format_function = formats[format_name]
    formats[format_name] = nil
    if format_function == nil then return true end

    -- the same function may be registered under an alias; keep it in that case
    for _, other in pairs(formats) do
        if other == format_function then return true end
    end

    for i, registered in ipairs(format_func) do
        if registered == format_function then
            tremove(format_func, i) -- shifts later parsers down, preserving order
            break
        end
    end
    return true
end
---return the function responsible for handling format_name date strings
--@param format_name name the format was registered under
--@return the parsing function; nil, error if no such format is registered
function dateparser.get_format_function(format_name)
    local format_function = formats[format_name]
    if format_function then
        return format_function
    end
    -- tostring: "%s" does not accept every value type on all Lua versions
    return nil, ("format %s not registered"):format(tostring(format_name))
end
---try to parse date string
-- With a format name only that parser is used; otherwise every registered
-- parser is tried in registration order and the first result wins. Parsers
-- run under pcall, so a parser that raises counts as "did not match".
--@param str date string
--@param date_format optional date format name, if known
--@return unix timestamp if str can be parsed; nil, error otherwise.
function dateparser.parse(str, date_format)
    if date_format then
        local parser = formats[date_format]
        if not parser then
            return nil, 'unknown date format: ' .. tostring(date_format)
        end
        local success, res, err = pcall(parser, str)
        if success and res then return res end
        -- when the parser raised, `res` is the error; otherwise it may have
        -- returned nil together with its own message
        return nil, tostring((not success and res) or err or "date string does not match format")
    end

    for _, func in ipairs(format_func) do
        local success, res = pcall(func, str)
        if success and res then return res end
    end
    return nil, "unable to parse date: " .. tostring(str)
end
--- W3CDTF parser, e.g. "2019-03-15T10:20:30+01:00" or "2019-03-15T10:20:30.5Z".
-- The string is consumed left to right; `rest` always holds the unparsed tail.
-- When the date part stops matching, a later `rest:match` indexes nil and
-- raises; dateparser.parse calls format functions under pcall.
-- @param rest date string
-- @return unix timestamp (fractional when the seconds carry a fraction);
--         nil when the zone designator is missing or malformed, when text is
--         left over, or (with an error message) when os.time rejects the date
dateparser.register_format('W3CDTF', function(rest)
    local year, day_of_year, month, day
    local hour, minute, second, second_fraction, offset_hours
    local alt_rest

    year, rest = rest:match("^(%d%d%d%d)%-?(.*)$")
    -- NOTE(review): a three-digit day-of-year is recognised and skipped here
    -- but never used to build the date.
    day_of_year, alt_rest = rest:match("^(%d%d%d)%-?(.*)$")
    if day_of_year then rest=alt_rest end
    month, rest = rest:match("^(%d%d)%-?(.*)$")
    day, rest = rest:match("^(%d%d)(.*)$")

    if #rest>0 then
        rest = rest:match("^T(.*)$")
        hour, rest = rest:match("^([0-2][0-9]):?(.*)$")
        minute, rest = rest:match("^([0-6][0-9]):?(.*)$")
        second, rest = rest:match("^([0-6][0-9])(.*)$")
        second_fraction, alt_rest = rest:match("^%.(%d+)(.*)$")
        if second_fraction then
            rest=alt_rest
        end
        if rest=="Z" then
            rest=""
            offset_hours=0
        else
            local sign, offset_h, offset_m
            sign, offset_h, rest = rest:match("^([+-])(%d%d)%:?(.*)$")
            if not sign then return nil end -- no usable zone designator
            offset_m, alt_rest = rest:match("^(%d%d)(.*)$")
            if offset_m then rest=alt_rest end
            -- the sign covers hours AND minutes: "-05:30" is -5.5 h, not -4.5 h
            offset_hours = tonumber(offset_h) + (tonumber(offset_m) or 0)/60
            if sign == "-" then offset_hours = -offset_hours end
        end
        if #rest>0 then return nil end
    end

    year = tonumber(year)
    local d = {
        year = year and (year > 100 and year or (year < 50 and (year + 2000) or (year + 1900))),
        month = tonumber(month) or 1,
        day = tonumber(day) or 1,
        hour = tonumber(hour) or 0,
        min = tonumber(minute) or 0,
        sec = tonumber(second) or 0,
        isdst = false
    }
    local t, err = unix_timestamp(d, (offset_hours or 0) * 3600)
    if not t then return nil, err end
    if second_fraction then
        return t + tonumber("0."..second_fraction)
    else
        return t
    end
end)
do
    -- zone abbreviation -> offset from UTC in hours
    local tz_table = { --taken from http://www.timeanddate.com/library/abbreviations/timezones/
        A = 1, B = 2, C = 3, D = 4, E=5, F = 6, G = 7, H = 8, I = 9,
        K = 10, L = 11, M = 12, N = -1, O = -2, P = -3, Q = -4, R = -5,
        S = -6, T = -7, U = -8, V = -9, W = -10, X = -11, Y = -12,
        Z = 0,
        EST = -5, EDT = -4, CST = -6, CDT = -5,
        MST = -7, MDT = -6, PST = -8, PDT = -7,
        GMT = 0, UT = 0, UTC = 0
    }
    local month_val = {Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12}

    --- RFC 2822 parser, e.g. "Tue, 15 Nov 1994 08:12:31 GMT" or
    -- "15 Nov 1994 08:12 -0330". The weekday and the seconds are optional.
    -- The string is consumed left to right; `rest` holds the unparsed tail.
    -- When the date/time part stops matching, a later `rest:match` indexes nil
    -- and raises; dateparser.parse calls format functions under pcall.
    -- @param rest date string
    -- @return unix timestamp; nil when the zone is missing, malformed or not
    --         in tz_table (with a message in the last case), when text is left
    --         over, or (with a message) when os.time rejects the date
    dateparser.register_format('RFC2822', function(rest)
        local year, month, day, weekday
        local hour, minute, second, offset_hours
        local alt_rest

        weekday, alt_rest = rest:match("^(%w%w%w),%s+(.*)$")
        if weekday then rest=alt_rest end
        day, rest=rest:match("^(%d%d?)%s+(.*)$")
        month, rest=rest:match("^(%w%w%w)%s+(.*)$")
        month = month_val[month]
        year, rest = rest:match("^(%d%d%d?%d?)%s+(.*)$")
        hour, rest = rest:match("^(%d%d?):(.*)$")
        minute, rest = rest:match("^(%d%d?)(.*)$")
        second, alt_rest = rest:match("^:(%d%d)(.*)$")
        if second then rest = alt_rest end

        local tz, offset_sign, offset_h, offset_m
        tz, alt_rest = rest:match("^%s+(%u+)(.*)$")
        if tz then
            rest = alt_rest
            offset_hours = tz_table[tz]
            -- an abbreviation we do not know cannot be converted reliably
            if not offset_hours then return nil, "unknown time zone: " .. tz end
        else
            offset_sign, offset_h, offset_m, rest = rest:match("^%s+([+-])(%d%d)(%d%d)%s*(.*)$")
            if not offset_sign then return nil end -- no usable zone
            -- the sign covers hours AND minutes: "-0330" is -3.5 h, not -2.5 h
            offset_hours = tonumber(offset_h) + tonumber(offset_m)/60
            if offset_sign == "-" then offset_hours = -offset_hours end
        end

        if #rest>0 or not (year and day and month and hour and minute) then
            return nil
        end

        year = tonumber(year)
        local d = {
            -- two- and three-digit years: below 50 means 20xx, otherwise 19xx
            year = year and ((year > 100) and year or (year < 50 and (year + 2000) or (year + 1900))),
            month = month,
            day = tonumber(day),
            hour= tonumber(hour) or 0,
            min = tonumber(minute) or 0,
            sec = tonumber(second) or 0,
            isdst = false
        }
        return unix_timestamp(d, offset_hours * 3600)
    end)
end
-- Aliases: register the parsers defined above under further format names.
dateparser.register_format('RFC822', formats.RFC2822) --2822 supersedes 822, but is not a strict superset. For our intents and purposes though, it's perfectly good enough
dateparser.register_format('RFC3339', formats.W3CDTF) --RFC3339 is a subset of W3CDTF
return dateparser

@ -8,6 +8,7 @@ local LuaSettings = require("frontend/luasettings")
local UIManager = require("ui/uimanager")
local NetworkMgr = require("ui/network/manager")
local WidgetContainer = require("ui/widget/container/widgetcontainer")
local dateparser = require("lib.dateparser")
local ffi = require("ffi")
local logger = require("logger")
local util = require("util")
@ -245,6 +246,7 @@ function NewsDownloader:deserializeXMLString(xml_str)
-- uses LuaXML https://github.com/manoelcampos/LuaXML
-- The MIT License (MIT)
-- Copyright (c) 2016 Manoel Campos da Silva Filho
-- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
local treehdl = require("lib/handler")
local libxml = require("lib/xml")
@ -297,10 +299,30 @@ function NewsDownloader:processRSS(feeds, limit, download_full_article)
end
end
--- Turn a feed date string into a filename prefix "yy-mm-dd_HH-MM_".
-- When the string cannot be parsed, the current time is used instead.
-- @param dateTime date string taken from the feed entry
-- @return formatted prefix string
local function parseDate(dateTime)
    -- uses lua-feedparser https://github.com/slact/lua-feedparser
    -- feedparser is available under the (new) BSD license.
    -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
    local timestamp = dateparser.parse(dateTime)
    if type(timestamp) == "number" then
        -- parsed dates may carry fractional seconds
        timestamp = math.floor(timestamp)
    else
        -- parse() may hand back nil or false; os.date raises on any non-number
        -- other than nil, and nil makes it use the current time
        timestamp = nil
    end
    return os.date("%y-%m-%d_%H-%M_", timestamp)
end
--- Build the sanitised feed entry title, prefixed with the entry's date when
-- one of its date fields is set.
-- @param feed feed entry table (reads title, updated, pubDate, published)
-- @return title string, with a date prefix if a date field was present
local function getTitleWithDate(feed)
    local title = util.replaceInvalidChars(getFeedTitle(feed.title))
    -- the first field that is set wins: updated, then pubDate, then published
    local stamp = feed.updated or feed.pubDate or feed.published
    if not stamp then
        return title
    end
    return parseDate(stamp) .. title
end
function NewsDownloader:downloadFeed(feed, feed_output_dir)
local link = getFeedLink(feed.link)
local news_dl_path = ("%s%s%s"):format(feed_output_dir,
util.replaceInvalidChars(getFeedTitle(feed.title)),
getTitleWithDate(feed),
file_extension)
logger.dbg("NewsDownloader: News file will be stored to :", news_dl_path)
@ -309,7 +331,7 @@ end
function NewsDownloader:createFromDescription(feed, context, feed_output_dir)
local news_file_path = ("%s%s%s"):format(feed_output_dir,
util.replaceInvalidChars(getFeedTitle(feed.title)),
getTitleWithDate(feed),
file_extension)
logger.dbg("NewsDownloader: News file will be created :", news_file_path)
local file = io.open(news_file_path, "w")

Loading…
Cancel
Save