diff --git a/.luacheckrc b/.luacheckrc
index 946d1a9bf..0da515bcb 100644
--- a/.luacheckrc
+++ b/.luacheckrc
@@ -98,8 +98,10 @@ read_globals = {
 exclude_files = {
     "frontend/luxl.lua",
     "plugins/newsdownloader.koplugin/lib/handler.lua",
-    "plugins/newsdownloader.koplugin/lib/LICENSE",
+    "plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML",
     "plugins/newsdownloader.koplugin/lib/xml.lua",
+    "plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser",
+    "plugins/newsdownloader.koplugin/lib/dateparser.lua",
 }
 
 -- don't balk on busted stuff in spec
diff --git a/plugins/newsdownloader.koplugin/internaldownloadbackend.lua b/plugins/newsdownloader.koplugin/internaldownloadbackend.lua
index 0ff5003c1..fa19cf9ad 100644
--- a/plugins/newsdownloader.koplugin/internaldownloadbackend.lua
+++ b/plugins/newsdownloader.koplugin/internaldownloadbackend.lua
@@ -6,7 +6,7 @@ local socket = require('socket')
 local socket_url = require("socket.url")
 
 local InternalDownloadBackend = {}
-local max_redirects = 10; --prevent infinite redirects
+local max_redirects = 5; --prevent infinite redirects
 
 function InternalDownloadBackend:getResponseAsString(url, redirectCount)
     if not redirectCount then
diff --git a/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser b/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
new file mode 100644
index 000000000..829d87ca6
--- /dev/null
+++ b/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
@@ -0,0 +1,28 @@
+feedparser is available under the (new) BSD license. it uses a
+portion of LuaSocket code (copyright 2007 Diego Nehab)
+(http://www.keplerproject.org/luaexpat/), which is under the MIT license.
+
+Copyright (c) 2009 Leo Ponomarev.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY ''AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/plugins/newsdownloader.koplugin/lib/LICENSE b/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
similarity index 100%
rename from plugins/newsdownloader.koplugin/lib/LICENSE
rename to plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
diff --git a/plugins/newsdownloader.koplugin/lib/dateparser.lua b/plugins/newsdownloader.koplugin/lib/dateparser.lua
new file mode 100644
index 000000000..abd6f0957
--- /dev/null
+++ b/plugins/newsdownloader.koplugin/lib/dateparser.lua
@@ -0,0 +1,213 @@
+local difftime, time, date = os.difftime, os.time, os.date
+local format = string.format
+local tremove, tinsert = table.remove, table.insert
+local pcall, pairs, ipairs, tostring, tonumber, type, setmetatable = pcall, pairs, ipairs, tostring, tonumber, type, setmetatable
+
+local dateparser={}
+
+--we shall use the host OS's time conversion facilities. Dealing with all those leap seconds by hand can be such a bore.
+--@param t date table as accepted by os.time, holding the fields of the parsed date
+--@param offset_sec offset of those fields from UTC, in seconds
+--@return unix timestamp; nil, error if os.time rejects t.
+local unix_timestamp
+do
+    local now = time()
+    local local_UTC_offset_sec = difftime(time(date("!*t", now)), time(date("*t", now)))
+    unix_timestamp = function(t, offset_sec)
+        local success, improper_time = pcall(time, t)
+        if not success or not improper_time then return nil, "invalid date. os.time says: " .. (improper_time or "nothing") end
+        return improper_time - local_UTC_offset_sec - offset_sec
+    end
+end
+
+local formats = {} -- format functions, keyed by format name
+local format_func = setmetatable({}, {__mode='v'}) -- format functions in registration order, tried in turn by parse()
+
+---register a date format parsing function
+--@param format_name name the parser is registered under
+--@param format_function function(str) returning a unix timestamp, or nil if str does not match
+--@return true on success; nil, error otherwise.
+function dateparser.register_format(format_name, format_function)
+    if type(format_name)~="string" or type(format_function)~='function' then return nil, "improper arguments, can't register format handler" end
+
+    local found
+    for _, f in ipairs(format_func) do --for ordering
+        if f==format_function then
+            found=true
+            break
+        end
+    end
+    if not found then
+        tinsert(format_func, format_function)
+    end
+    formats[format_name] = format_function
+    return true
+end
+
+---unregister a date format parsing function
+--only the name lookup is removed here; format_func is left untouched
+--@param format_name name the parser was registered under
+--@return nil, error if format_name is not a string.
+function dateparser.unregister_format(format_name)
+    if type(format_name)~="string" then return nil, "format name must be a string" end
+    formats[format_name]=nil
+end
+
+---return the function responsible for handling format_name date strings
+--@param format_name registered format name
+--@return the format function; nil, error if format_name is not registered.
+function dateparser.get_format_function(format_name)
+    local format_function = formats[format_name]
+    if format_function then return format_function end
+    return nil, ("format %s not registered"):format(tostring(format_name))
+end
+
+---try to parse date string
+--@param str date string
+--@param date_format optional date format name, if known
+--@return unix timestamp if str can be parsed; nil, error otherwise.
+function dateparser.parse(str, date_format)
+    local success, res, err
+    if date_format then
+        if not formats[date_format] then return nil, 'unknown date format: ' .. tostring(date_format) end
+        success, res, err = pcall(formats[date_format], str)
+    else
+        for _, func in ipairs(format_func) do
+            success, res, err = pcall(func, str)
+            if success and res then return res end
+        end
+    end
+    if success and res then return res end
+    --after a failed pcall, res holds the error raised by the format function
+    return nil, (not success and res) or err or "unable to parse date"
+end
+
+--W3CDTF, e.g. "2017-05-01T10:00:00.5+02:00" or "2017-05-01"
+dateparser.register_format('W3CDTF', function(rest)
+
+    local year, day_of_year, month, day, week
+    local hour, minute, second, second_fraction, offset_hours
+
+    local alt_rest
+
+    year, rest = rest:match("^(%d%d%d%d)%-?(.*)$")
+
+    day_of_year, alt_rest = rest:match("^(%d%d%d)%-?(.*)$")
+
+    if day_of_year then rest=alt_rest end
+
+    month, rest = rest:match("^(%d%d)%-?(.*)$")
+
+    day, rest = rest:match("^(%d%d)(.*)$")
+    if #rest>0 then
+        rest = rest:match("^T(.*)$")
+        hour, rest = rest:match("^([0-2][0-9]):?(.*)$")
+        minute, rest = rest:match("^([0-6][0-9]):?(.*)$")
+        second, rest = rest:match("^([0-6][0-9])(.*)$")
+        second_fraction, alt_rest = rest:match("^%.(%d+)(.*)$")
+        if second_fraction then
+            rest=alt_rest
+        end
+        if rest=="Z" then
+            rest=""
+            offset_hours=0
+        else
+            local sign, offset_h, offset_m
+            sign, offset_h, rest = rest:match("^([+-])(%d%d)%:?(.*)$")
+            offset_m, alt_rest = rest:match("^(%d%d)(.*)$")
+            if offset_m then rest=alt_rest end
+            --the sign covers the minutes too: "-05:30" is -5.5 hours, not -4.5
+            offset_hours = (sign=="-" and -1 or 1) * (tonumber(offset_h) + (tonumber(offset_m) or 0)/60)
+        end
+        if #rest>0 then return nil end
+    end
+
+    year = tonumber(year)
+    local d = {
+        year = year and (year > 100 and year or (year < 50 and (year + 2000) or (year + 1900))),
+        month = tonumber(month) or 1,
+        day = tonumber(day) or 1,
+        hour = tonumber(hour) or 0,
+        min = tonumber(minute) or 0,
+        sec = tonumber(second) or 0,
+        isdst = false
+    }
+    local t, err = unix_timestamp(d, (offset_hours or 0) * 3600)
+    if not t then return nil, err end
+    if second_fraction then
+        return t + tonumber("0."..second_fraction)
+    else
+        return t
+    end
+end)
+
+
+do
+    local tz_table = { --taken from http://www.timeanddate.com/library/abbreviations/timezones/
+        A = 1, B = 2, C = 3, D = 4, E=5, F = 6, G = 7, H = 8, I = 9,
+        K = 10, L = 11, M = 12, N = -1, O = -2, P = -3, Q = -4, R = -5,
+        S = -6, T = -7, U = -8, V = -9, W = -10, X = -11, Y = -12,
+        Z = 0,
+
+        EST = -5, EDT = -4, CST = -6, CDT = -5,
+        MST = -7, MDT = -6, PST = -8, PDT = -7,
+
+        GMT = 0, UT = 0, UTC = 0
+    }
+
+    local month_val = {Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12}
+
+    --RFC2822, e.g. "Mon, 01 May 2017 10:00:00 +0200" or "01 May 17 10:00 GMT"
+    dateparser.register_format('RFC2822', function(rest)
+
+        local year, month, day, day_of_year, week_of_year, weekday
+        local hour, minute, second, second_fraction, offset_hours
+
+        local alt_rest
+
+        weekday, alt_rest = rest:match("^(%w%w%w),%s+(.*)$")
+        if weekday then rest=alt_rest end
+        day, rest=rest:match("^(%d%d?)%s+(.*)$")
+        month, rest=rest:match("^(%w%w%w)%s+(.*)$")
+        month = month_val[month]
+        year, rest = rest:match("^(%d%d%d?%d?)%s+(.*)$")
+        hour, rest = rest:match("^(%d%d?):(.*)$")
+        minute, rest = rest:match("^(%d%d?)(.*)$")
+        second, alt_rest = rest:match("^:(%d%d)(.*)$")
+        if second then rest = alt_rest end
+        local tz, offset_sign, offset_h, offset_m
+        tz, alt_rest = rest:match("^%s+(%u+)(.*)$")
+        if tz then
+            rest = alt_rest
+            --RFC 2822 section 4.3: a zone whose meaning is not known is treated as +0000
+            offset_hours = tz_table[tz] or 0
+        else
+            offset_sign, offset_h, offset_m, rest = rest:match("^%s+([+-])(%d%d)(%d%d)%s*(.*)$")
+            --the sign covers the minutes too: "-0530" is -5.5 hours, not -4.5
+            offset_hours = (offset_sign=="-" and -1 or 1) * (tonumber(offset_h) + (tonumber(offset_m) or 0)/60)
+        end
+
+        if #rest>0 or not (year and day and month and hour and minute) then
+            return nil
+        end
+
+        year = tonumber(year)
+        local d = {
+            year = year and ((year > 100) and year or (year < 50 and (year + 2000) or (year + 1900))),
+            month = month,
+            day = tonumber(day),
+
+            hour= tonumber(hour) or 0,
+            min = tonumber(minute) or 0,
+            sec = tonumber(second) or 0,
+            isdst = false
+        }
+        return unix_timestamp(d, offset_hours * 3600)
+    end)
+end
+
+dateparser.register_format('RFC822', formats.RFC2822) --2822 supercedes 822, but is not a strict superset. For our intents and purposes though, it's perfectly good enough
+dateparser.register_format('RFC3339', formats.W3CDTF) --RFC3339 is a subset of W3CDTF
+
+
+return dateparser
\ No newline at end of file
diff --git a/plugins/newsdownloader.koplugin/main.lua b/plugins/newsdownloader.koplugin/main.lua
index c7de0e87c..063f8961e 100644
--- a/plugins/newsdownloader.koplugin/main.lua
+++ b/plugins/newsdownloader.koplugin/main.lua
@@ -8,6 +8,7 @@ local LuaSettings = require("frontend/luasettings")
 local UIManager = require("ui/uimanager")
 local NetworkMgr = require("ui/network/manager")
 local WidgetContainer = require("ui/widget/container/widgetcontainer")
+local dateparser = require("lib.dateparser")
 local ffi = require("ffi")
 local logger = require("logger")
 local util = require("util")
@@ -245,6 +246,7 @@ function NewsDownloader:deserializeXMLString(xml_str)
     -- uses LuaXML https://github.com/manoelcampos/LuaXML
     -- The MIT License (MIT)
     -- Copyright (c) 2016 Manoel Campos da Silva Filho
+    -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
     local treehdl = require("lib/handler")
     local libxml = require("lib/xml")
 
@@ -297,10 +299,33 @@ function NewsDownloader:processRSS(feeds, limit, download_full_article)
     end
 end
 
+local function parseDate(dateTime)
+    -- uses lua-feedparser https://github.com/slact/lua-feedparser
+    -- feedparser is available under the (new) BSD license.
+    -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
+    local date = dateparser.parse(dateTime)
+    if type(date) ~= "number" then
+        -- Unparseable date: return no prefix. os.date() raises on a non-number
+        -- time argument and would silently use the current time for nil.
+        return ""
+    end
+    return os.date("%y-%m-%d_%H-%M_", math.floor(date))
+end
+
+local function getTitleWithDate(feed)
+    local title = util.replaceInvalidChars(getFeedTitle(feed.title))
+    -- Use the first date field present, in this order of preference.
+    local timestamp = feed.updated or feed.pubDate or feed.published
+    if timestamp then
+        title = parseDate(timestamp) .. title
+    end
+    return title
+end
+
 function NewsDownloader:downloadFeed(feed, feed_output_dir)
     local link = getFeedLink(feed.link)
     local news_dl_path = ("%s%s%s"):format(feed_output_dir,
-                                           util.replaceInvalidChars(getFeedTitle(feed.title)),
+                                           getTitleWithDate(feed),
                                            file_extension)
     logger.dbg("NewsDownloader: News file will be stored to :", news_dl_path)
 
@@ -309,7 +334,7 @@ end
 
 function NewsDownloader:createFromDescription(feed, context, feed_output_dir)
     local news_file_path = ("%s%s%s"):format(feed_output_dir,
-                                             util.replaceInvalidChars(getFeedTitle(feed.title)),
+                                             getTitleWithDate(feed),
                                              file_extension)
     logger.dbg("NewsDownloader: News file will be created :", news_file_path)
     local file = io.open(news_file_path, "w")