diff --git a/.luacheckrc b/.luacheckrc index 92febd149..6bc2ab784 100644 --- a/.luacheckrc +++ b/.luacheckrc @@ -97,6 +97,9 @@ read_globals = { exclude_files = { "frontend/luxl.lua", + "plugins/newsdownloader.koplugin/lib/handler.lua", + "plugins/newsdownloader.koplugin/lib/LICENSE", + "plugins/newsdownloader.koplugin/lib/xml.lua", } -- TODO: clean up and enforce max line width (631) diff --git a/frontend/ui/elements/filemanager_menu_order.lua b/frontend/ui/elements/filemanager_menu_order.lua index 04c5bdee3..b9c463351 100644 --- a/frontend/ui/elements/filemanager_menu_order.lua +++ b/frontend/ui/elements/filemanager_menu_order.lua @@ -36,6 +36,7 @@ local order = { "storage_stat", "cloud_storage", "read_timer", + "rss_news_downloader", "synchronize_time", "terminal", "----------------------------", diff --git a/frontend/ui/elements/reader_menu_order.lua b/frontend/ui/elements/reader_menu_order.lua index dc239304d..449f6c61d 100644 --- a/frontend/ui/elements/reader_menu_order.lua +++ b/frontend/ui/elements/reader_menu_order.lua @@ -59,6 +59,7 @@ local order = { "synchronize_time", "progress_sync", "zsync", + "rss_news_downloader", "terminal", }, search = { diff --git a/plugins/newsdownloader.koplugin/feed_config.lua b/plugins/newsdownloader.koplugin/feed_config.lua new file mode 100644 index 000000000..6764c58de --- /dev/null +++ b/plugins/newsdownloader.koplugin/feed_config.lua @@ -0,0 +1,9 @@ +return { + -- list your feeds here: + -- only supports http URL for now + -- Atom is currently not supported, only RSS + { "http://www.pcworld.com/index.rss", limit = 1 }, + { "http://www.economist.com/sections/science-technology/rss.xml", limit = 2}, + -- set limit to "0" means no download, "-1" no limit. 
+ { "http://www.economist.com/sections/culture/rss.xml", limit = 0 }, +} \ No newline at end of file diff --git a/plugins/newsdownloader.koplugin/lib/LICENSE b/plugins/newsdownloader.koplugin/lib/LICENSE new file mode 100644 index 000000000..fe42580e8 --- /dev/null +++ b/plugins/newsdownloader.koplugin/lib/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Manoel Campos da Silva Filho + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/plugins/newsdownloader.koplugin/lib/handler.lua b/plugins/newsdownloader.koplugin/lib/handler.lua new file mode 100644 index 000000000..4596dd3f9 --- /dev/null +++ b/plugins/newsdownloader.koplugin/lib/handler.lua @@ -0,0 +1,346 @@ +--- +-- Overview: +-- ========= +-- Standard XML event handler(s) for XML parser module (xml.lua) +-- +-- Features: +-- ========= +-- printHandler - Generate XML event trace +-- domHandler - Generate DOM-like node tree +-- simpleTreeHandler - Generate 'simple' node tree +-- +-- API: +-- ==== +-- Must be called as handler function from xmlParser +-- and implement XML event callbacks (see xmlParser.lua +-- for callback API definition) +-- +-- printHandler: +-- ------------- +-- +-- printHandler prints event trace for debugging +-- +-- domHandler: +-- ----------- +-- +-- domHandler generates a DOM-like node tree structure with +-- a single ROOT node parent - each node is a table comprising +-- fields below. +-- +-- node = { _name = , +-- _type = ROOT|ELEMENT|TEXT|COMMENT|PI|DECL|DTD, +-- _attr = { Node attributes - see callback API }, +-- _parent = +-- _children = { List of child nodes - ROOT/NODE only } +-- } +-- +-- The dom structure is capable of representing any valid XML document +-- +-- simpleTreeHandler +-- ----------------- +-- +-- simpleTreeHandler is a simplified handler which attempts +-- to generate a more 'natural' table based structure which +-- supports many common XML formats. +-- +-- The XML tree structure is mapped directly into a recursive +-- table structure with node names as keys and child elements +-- as either a table of values or directly as a string value +-- for text. Where there is only a single child element this +-- is inserted as a named key - if there are multiple +-- elements these are inserted as a vector (in some cases it +-- may be preferable to always insert elements as a vector +-- which can be specified on a per element basis in the +-- options). 
Attributes are inserted as a child element with +-- a key of '_attr'. +-- +-- Only Tag/Text & CDATA elements are processed - all others +-- are ignored. +-- +-- This format has some limitations - primarily +-- +-- * Mixed-Content behaves unpredictably - the relationship +-- between text elements and embedded tags is lost and +-- multiple levels of mixed content do not work +-- * If a leaf element has both a text element and attributes +-- then the text must be accessed through a vector (to +-- provide a container for the attribute) +-- +-- In general however this format is relatively useful. +-- +-- (It is much easier to understand by running some test +-- data through 'textxml.lua -simpletree' than to read this) +-- +-- Options +-- ======= +-- simpleTreeHandler.options.noreduce = { <tag name> = bool,.. } +-- +-- - Nodes not to reduce children vector even if only +-- one child +-- +-- domHandler.options.(comment|pi|dtd|decl)Node = bool +-- +-- - Include/exclude given node types +-- +-- Usage +-- ===== +-- Passed as delegate in xmlParser constructor and called +-- as callback by xmlParser:parse(xml) method. +-- +-- See textxml.lua for examples +-- License: +-- ======== +-- +-- This code is freely distributable under the terms of the Lua license +-- (http://www.lua.org/copyright.html) +-- +-- History +-- ======= +-- $Id: handler.lua,v 1.1.1.1 2001/11/28 06:11:33 paulc Exp $ +-- +-- $Log: handler.lua,v $ +-- Revision 1.1.1.1 2001/11/28 06:11:33 paulc +-- Initial Import +--@author Paul Chakravarti (paulc@passtheaardvark.com)

--- Render a table as a compact "{key=value,...}" string (debugging aid).
-- Nested tables are rendered recursively, so cyclic tables are not supported.
--@param t Value to render
--@return String form of a table; a non-table value is returned unchanged
local function showTable(t)
    if type(t) ~= 'table' then
        return t
    end
    local parts = {}
    for k, v in pairs(t) do
        if type(v) == 'table' then
            v = showTable(v)
        end
        -- tostring() so that boolean keys/values cannot trip "%s" on Lua 5.1
        parts[#parts + 1] = string.format("%s=%s", tostring(k), tostring(v))
    end
    return '{' .. table.concat(parts, ',') .. '}'
end

--- Write one " + name='value'" line per attribute; a nil table prints nothing.
--@param a Attribute table (or nil)
local function writeAttributes(a)
    if a then
        for k, v in pairs(a) do
            io.write(string.format(" + %s='%s'\n", k, v))
        end
    end
end

---Handler to generate a simple event trace on standard output
--@return A handler object implementing every xmlParser callback
local printHandler = function()
    local obj = {}
    obj.starttag = function(self, t, a, s, e)
        io.write("Start : " .. t .. "\n")
        writeAttributes(a)
    end
    obj.endtag = function(self, t, s, e)
        io.write("End : " .. t .. "\n")
    end
    obj.text = function(self, t, s, e)
        io.write("Text : " .. t .. "\n")
    end
    obj.cdata = function(self, t, s, e)
        io.write("CDATA : " .. t .. "\n")
    end
    obj.comment = function(self, t, s, e)
        io.write("Comment : " .. t .. "\n")
    end
    obj.dtd = function(self, t, a, s, e)
        io.write("DTD : " .. t .. "\n")
        writeAttributes(a)
    end
    obj.pi = function(self, t, a, s, e)
        io.write("PI : " .. t .. "\n")
        writeAttributes(a)
    end
    obj.decl = function(self, t, a, s, e)
        io.write("XML Decl : " .. t .. "\n")
        writeAttributes(a)
    end
    return obj
end


--- Get the first key of a table, as reported by next().
--@param tb Table to inspect
--@return A key of tb, nil when tb is an empty table, or tb itself when it
--is not a table
local function getFirstKey(tb)
    if type(tb) ~= "table" then
        return tb
    end
    local first_key = next(tb)
    return first_key
end

---Handler to generate a lua table from a XML content string.
-- The resulting tree is available in the `root` field once parsing is done.
--@return A handler object implementing starttag/endtag/text/cdata
local function simpleTreeHandler()
    local obj = {}

    obj.root = {}
    obj.stack = {obj.root;n=1}
    obj.options = {noreduce = {}}

    --- Recursively remove redundant vectors for nodes with a single child.
    --@param node Node to reduce
    --@param key Key under which node is stored in parent
    --@param parent Table holding node (nil for the root)
    obj.reduce = function(self, node, key, parent)
        for k, v in pairs(node) do
            if type(v) == 'table' then
                self:reduce(v, k, node)
            end
        end
        -- The root has no parent to be collapsed into, even when stray text
        -- in front of the first tag left it with exactly one array entry.
        if parent ~= nil and #node == 1 and not self.options.noreduce[key]
            and node._attr == nil then
            parent[key] = node[1]
        else
            node.n = nil
        end
    end

    --@param t Tag name
    --@param a Attributes table (stored as _attr when parseAttributes is true)
    obj.starttag = function(self, t, a)
        local node = {}
        if self.parseAttributes == true then
            node._attr = a
        end

        local current = self.stack[#self.stack]
        if current[t] then
            table.insert(current[t], node)
        else
            current[t] = {node;n=1}
        end
        table.insert(self.stack, node)
    end

    --@param t Tag name
    --@param s Second callback argument; only used in the error message.
    --xmlParser passes nil here, hence the tostring() below.
    obj.endtag = function(self, t, s)
        -- Table representing the tag currently being processed
        local current = self.stack[#self.stack]
        -- Table representing the tag that contains the current tag
        local prev = self.stack[#self.stack - 1]
        if not prev or not prev[t] then
            error("XML Error - Unmatched Tag [" .. tostring(s) .. ":" .. t .. "]\n")
        end
        if prev == self.root then
            -- Once parsing complete recursively reduce tree
            self:reduce(prev, nil, nil)
        end

        -- If the table of the current tag has no key at all, there is no
        -- value associated with the tag (e.g. a self-closing tag). Store an
        -- empty string instead, so that applications get "" rather than an
        -- empty table when they print the value.
        -- NOTE(review): prev[t] = "" also replaces any earlier sibling with
        -- the same tag name - confirm whether that is intended.
        local firstKey = getFirstKey(current)
        if firstKey == nil then
            current[t] = ""
            prev[t] = ""
        end

        table.remove(self.stack)
    end

    --@param t Text content, appended to the array part of the current node
    obj.text = function(self, t)
        local current = self.stack[#self.stack]
        table.insert(current, t)
    end

    obj.cdata = obj.text

    return obj
end

--- domHandler: builds a DOM-like tree of nodes below a single ROOT node.
--@return A handler object; the tree is available in its `root` field
local function domHandler()
    local obj = {}
    obj.options = {commentNode=1,piNode=1,dtdNode=1,declNode=1}
    obj.root = { _children = {n=0}, _type = "ROOT" }
    obj.current = obj.root
    obj.starttag = function(self, t, a)
        local node = { _type = 'ELEMENT',
                       _name = t,
                       _attr = a,
                       _parent = self.current,
                       _children = {n=0} }
        table.insert(self.current._children, node)
        self.current = node
    end
    obj.endtag = function(self, t, s)
        if t ~= self.current._name then
            -- s is nil when called by xmlParser, hence the tostring()
            error("XML Error - Unmatched Tag [" .. tostring(s) .. ":" .. t .. "]\n")
        end
        self.current = self.current._parent
    end
    obj.text = function(self, t)
        local node = { _type = "TEXT",
                       _parent = self.current,
                       _text = t }
        table.insert(self.current._children, node)
    end
    obj.comment = function(self, t)
        if self.options.commentNode then
            local node = { _type = "COMMENT",
                           _parent = self.current,
                           _text = t }
            table.insert(self.current._children, node)
        end
    end
    obj.pi = function(self, t, a)
        if self.options.piNode then
            local node = { _type = "PI",
                           _name = t,
                           _attr = a,
                           _parent = self.current }
            table.insert(self.current._children, node)
        end
    end
    obj.decl = function(self, t, a)
        if self.options.declNode then
            local node = { _type = "DECL",
                           _name = t,
                           _attr = a,
                           _parent = self.current }
            table.insert(self.current._children, node)
        end
    end
    obj.dtd = function(self, t, a)
        if self.options.dtdNode then
            local node = { _type = "DTD",
                           _name = t,
                           _attr = a,
                           _parent = self.current }
            table.insert(self.current._children, node)
        end
    end
    obj.cdata = obj.text
    return obj
end

-- simpleTreeHandler is the entry the plugin uses; the other handlers are
-- exported too so that the features documented in the header are reachable.
return {
    simpleTreeHandler = simpleTreeHandler,
    domHandler = domHandler,
    printHandler = printHandler,
    showTable = showTable,
}
diff --git a/plugins/newsdownloader.koplugin/lib/xml.lua
b/plugins/newsdownloader.koplugin/lib/xml.lua new file mode 100644 index 000000000..525d490b4 --- /dev/null +++ b/plugins/newsdownloader.koplugin/lib/xml.lua @@ -0,0 +1,483 @@ +--- +-- Overview: +-- ========= +-- +-- This module provides a non-validating XML stream parser in Lua. +-- +-- Features: +-- ========= +-- +-- * Tokenises well-formed XML (relatively robustly) +-- * Flexible handler based event API (see below) +-- * Parses all XML Infoset elements - ie. +-- - Tags +-- - Text +-- - Comments +-- - CDATA +-- - XML Decl +-- - Processing Instructions +-- - DOCTYPE declarations +-- * Provides limited well-formedness checking +-- (checks for basic syntax & balanced tags only) +-- * Flexible whitespace handling (selectable) +-- * Entity Handling (selectable) +-- +-- Limitations: +-- ============ +-- +-- * Non-validating +-- * No charset handling +-- * No namespace support +-- * Shallow well-formedness checking only (fails +-- to detect most semantic errors) +-- +-- API: +-- ==== +-- +-- The parser provides a partially object-oriented API with +-- functionality split into tokeniser and handler components. +-- +-- The handler instance is passed to the tokeniser and receives +-- callbacks for each XML element processed (if a suitable handler +-- function is defined). The API is conceptually similar to the +-- SAX API but implemented differently. +-- +-- The following events are generated by the tokeniser +-- +-- handler:starttag - Start Tag +-- handler:endtag - End Tag +-- handler:text - Text +-- handler:decl - XML Declaration +-- handler:pi - Processing Instruction +-- handler:comment - Comment +-- handler:dtd - DOCTYPE definition +-- handler:cdata - CDATA +-- +-- The function prototype for all the callback functions is +-- +-- callback(val,attrs,start,end) +-- +-- where attrs is a table and val/attrs are overloaded for +-- specific callbacks - ie. +-- +-- Callback val attrs (table) +-- -------- --- ------------- +-- start name { attributes (name=val).. 
} +-- end name nil +-- text <text> nil +-- cdata <text> nil +-- decl "xml" { attributes (name=val).. } +-- pi pi name { attributes (if present).. +-- _text = <PI text> +-- } +-- comment <text> nil +-- dtd root element { _root = <root element>, +-- _type = SYSTEM|PUBLIC, +-- _name = <name>, +-- _uri = <uri>, +-- _internal = <internal DTD subset> +-- } +-- +-- (start & end provide the character positions of the start/end +-- of the element) +-- +-- XML data is passed to the parser instance through the 'parse' +-- method (Note: must be passed a single string currently) +-- +-- Options +-- ======= +-- +-- Parser options are controlled through the 'self.options' table. +-- Available options are - +-- +-- * stripWS +-- +-- Strip non-significant whitespace (leading/trailing) +-- and do not generate events for empty text elements +-- +-- * expandEntities +-- +-- Expand entities (standard entities + single char +-- numeric entities only currently - could be extended +-- at runtime if suitable DTD parser added elements +-- to table (see obj._ENTITIES). May also be possible +-- to expand multibyte entities for UTF-8 only) +-- +-- * errorHandler +-- +-- Custom error handler function +-- +-- NOTE: Boolean options must be set to 'nil' not '0' +-- +-- Usage +-- ===== +-- +-- Create a handler instance (callbacks are invoked as methods) - +-- +-- h = { starttag = function(self,t,a,s,e) .... end, +-- endtag = function(self,t,a,s,e) .... end, +-- text = function(self,t,a,s,e) .... end, +-- cdata = text } +-- +-- (or use predefined handler - see handler.lua) +-- +-- Create parser instance - +-- +-- p = xmlParser(h) +-- +-- Set options - +-- +-- p.options.xxxx = nil +-- +-- Parse XML data - +-- +-- p:parse("<?xml ... ") + + +---Parses a XML string +--@param handler Handler object to be used to convert the XML string +--to other formats. 
--- Create an XML tokeniser bound to a handler object.
-- The returned object exposes `options` and a `parse` method; parse events
-- are delivered to the handler's callbacks (starttag, endtag, text, cdata,
-- comment, pi, decl, dtd) when the handler defines them. @see handler.lua
--@param handler Handler object used to convert the XML string to another format
--@return The parser object
local xmlParser = function(handler)
    local obj = {}

    -- Public attributes

    obj.options = {
        stripWS = 1,
        expandEntities = 1,
        -- The default handler raises; parse() relies on that to stop.
        errorHandler = function(err, pos)
            error(string.format("%s [char=%d]\n",
                                err or "Parse Error", pos))
        end,
    }

    -- Public methods

    --- Tokenise an XML document and feed the events to the handler.
    --@param str Whole document as a single string
    --@param parseAttributes When nil or true, handlers are told (through
    --their parseAttributes field) to keep tag attributes
    obj.parse = function(self, str, parseAttributes)
        if parseAttributes == nil then
            parseAttributes = true
        end
        self._handler.parseAttributes = parseAttributes

        -- Start from an empty tag stack so that a parse that failed half-way
        -- does not poison the next call on the same parser object.
        self._stack = {}
        -- A UTF-8 byte order mark would otherwise be reported as text and
        -- break the "XMLDecl must be at start of document" check.
        str = string.gsub(str, "^\239\187\191", "")

        local match, endmatch, pos = 0, 0, 1
        local text, endt1, endt2, tagstr, tagname, attrs, starttext, endtext
        local errstart, errend, extstart, extend
        while match do
            -- Get next tag (first pass - fix exceptions below)
            match, endmatch, text, endt1, tagstr, endt2 = string.find(str, self._XML, pos)
            if not match then
                if string.find(str, self._WS, pos) then
                    -- No more text - check document complete
                    if #self._stack ~= 0 then
                        self:_err(self._errstr.incompleteXmlErr, pos)
                    end
                else
                    -- Unparsable text
                    self:_err(self._errstr.xmlErr, pos)
                end
                -- Nothing left to tokenise, even if the error handler returned
                break
            end
            -- Handle leading text
            starttext = match
            endtext = match + string.len(text) - 1
            match = match + string.len(text)
            text = self:_parseEntities(self:_stripWS(text))
            if text ~= "" and self._handler.text then
                self._handler:text(text, nil, starttext, endtext)
            end
            -- Test for tag type
            if string.find(string.sub(tagstr, 1, 5), "?xml%s") then
                -- XML Declaration
                match, endmatch, text = string.find(str, self._PI, pos)
                if not match then
                    self:_err(self._errstr.declErr, pos)
                end
                if match ~= 1 then
                    -- Must be at start of doc if present
                    self:_err(self._errstr.declStartErr, pos)
                end
                tagname, attrs = self:_parseTag(text)
                -- TODO: Check attributes are valid
                -- Check for version (mandatory); attrs is nil when the
                -- declaration carries no attribute at all
                if attrs == nil or attrs.version == nil then
                    self:_err(self._errstr.declAttrErr, pos)
                end
                if self._handler.decl then
                    self._handler:decl(tagname, attrs, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 1) == "?" then
                -- Processing Instruction
                match, endmatch, text = string.find(str, self._PI, pos)
                if not match then
                    self:_err(self._errstr.piErr, pos)
                end
                if self._handler.pi then
                    -- Parse PI attributes & text
                    tagname, attrs = self:_parseTag(text)
                    local pi = string.sub(text, string.len(tagname) + 1)
                    if pi ~= "" then
                        if attrs then
                            attrs._text = pi
                        else
                            attrs = { _text = pi }
                        end
                    end
                    self._handler:pi(tagname, attrs, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 3) == "!--" then
                -- Comment
                match, endmatch, text = string.find(str, self._COMMENT, pos)
                if not match then
                    self:_err(self._errstr.commentErr, pos)
                end
                if self._handler.comment then
                    text = self:_parseEntities(self:_stripWS(text))
                    -- comments have no attributes
                    self._handler:comment(text, nil, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 8) == "!DOCTYPE" then
                -- DTD
                match, endmatch, attrs = self:_parseDTD(str, pos)
                if not match then
                    self:_err(self._errstr.dtdErr, pos)
                end
                if self._handler.dtd then
                    self._handler:dtd(attrs._root, attrs, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 8) == "![CDATA[" then
                -- CDATA
                match, endmatch, text = string.find(str, self._CDATA, pos)
                if not match then
                    self:_err(self._errstr.cdataErr, pos)
                end
                if self._handler.cdata then
                    self._handler:cdata(text, nil, match, endmatch)
                end
            else
                -- Normal tag

                -- Need check for embedded '>' in attribute value and extend
                -- match if necessary, eg. <tag attr="a>b">

                while true do
                    errstart, errend = string.find(tagstr, self._ATTRERR1)
                    if errend == nil then
                        errstart, errend = string.find(tagstr, self._ATTRERR2)
                        if errend == nil then
                            break
                        end
                    end
                    extstart, extend, endt2 = string.find(str, self._TAGEXT, endmatch + 1)
                    if not extstart then
                        -- Attribute value is never closed before end of input
                        self:_err(self._errstr.xmlErr, pos)
                        return
                    end
                    tagstr = tagstr .. string.sub(str, endmatch, extend - 1)
                    endmatch = extend
                end

                -- Extract tagname/attrs

                tagname, attrs = self:_parseTag(tagstr)

                if (endt1 == "/") then
                    -- End tag
                    if attrs then
                        -- Shouldnt have any attributes in endtag
                        self:_err(string.format("%s (/%s)",
                                                self._errstr.endTagErr,
                                                tagname)
                                  , pos)
                    end
                    -- The stack must be kept balanced whether or not the
                    -- handler wants endtag events
                    if table.remove(self._stack) ~= tagname then
                        self:_err(string.format("%s (/%s)",
                                                self._errstr.unmatchedTagErr,
                                                tagname)
                                  , pos)
                    end
                    if self._handler.endtag then
                        self._handler:endtag(tagname, nil, match, endmatch)
                    end
                else
                    -- Start Tag
                    table.insert(self._stack, tagname)
                    if self._handler.starttag then
                        self._handler:starttag(tagname, attrs, match, endmatch)
                    end
                    -- TODO: self-closing tags are reported like any other
                    -- tag, so tree handlers see them as an empty table; an
                    -- empty string field would be easier for applications.
                    -- Self-Closing Tag
                    if (endt2 == "/") then
                        table.remove(self._stack)
                        if self._handler.endtag then
                            self._handler:endtag(tagname, nil, match, endmatch)
                        end
                    end
                end
            end
            pos = endmatch + 1
        end
    end

    -- Private attributes/functions

    obj._handler = handler
    obj._stack = {}

    obj._XML = '^([^<]*)<(%/?)([^>]-)(%/?)>'
    obj._ATTR1 = '([%w-:_]+)%s*=%s*"(.-)"'
    obj._ATTR2 = '([%w-:_]+)%s*=%s*\'(.-)\''
    obj._CDATA = '<%!%[CDATA%[(.-)%]%]>'
    obj._PI = '<%?(.-)%?>'
    obj._COMMENT = '<!%-%-(.-)%-%->'
    obj._TAG = '^(.-)%s.*'
    obj._LEADINGWS = '^%s+'
    obj._TRAILINGWS = '%s+$'
    obj._WS = '^%s*$'
    -- Capture counts below match what _parseDTD unpacks for each variant
    obj._DTD1 = '<!DOCTYPE%s+(.-)%s+(%S+)%s+(%S+)%s+(%b[])%s*>'
    obj._DTD2 = '<!DOCTYPE%s+(.-)%s+(%S+)%s+(%S+)%s+(%S+)%s+(%b[])%s*>'
    obj._DTD3 = '<!DOCTYPE%s+(.-)%s*(%b[])%s*>'
    obj._DTD4 = '<!DOCTYPE%s+(.-)%s+(%S+)%s+(%S+)%s*>'
    obj._DTD5 = '<!DOCTYPE%s+(.-)%s+(%S+)%s+(%S+)%s+(%S+)%s*>'

    obj._ATTRERR1 = '=%s*"[^"]*$'
    obj._ATTRERR2 = '=%s*\'[^\']*$'
    obj._TAGEXT = '(%/?)>'

    -- Lua pattern -> replacement (string or function), applied with gsub
    obj._ENTITIES = { ["&lt;"] = "<",
                      ["&gt;"] = ">",
                      ["&amp;"] = "&",
                      ["&quot;"] = '"',
                      ["&apos;"] = "'",
                      ["&#(%d+);"] = function (x)
                          local d = tonumber(x)
                          if d >= 0 and d < 256 then
                              return string.char(d)
                          else
                              return "&#"..d..";"
                          end
                      end,
                      ["&#x(%x+);"] = function (x)
                          local d = tonumber(x,16)
                          if d >= 0 and d < 256 then
                              return string.char(d)
                          else
                              return "&#x"..x..";"
                          end
                      end,
                    }

    --- Report an error through options.errorHandler (no-op when unset).
    obj._err = function(self, err, pos)
        if self.options.errorHandler then
            self.options.errorHandler(err, pos)
        end
    end

    obj._errstr = { xmlErr = "Error Parsing XML",
                    declErr = "Error Parsing XMLDecl",
                    declStartErr = "XMLDecl not at start of document",
                    declAttrErr = "Invalid XMLDecl attributes",
                    piErr = "Error Parsing Processing Instruction",
                    commentErr = "Error Parsing Comment",
                    cdataErr = "Error Parsing CDATA",
                    dtdErr = "Error Parsing DTD",
                    endTagErr = "End Tag Attributes Invalid",
                    unmatchedTagErr = "Unbalanced Tag",
                    incompleteXmlErr = "Incomplete XML Document",
                  }

    --- Trim leading/trailing whitespace when options.stripWS is set.
    obj._stripWS = function(self, s)
        if self.options.stripWS then
            s = string.gsub(s, '^%s+', '')
            s = string.gsub(s, '%s+$', '')
        end
        return s
    end

    --- Expand the entities listed in _ENTITIES when options.expandEntities is set.
    obj._parseEntities = function(self, s)
        if self.options.expandEntities then
            local amp = self._ENTITIES["&amp;"]
            for k, v in pairs(self._ENTITIES) do
                if k ~= "&amp;" then
                    s = string.gsub(s, k, v)
                end
            end
            -- "&amp;" goes last: pairs() order is unspecified and expanding
            -- it first would turn "&amp;lt;" into "<" instead of "&lt;".
            if amp then
                s = string.gsub(s, "&amp;", amp)
            end
        end
        return s
    end

    --- Match a DOCTYPE declaration at or after pos.
    --@return match start, match end and a table describing the DTD, or nil
    obj._parseDTD = function(self, s, pos)
        -- match,endmatch,root,type,name,uri,internal
        local m,e,r,t,n,u,i
        m,e,r,t,u,i = string.find(s, self._DTD1, pos)
        if m then
            return m,e,{_root=r,_type=t,_uri=u,_internal=i}
        end
        m,e,r,t,n,u,i = string.find(s, self._DTD2, pos)
        if m then
            return m,e,{_root=r,_type=t,_name=n,_uri=u,_internal=i}
        end
        m,e,r,i = string.find(s, self._DTD3, pos)
        if m then
            return m,e,{_root=r,_internal=i}
        end
        m,e,r,t,u = string.find(s, self._DTD4, pos)
        if m then
            return m,e,{_root=r,_type=t,_uri=u}
        end
        m,e,r,t,n,u = string.find(s, self._DTD5, pos)
        if m then
            return m,e,{_root=r,_type=t,_name=n,_uri=u}
        end
        return nil
    end

    ---Parses a string representing a tag
    --@param s String containing tag text
    --@return The tag name, and a table of attributes (keys lower-cased,
    --values entity-expanded) or nil when the tag has no attribute
    obj._parseTag = function(self, s)
        local attrs = {}
        local tagname = string.gsub(s, self._TAG, '%1')
        string.gsub(s, self._ATTR1, function (k, v)
            attrs[string.lower(k)] = self:_parseEntities(v)
            attrs._ = 1
        end)
        string.gsub(s, self._ATTR2, function (k, v)
            attrs[string.lower(k)] = self:_parseEntities(v)
            attrs._ = 1
        end)
        -- "_" only marks that at least one attribute was seen
        if attrs._ then
            attrs._ = nil
        else
            attrs = nil
        end
        return tagname, attrs
    end

    return obj

end

return { xmlParser = xmlParser }
diff --git a/plugins/newsdownloader.koplugin/main.lua b/plugins/newsdownloader.koplugin/main.lua
new file mode 100644
index 000000000..3d622aa2f
--- /dev/null
+++ b/plugins/newsdownloader.koplugin/main.lua
@@ -0,0 +1,180 @@
local WidgetContainer = require("ui/widget/container/widgetcontainer")
local InfoMessage = require("ui/widget/infomessage")
local UIManager = require("ui/uimanager")
local DataStorage = require("datastorage")
local FFIUtil = require("ffi/util")
local util = require("util")
local T = FFIUtil.template
local _ = require("gettext")
local logger = require("logger")
local ffi = require("ffi")
local http = require("socket.http")
local ltn12 = require("ltn12")

-- NOTE(review): `lfs` is used below but never required in this file;
-- presumably it is a global provided by the KOReader runtime - confirm.

local NewsDownloader = WidgetContainer:new{}

local initialized = false -- for only once lazy initialization
local FEED_CONFIG_FILE = "feed_config.lua"
local FILE_EXTENSION = ".html"
local NEWS_DL_DIR_NAME = "news"
-- Both are filled in lazily by addToMainMenu; NEWS_DL_DIR ends with "/".
local NEWS_DL_DIR, FEED_CONFIG_PATH

--- Parse an XML document into a Lua table.
--@param xml_str Whole document as a string
--@return The simpleTreeHandler root table, or nil when parsing failed
local function deserializeXMLString(xml_str)
    -- uses LuaXML https://github.com/manoelcampos/LuaXML
    -- The MIT License (MIT)
    -- Copyright (c) 2016 Manoel Campos da Silva Filho
    local treehdl = require("lib/handler")
    local libxml = require("lib/xml")

    -- Handler that builds the Lua table representing the XML document
    local xmlhandler = treehdl.simpleTreeHandler()
    -- The parser raises on malformed input, so run it protected
    local ok, err = pcall(function()
        libxml.xmlParser(xmlhandler):parse(xml_str)
    end)
    if not ok then
        logger.warn("NewsDownloader: cannot parse feed XML:", err)
        return
    end
    return xmlhandler.root
end

function NewsDownloader:init()
    self.ui.menu:registerToMainMenu(self)
end

--- Register the plugin's menu entry and, on first call, set up its paths.
--@param menu_items Table of menu items to add the entry to
function NewsDownloader:addToMainMenu(menu_items)
    if not initialized then
        NEWS_DL_DIR = ("%s/%s/"):format(DataStorage:getDataDir(), NEWS_DL_DIR_NAME)
        if not lfs.attributes(NEWS_DL_DIR, "mode") then
            lfs.mkdir(NEWS_DL_DIR)
        end

        FEED_CONFIG_PATH = NEWS_DL_DIR .. FEED_CONFIG_FILE
        initialized = true
    end

    menu_items.rss_news_downloader = {
        -- NOTE(review): processFeedSource only understands RSS documents
        -- (it requires feeds.rss); the label also advertises Atom.
        text = _("News (RSS/Atom) downloader"),
        sub_item_table = {
            {
                text = _("Download news"),
                callback = function() self:loadConfigAndProcessFeeds() end,
            },
            {
                text = _("Go to news folder"),
                callback = function()
                    local FileManager = require("apps/filemanager/filemanager")
                    if FileManager.instance then
                        FileManager.instance:reinit(NEWS_DL_DIR)
                    else
                        FileManager:showFiles(NEWS_DL_DIR)
                    end
                end,
            },
            {
                text = _("Remove news"),
                callback = function()
                    -- purge all downloaded news files, but keep the feed config
                    for entry in lfs.dir(NEWS_DL_DIR) do
                        if entry ~= "." and entry ~= ".." and entry ~= FEED_CONFIG_FILE then
                            -- NEWS_DL_DIR already carries the trailing slash
                            local entry_path = NEWS_DL_DIR .. entry
                            local entry_mode = lfs.attributes(entry_path, "mode")
                            if entry_mode == "file" then
                                ffi.C.remove(entry_path)
                            elseif entry_mode == "directory" then
                                FFIUtil.purgeDir(entry_path)
                            end
                        end
                    end
                    UIManager:show(InfoMessage:new{
                        text = _("All news removed.")
                    })
                end,
            },
            {
                text = _("Help"),
                callback = function()
                    UIManager:show(InfoMessage:new{
                        text = T(_("News downloader can be configured in the feeds config file:\n%1\n\nIt downloads news items to:\n%2.\n\nTo set your own news sources edit foregoing feeds config file. Items download limit can be set there."),
                                 FEED_CONFIG_PATH,
                                 NEWS_DL_DIR)
                    })
                end,
            },
        },
    }
end

--- Load the feed config (creating it from the bundled default when it is
-- missing) and process every feed listed in it.
function NewsDownloader:loadConfigAndProcessFeeds()
    local info = InfoMessage:new{ text = _("Loading news feed config…") }
    UIManager:show(info)
    -- force repaint due to upcoming blocking calls
    UIManager:forceRePaint()
    UIManager:close(info)

    if not lfs.attributes(FEED_CONFIG_PATH, "mode") then
        logger.dbg("NewsDownloader: Creating initial feed config.")
        FFIUtil.copyFile(FFIUtil.joinPath(self.path, FEED_CONFIG_FILE),
                         FEED_CONFIG_PATH)
    end
    -- On failure pcall hands back the error message in feed_config
    local ok, feed_config = pcall(dofile, FEED_CONFIG_PATH)
    if not ok or type(feed_config) ~= "table" then
        logger.info("NewsDownloader: Feed config not found.", FEED_CONFIG_PATH, feed_config)
        return
    end

    if #feed_config <= 0 then
        logger.info('NewsDownloader: empty feed list.', FEED_CONFIG_PATH)
        return
    end

    for idx, feed in ipairs(feed_config) do
        local url = type(feed) == "table" and feed[1]
        local limit = type(feed) == "table" and feed.limit
        if url and limit then
            info = InfoMessage:new{ text = T(_("Processing: %1"), url) }
            UIManager:show(info)
            -- processFeedSource is a blocking call, so manually force a UI refresh beforehand
            UIManager:forceRePaint()
            self:processFeedSource(url, tonumber(limit))
            UIManager:close(info)
        else
            logger.warn('NewsDownloader: invalid feed config entry', feed)
        end
    end

    UIManager:show(InfoMessage:new{
        text = _("Downloading news finished."),
        timeout = 1,
    })
end

--- Fetch one RSS feed and store the pages its items link to.
--@param url Address of the RSS document
--@param limit Maximum number of items to handle: 0 downloads nothing,
--a negative (or nil) limit downloads every item
function NewsDownloader:processFeedSource(url, limit)
    if limit == 0 then
        -- "no download" for this feed: do not even touch the network
        return
    end

    local resp_lines = {}
    local fetched, fetch_err = http.request({ url = url, sink = ltn12.sink.table(resp_lines), })
    if not fetched then
        logger.warn("NewsDownloader: cannot fetch feed", url, fetch_err)
        return
    end
    local feeds = deserializeXMLString(table.concat(resp_lines))
    if not feeds then return end

    local channel = type(feeds.rss) == "table" and feeds.rss.channel
    if type(channel) ~= "table" or type(channel.title) ~= "string" or not channel.item then
        logger.info('NewsDownloader: Got invalid feeds', feeds)
        return
    end

    local feed_output_dir = ("%s%s/"):format(
        NEWS_DL_DIR, util.replaceInvalidChars(channel.title))
    if not lfs.attributes(feed_output_dir, "mode") then
        lfs.mkdir(feed_output_dir)
    end

    -- The tree handler collapses a lone <item> into the item itself instead
    -- of a one-element list; normalise so the loop always sees a list.
    local items = channel.item
    if type(items) ~= "table" or items[1] == nil then
        items = { items }
    end

    for index, item in ipairs(items) do
        if index - 1 == limit then
            break
        end
        if type(item) == "table" and type(item.title) == "string"
            and type(item.link) == "string" then
            local news_dl_path = ("%s%s%s"):format(feed_output_dir,
                                                   util.replaceInvalidChars(item.title),
                                                   FILE_EXTENSION)
            logger.dbg("NewsDownloader: News file will be stored to :", news_dl_path)
            local file, open_err = io.open(news_dl_path, 'w')
            if file then
                -- Download the article the item links to, not the feed itself
                local dl_ok, dl_err = http.request({ url = item.link, sink = ltn12.sink.file(file), })
                if not dl_ok then
                    logger.warn("NewsDownloader: cannot download", item.link, dl_err)
                end
                -- The sink closes the file once the transfer completes; make
                -- sure it is not left open when the request was cut short.
                if io.type(file) == "file" then
                    file:close()
                end
            else
                logger.warn("NewsDownloader: cannot write", news_dl_path, open_err)
            end
        else
            logger.warn("NewsDownloader: skipping feed item without title or link", item)
        end
    end
end

return NewsDownloader