Simple News (RSS/Atom) downloader plugin (#2592)

* Simple News (RSS/Atom) downloader plugin
* Limit news from sources
* new News Downloader menu entry - goto news dir
* Initial configuration logic
* Download each feed source to separate folder
* 2 feed sources in example config
pull/2795/head
mwoz123 7 years ago committed by Frans de Jonge
parent a1ec64b3ed
commit d6c81f5097

@ -97,6 +97,9 @@ read_globals = {
exclude_files = {
"frontend/luxl.lua",
"plugins/newsdownloader.koplugin/lib/handler.lua",
"plugins/newsdownloader.koplugin/lib/LICENSE",
"plugins/newsdownloader.koplugin/lib/xml.lua",
}
-- TODO: clean up and enforce max line width (631)

@ -36,6 +36,7 @@ local order = {
"storage_stat",
"cloud_storage",
"read_timer",
"rss_news_downloader",
"synchronize_time",
"terminal",
"----------------------------",

@ -59,6 +59,7 @@ local order = {
"synchronize_time",
"progress_sync",
"zsync",
"rss_news_downloader",
"terminal",
},
search = {

@ -0,0 +1,9 @@
return {
-- List your feeds here, one entry per feed: { "<feed URL>", limit = <number> }.
-- Only plain http URLs are supported for now.
-- Atom is currently not supported, only RSS.
{ "http://www.pcworld.com/index.rss", limit = 1 },
{ "http://www.economist.com/sections/science-technology/rss.xml", limit = 2},
-- `limit` is the maximum number of items downloaded from a feed:
-- 0 means download nothing, -1 means no limit.
{ "http://www.economist.com/sections/culture/rss.xml", limit = 0 },
}

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Manoel Campos da Silva Filho
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,346 @@
---
-- Overview:
-- =========
-- Standard XML event handler(s) for XML parser module (xml.lua)
--
-- Features:
-- =========
-- printHandler - Generate XML event trace
-- domHandler - Generate DOM-like node tree
-- simpleTreeHandler - Generate 'simple' node tree
--
-- API:
-- ====
-- Must be called as handler function from xmlParser
-- and implement XML event callbacks (see xmlParser.lua
-- for callback API definition)
--
-- printHandler:
-- -------------
--
-- printHandler prints event trace for debugging
--
-- domHandler:
-- -----------
--
-- domHandler generates a DOM-like node tree structure with
-- a single ROOT node parent - each node is a table comprising
-- fields below.
--
-- node = { _name = <Element Name>,
-- _type = ROOT|ELEMENT|TEXT|COMMENT|PI|DECL|DTD,
-- _attr = { Node attributes - see callback API },
-- _parent = <Parent Node>
-- _children = { List of child nodes - ROOT/NODE only }
-- }
--
-- The dom structure is capable of representing any valid XML document
--
-- simpleTreeHandler
-- -----------------
--
-- simpleTreeHandler is a simplified handler which attempts
-- to generate a more 'natural' table based structure which
-- supports many common XML formats.
--
-- The XML tree structure is mapped directly into a recursive
-- table structure with node names as keys and child elements
-- as either a table of values or directly as a string value
-- for text. Where there is only a single child element this
-- is inserted as a named key - if there are multiple
-- elements these are inserted as a vector (in some cases it
-- may be preferable to always insert elements as a vector
-- which can be specified on a per element basis in the
-- options). Attributes are inserted as a child element with
-- a key of '_attr'.
--
-- Only Tag/Text & CDATA elements are processed - all others
-- are ignored.
--
-- This format has some limitations - primarily
--
-- * Mixed-Content behaves unpredictably - the relationship
-- between text elements and embedded tags is lost and
-- multiple levels of mixed content does not work
-- * If a leaf element has both a text element and attributes
-- then the text must be accessed through a vector (to
-- provide a container for the attribute)
--
-- In general however this format is relatively useful.
--
-- It is much easier to understand by running some test
-- data through 'textxml.lua -simpletree' than to read this)
--
-- Options
-- =======
-- simpleTreeHandler.options.noreduce = { <tag> = bool,.. }
--
-- - Nodes not to reduce children vector even if only
-- one child
--
-- domHandler.options.(comment|pi|dtd|decl)Node = bool
--
-- - Include/exclude given node types
--
-- Usage
-- =====
-- Passed as delegate in xmlParser constructor and called
-- as callback by xmlParser:parse(xml) method.
--
-- See textxml.lua for examples
-- License:
-- ========
--
-- This code is freely distributable under the terms of the Lua license
-- (<a href="http://www.lua.org/copyright.html">http://www.lua.org/copyright.html</a>)
--
-- History
-- =======
-- $Id: handler.lua,v 1.1.1.1 2001/11/28 06:11:33 paulc Exp $
--
-- $Log: handler.lua,v $
-- Revision 1.1.1.1 2001/11/28 06:11:33 paulc
-- Initial Import
--@author Paul Chakravarti (paulc@passtheaardvark.com)<p/>
--- Builds a string representation of a table.
-- Convenience helper for printHandler. Nested tables are rendered
-- recursively, so self-referencing tables are not supported.
-- @param t Value to render
-- @return `t` itself when it is not a table, otherwise a string of the
-- form `{k1=v1,k2=v2}` with entries in `pairs` order
local function showTable(t)
    if type(t) ~= 'table' then
        return t
    end
    -- Collect the "key=value" fragments and join them once at the end.
    local parts = {}
    for key, value in pairs(t) do
        if type(value) == 'table' then
            value = showTable(value)
        end
        parts[#parts + 1] = string.format("%s=%s", key, value)
    end
    return '{' .. table.concat(parts, ',') .. '}'
end
--- Handler that writes a plain-text trace of every parser event to stdout.
-- @return Handler table with starttag, endtag, text, cdata, comment, dtd,
-- pi and decl callbacks
local printHandler = function()
    -- Writes one " + name='value'" line per attribute; prints nothing when
    -- no attribute table was supplied.
    local function writeAttrs(attrs)
        if not attrs then
            return
        end
        for name, value in pairs(attrs) do
            io.write(string.format(" + %s='%s'\n", name, value))
        end
    end

    -- Builds a callback that prints only the label followed by the value.
    local function valueTracer(label)
        return function(self, t, s, e)
            io.write(label .. t .. "\n")
        end
    end

    -- Builds a callback that prints the label, the value and its attributes.
    local function attrTracer(label)
        return function(self, t, a, s, e)
            io.write(label .. t .. "\n")
            writeAttrs(a)
        end
    end

    return {
        starttag = attrTracer("Start : "),
        endtag = valueTracer("End : "),
        text = valueTracer("Text : "),
        cdata = valueTracer("CDATA : "),
        comment = valueTracer("Comment : "),
        dtd = attrTracer("DTD : "),
        pi = attrTracer("PI : "),
        decl = attrTracer("XML Decl : "),
    }
end
--- Gets the first key of a table.
-- @param tb Table whose first key is wanted
-- @return The first key visited by `pairs`, nil for an empty table, or
-- `tb` itself when it is not a table
local function getFirstKey(tb)
    if type(tb) ~= "table" then
        return tb
    end
    -- The original author noted that calling next(tb) directly did not work
    -- here (it reportedly hung), so the first key is taken from a pairs loop.
    for key in pairs(tb) do
        return key
    end
    return nil
end
--- Handler that builds a simplified Lua table tree from XML parser events.
-- Child elements are stored under their tag name, text is stored in the
-- array part of the enclosing node, and attributes (when the parser enabled
-- `parseAttributes` on this handler) are stored under `_attr`. Once the
-- outermost element closes, vectors holding a single child are collapsed
-- into that child.
-- @return Handler table; the resulting tree is available as `handler.root`
local function simpleTreeHandler()
    local obj = {}
    obj.root = {}
    obj.stack = {obj.root; n=1}
    obj.options = {noreduce = {}}

    --- Recursively removes redundant vectors for nodes with a single child.
    --@param node Node to reduce
    --@param key Key under which `node` is stored in `parent`
    --@param parent Table holding `node`
    obj.reduce = function(self, node, key, parent)
        for k, v in pairs(node) do
            if type(v) == 'table' then
                self:reduce(v, k, node)
            end
        end
        if #node == 1 and not self.options.noreduce[key] and
            node._attr == nil then
            parent[key] = node[1]
        else
            node.n = nil
        end
    end

    --@param t Tag name
    --@param a Attributes table (stored as _attr)
    obj.starttag = function(self, t, a)
        local node = {}
        if self.parseAttributes == true then
            node._attr = a
        end
        local current = self.stack[#self.stack]
        if current[t] then
            table.insert(current[t], node)
        else
            current[t] = {node; n=1}
        end
        table.insert(self.stack, node)
    end

    --@param t Tag name
    --@param s Start position of the tag. The parser invokes callbacks as
    --(name, attrs, start, end), so for end tags this slot receives the
    --always-nil attrs and the real start position follows in the varargs.
    obj.endtag = function(self, t, s, ...)
        -- Accept both call shapes: endtag(t, pos) and endtag(t, nil, pos, e).
        local pos = s or (...)
        -- Table for the tag currently being closed
        local current = self.stack[#self.stack]
        -- Table for the tag that contains the current one
        local prev = self.stack[#self.stack - 1]
        if not prev[t] then
            -- tostring() keeps the message intact when no position is known
            error("XML Error - Unmatched Tag [" .. tostring(pos) .. ":" .. t .. "]\n")
        end
        if prev == self.root then
            -- Once parsing complete recursively reduce tree
            self:reduce(prev, nil, nil)
        end
        local firstKey = getFirstKey(current)
        -- A node without any key has no value associated with its tag (as
        -- with self-closing tags such as <password />). An empty string is
        -- stored in that case so callers get "" instead of an empty table,
        -- which is simpler for applications to print.
        if firstKey == nil then
            current[t] = ""
            prev[t] = ""
        end
        table.remove(self.stack)
    end

    --@param t Text content, appended to the array part of the current node
    obj.text = function(self, t)
        local current = self.stack[#self.stack]
        table.insert(current, t)
    end

    -- CDATA sections are handled exactly like text
    obj.cdata = obj.text
    return obj
end
--- Handler that builds a DOM-like node tree from XML parser events.
-- Every node is a table with a `_type` field and a `_parent` link; element
-- nodes additionally carry `_name`, `_attr` and `_children`. Comment, PI,
-- DECL and DTD nodes are only recorded while the matching entry in
-- `options` is truthy.
-- @return Handler table; the resulting tree is available as `handler.root`
local function domHandler()
    local obj = {}
    obj.options = {commentNode=1, piNode=1, dtdNode=1, declNode=1}
    obj.root = { _children = {n=0}, _type = "ROOT" }
    obj.current = obj.root

    --@param t Tag name
    --@param a Attributes table
    obj.starttag = function(self, t, a)
        local node = { _type = 'ELEMENT',
                       _name = t,
                       _attr = a,
                       _parent = self.current,
                       _children = {n=0} }
        table.insert(self.current._children, node)
        -- descend: following events belong to the new element
        self.current = node
    end

    --@param t Tag name
    --@param s Start position of the tag. The parser invokes callbacks as
    --(name, attrs, start, end), so for end tags this slot receives the
    --always-nil attrs and the real start position follows in the varargs.
    obj.endtag = function(self, t, s, ...)
        -- Accept both call shapes: endtag(t, pos) and endtag(t, nil, pos, e).
        local pos = s or (...)
        if t ~= self.current._name then
            -- tostring() keeps the message intact when no position is known
            error("XML Error - Unmatched Tag [" .. tostring(pos) .. ":" .. t .. "]\n")
        end
        -- ascend back to the enclosing element
        self.current = self.current._parent
    end

    --@param t Text content
    obj.text = function(self, t)
        local node = { _type = "TEXT",
                       _parent = self.current,
                       _text = t }
        table.insert(self.current._children, node)
    end

    --@param t Comment text
    obj.comment = function(self, t)
        if self.options.commentNode then
            local node = { _type = "COMMENT",
                           _parent = self.current,
                           _text = t }
            table.insert(self.current._children, node)
        end
    end

    --@param t Processing instruction name
    --@param a Attributes table
    obj.pi = function(self, t, a)
        if self.options.piNode then
            local node = { _type = "PI",
                           _name = t,
                           _attr = a,
                           _parent = self.current }
            table.insert(self.current._children, node)
        end
    end

    --@param t Declaration name
    --@param a Attributes table
    obj.decl = function(self, t, a)
        if self.options.declNode then
            local node = { _type = "DECL",
                           _name = t,
                           _attr = a,
                           _parent = self.current }
            table.insert(self.current._children, node)
        end
    end

    --@param t Root element name
    --@param a DTD description table
    obj.dtd = function(self, t, a)
        if self.options.dtdNode then
            local node = { _type = "DTD",
                           _name = t,
                           _attr = a,
                           _parent = self.current }
            table.insert(self.current._children, node)
        end
    end

    -- CDATA sections are recorded as TEXT nodes
    obj.cdata = obj.text
    return obj
end
-- Module exports: only simpleTreeHandler is exposed; printHandler and
-- domHandler remain private to this file.
return { simpleTreeHandler = simpleTreeHandler }

@ -0,0 +1,483 @@
---
-- Overview:
-- =========
--
-- This module provides a non-validating XML stream parser in Lua.
--
-- Features:
-- =========
--
-- * Tokenises well-formed XML (relatively robustly)
-- * Flexible handler based event API (see below)
-- * Parses all XML Infoset elements - ie.
-- - Tags
-- - Text
-- - Comments
-- - CDATA
-- - XML Decl
-- - Processing Instructions
-- - DOCTYPE declarations
-- * Provides limited well-formedness checking
-- (checks for basic syntax & balanced tags only)
-- * Flexible whitespace handling (selectable)
-- * Entity Handling (selectable)
--
-- Limitations:
-- ============
--
-- * Non-validating
-- * No charset handling
-- * No namespace support
-- * Shallow well-formedness checking only (fails
-- to detect most semantic errors)
--
-- API:
-- ====
--
-- The parser provides a partially object-oriented API with
-- functionality split into tokeniser and handler components.
--
-- The handler instance is passed to the tokeniser and receives
-- callbacks for each XML element processed (if a suitable handler
-- function is defined). The API is conceptually similar to the
-- SAX API but implemented differently.
--
-- The following events are generated by the tokeniser
--
-- handler:starttag - Start Tag
-- handler:endtag - End Tag
-- handler:text - Text
-- handler:decl - XML Declaration
-- handler:pi - Processing Instruction
-- handler:comment - Comment
-- handler:dtd - DOCTYPE definition
-- handler:cdata - CDATA
--
-- The function prototype for all the callback functions is
--
-- callback(val,attrs,start,end)
--
-- where attrs is a table and val/attrs are overloaded for
-- specific callbacks - ie.
--
-- Callback val attrs (table)
-- -------- --- -------------
-- start name { attributes (name=val).. }
-- end name nil
-- text <text> nil
-- cdata <text> nil
-- decl "xml" { attributes (name=val).. }
-- pi pi name { attributes (if present)..
-- _text = <PI Text>
-- }
-- comment <text> nil
-- dtd root element { _root = <Root Element>,
-- _type = SYSTEM|PUBLIC,
-- _name = <name>,
-- _uri = <uri>,
-- _internal = <internal dtd>
-- }
--
-- (start & end provide the character positions of the start/end
-- of the element)
--
-- XML data is passed to the parser instance through the 'parse'
-- method (Note: must be passed a single string currently)
--
-- Options
-- =======
--
-- Parser options are controlled through the 'self.options' table.
-- Available options are -
--
-- * stripWS
--
-- Strip non-significant whitespace (leading/trailing)
-- and do not generate events for empty text elements
--
-- * expandEntities
--
-- Expand entities (standard entities + single char
-- numeric entities only currently - could be extended
-- at runtime if suitable DTD parser added elements
-- to table (see obj._ENTITIES). May also be possible
-- to expand multibyte entities for UTF-8 only
--
-- * errorHandler
--
-- Custom error handler function
--
-- NOTE: Boolean options must be set to 'nil' not '0'
--
-- Usage
-- =====
--
-- Create a handler instance -
--
-- h = { starttag = function(self,t,a,s,e) .... end,
-- endtag = function(self,t,a,s,e) .... end,
-- text = function(self,t,a,s,e) .... end }
-- h.cdata = h.text
--
-- (or use predefined handler - see handler.lua)
--
-- Create parser instance -
--
-- p = xmlParser(h)
--
-- Set options -
--
-- p.options.xxxx = nil
--
-- Parse XML data -
--
-- p:parse("<?xml... ")
-- License:
-- ========
--
-- This code is freely distributable under the terms of the Lua license
-- (http://www.lua.org/copyright.html)
--
-- History
-- =======
-- Added parameter parseAttributes (boolean) in xmlParser.parse method
-- If true (default value), tag attributes are parsed.
-- by Manoel Campos da Silva Filho
-- http://manoelcampos.com
-- http://about.me/manoelcampos
--
-- $Id: xml.lua,v 1.1.1.1 2001/11/28 06:11:33 paulc Exp $
--
-- $Log: xml.lua,v $
-- Revision 1.1.1.1 2001/11/28 06:11:33 paulc
-- Initial Import
--
--@author Paul Chakravarti (paulc@passtheaardvark.com)<p/>
---Creates a non-validating XML stream parser bound to an event handler.
--@param handler Handler object whose callbacks (starttag, endtag, text,
--cdata, comment, pi, decl, dtd) receive the parse events; callbacks the
--handler does not define are skipped. @see handler.lua
--@return Parser object; run it with parser:parse(str [, parseAttributes])
local xmlParser = function(handler)
    local obj = {}

    -- Public attributes
    obj.options = {
        stripWS = 1,
        expandEntities = 1,
        errorHandler = function(err, pos)
            error(string.format("%s [char=%d]\n",
                                err or "Parse Error", pos))
        end,
    }

    -- Public methods

    ---Tokenises `str` and fires the handler callbacks for each element.
    --@param str Complete XML document as a single string
    --@param parseAttributes Stored on the handler as `parseAttributes`;
    --defaults to true when nil
    obj.parse = function(self, str, parseAttributes)
        if parseAttributes == nil then
            parseAttributes = true
        end
        self._handler.parseAttributes = parseAttributes

        local match, endmatch, pos = 0, 0, 1
        local text, endt1, endt2, tagstr, tagname, attrs, starttext, endtext
        local errstart, errend, extstart, extend

        while match do
            -- Get next tag (first pass - fix exceptions below)
            match, endmatch, text, endt1, tagstr, endt2 = string.find(str, self._XML, pos)
            if not match then
                if string.find(str, self._WS, pos) then
                    -- No more text - check document complete
                    if #self._stack ~= 0 then
                        self:_err(self._errstr.incompleteXmlErr, pos)
                    end
                else
                    -- Unparsable text
                    self:_err(self._errstr.xmlErr, pos)
                end
                -- Nothing left to tokenise; also stops cleanly when the
                -- error handler chose not to raise.
                break
            end

            -- Handle leading text
            starttext = match
            endtext = match + string.len(text) - 1
            match = match + string.len(text)
            text = self:_parseEntities(self:_stripWS(text))
            if text ~= "" and self._handler.text then
                -- report the span of the text itself, not of the tag after it
                self._handler:text(text, nil, starttext, endtext)
            end

            -- Test for tag type
            if string.find(string.sub(tagstr, 1, 5), "^%?xml%s") then
                -- XML Declaration
                match, endmatch, text = string.find(str, self._PI, pos)
                if not match then
                    self:_err(self._errstr.declErr, pos)
                    return
                end
                if match ~= 1 then
                    -- Must be at start of doc if present
                    self:_err(self._errstr.declStartErr, pos)
                end
                tagname, attrs = self:_parseTag(text)
                -- TODO: Check attributes are valid
                -- Check for version (mandatory); attrs is nil when the
                -- declaration carries no attributes at all
                if not attrs or attrs.version == nil then
                    self:_err(self._errstr.declAttrErr, pos)
                end
                if self._handler.decl then
                    self._handler:decl(tagname, attrs, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 1) == "?" then
                -- Processing Instruction
                match, endmatch, text = string.find(str, self._PI, pos)
                if not match then
                    self:_err(self._errstr.piErr, pos)
                    return
                end
                if self._handler.pi then
                    -- Parse PI attributes & text
                    tagname, attrs = self:_parseTag(text)
                    local pi = string.sub(text, string.len(tagname) + 1)
                    if pi ~= "" then
                        if attrs then
                            attrs._text = pi
                        else
                            attrs = { _text = pi }
                        end
                    end
                    self._handler:pi(tagname, attrs, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 3) == "!--" then
                -- Comment
                match, endmatch, text = string.find(str, self._COMMENT, pos)
                if not match then
                    self:_err(self._errstr.commentErr, pos)
                    return
                end
                if self._handler.comment then
                    text = self:_parseEntities(self:_stripWS(text))
                    -- comments carry no attributes
                    self._handler:comment(text, nil, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 8) == "!DOCTYPE" then
                -- DTD: must be matched against the document text
                match, endmatch, attrs = self:_parseDTD(str, pos)
                if not match then
                    self:_err(self._errstr.dtdErr, pos)
                    return
                end
                if self._handler.dtd then
                    self._handler:dtd(attrs._root, attrs, match, endmatch)
                end
            elseif string.sub(tagstr, 1, 8) == "![CDATA[" then
                -- CDATA
                match, endmatch, text = string.find(str, self._CDATA, pos)
                if not match then
                    self:_err(self._errstr.cdataErr, pos)
                    return
                end
                if self._handler.cdata then
                    self._handler:cdata(text, nil, match, endmatch)
                end
            else
                -- Normal tag
                -- Need check for embedded '>' in attribute value and extend
                -- match recursively if necessary eg. <tag attr="123>456">
                while true do
                    errstart, errend = string.find(tagstr, self._ATTRERR1)
                    if errend == nil then
                        errstart, errend = string.find(tagstr, self._ATTRERR2)
                        if errend == nil then
                            break
                        end
                    end
                    extstart, extend, endt2 = string.find(str, self._TAGEXT, endmatch + 1)
                    if not extstart then
                        -- the open quote is never followed by a closing '>'
                        self:_err(self._errstr.xmlErr, pos)
                        return
                    end
                    -- append the text between the premature '>' and the real
                    -- tag end, leaving out a self-closing '/'
                    tagstr = tagstr .. string.sub(str, endmatch, extstart - 1)
                    endmatch = extend
                end

                -- Extract tagname/attrs
                tagname, attrs = self:_parseTag(tagstr)

                if endt1 == "/" then
                    -- End tag
                    if attrs then
                        -- Shouldnt have any attributes in endtag
                        self:_err(string.format("%s (/%s)",
                                                self._errstr.endTagErr,
                                                tagname),
                                  pos)
                    end
                    -- Keep the stack balanced even when the handler does not
                    -- define an endtag callback.
                    if table.remove(self._stack) ~= tagname then
                        self:_err(string.format("%s (/%s)",
                                                self._errstr.unmatchedTagErr,
                                                tagname),
                                  pos)
                    end
                    if self._handler.endtag then
                        self._handler:endtag(tagname, nil, match, endmatch)
                    end
                else
                    -- Start Tag
                    table.insert(self._stack, tagname)
                    if self._handler.starttag then
                        self._handler:starttag(tagname, attrs, match, endmatch)
                    end
                    -- TODO: self-closing tags are returned as a table, which
                    -- is awkward for applications to handle; an empty string
                    -- field should be returned instead.
                    -- Self-Closing Tag
                    if endt2 == "/" then
                        table.remove(self._stack)
                        if self._handler.endtag then
                            self._handler:endtag(tagname, nil, match, endmatch)
                        end
                    end
                end
            end
            pos = endmatch + 1
        end
    end

    -- Private attributes/functions
    obj._handler = handler
    obj._stack = {}
    obj._XML = '^([^<]*)<(%/?)([^>]-)(%/?)>'
    obj._ATTR1 = '([%w-:_]+)%s*=%s*"(.-)"'
    obj._ATTR2 = '([%w-:_]+)%s*=%s*\'(.-)\''
    obj._CDATA = '<%!%[CDATA%[(.-)%]%]>'
    obj._PI = '<%?(.-)%?>'
    obj._COMMENT = '<!%-%-(.-)%-%->'
    obj._TAG = '^(.-)%s.*'
    obj._LEADINGWS = '^%s+'
    obj._TRAILINGWS = '%s+$'
    obj._WS = '^%s*$'
    obj._DTD1 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*(%b[])%s*>'
    obj._DTD2 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*(%b[])%s*>'
    obj._DTD3 = '<!DOCTYPE%s+(.-)%s*(%b[])%s*>'
    obj._DTD4 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*>'
    obj._DTD5 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*>'
    obj._ATTRERR1 = '=%s*"[^"]*$'
    obj._ATTRERR2 = '=%s*\'[^\']*$'
    obj._TAGEXT = '(%/?)>'
    -- Maps an entity pattern to its replacement (string or function).
    obj._ENTITIES = { ["&lt;"] = "<",
                      ["&gt;"] = ">",
                      ["&amp;"] = "&",
                      ["&quot;"] = '"',
                      ["&apos;"] = "'",
                      ["&#(%d+);"] = function(x)
                          local d = tonumber(x)
                          if d >= 0 and d < 256 then
                              return string.char(d)
                          else
                              return "&#" .. d .. ";"
                          end
                      end,
                      ["&#x(%x+);"] = function(x)
                          local d = tonumber(x, 16)
                          if d >= 0 and d < 256 then
                              return string.char(d)
                          else
                              return "&#x" .. x .. ";"
                          end
                      end,
                    }

    ---Reports an error through options.errorHandler, if one is set.
    obj._err = function(self, err, pos)
        if self.options.errorHandler then
            self.options.errorHandler(err, pos)
        end
    end

    obj._errstr = { xmlErr = "Error Parsing XML",
                    declErr = "Error Parsing XMLDecl",
                    declStartErr = "XMLDecl not at start of document",
                    declAttrErr = "Invalid XMLDecl attributes",
                    piErr = "Error Parsing Processing Instruction",
                    commentErr = "Error Parsing Comment",
                    cdataErr = "Error Parsing CDATA",
                    dtdErr = "Error Parsing DTD",
                    endTagErr = "End Tag Attributes Invalid",
                    unmatchedTagErr = "Unbalanced Tag",
                    incompleteXmlErr = "Incomplete XML Document",
                  }

    ---Strips leading/trailing whitespace when options.stripWS is set.
    obj._stripWS = function(self, s)
        if self.options.stripWS then
            s = string.gsub(s, '^%s+', '')
            s = string.gsub(s, '%s+$', '')
        end
        return s
    end

    ---Expands the entities listed in _ENTITIES when options.expandEntities
    --is set.
    obj._parseEntities = function(self, s)
        if self.options.expandEntities then
            -- pairs() order is unspecified, so "&amp;" is deferred to the
            -- end: expanding it first would turn "&amp;lt;" into "&lt;" and
            -- then wrongly into "<".
            for k, v in pairs(self._ENTITIES) do
                if k ~= "&amp;" then
                    s = string.gsub(s, k, v)
                end
            end
            local amp = self._ENTITIES["&amp;"]
            if amp ~= nil then
                s = string.gsub(s, "&amp;", amp)
            end
        end
        return s
    end

    ---Matches a DOCTYPE declaration in `s` starting at `pos`.
    --@return match start, match end and a description table, or nil
    obj._parseDTD = function(self, s, pos)
        -- match,endmatch,root,type,name,uri,internal
        local m, e, r, t, n, u, i

        m, e, r, t, u, i = string.find(s, self._DTD1, pos)
        if m then
            return m, e, {_root=r, _type=t, _uri=u, _internal=i}
        end

        m, e, r, t, n, u, i = string.find(s, self._DTD2, pos)
        if m then
            return m, e, {_root=r, _type=t, _name=n, _uri=u, _internal=i}
        end

        m, e, r, i = string.find(s, self._DTD3, pos)
        if m then
            return m, e, {_root=r, _internal=i}
        end

        m, e, r, t, u = string.find(s, self._DTD4, pos)
        if m then
            return m, e, {_root=r, _type=t, _uri=u}
        end

        m, e, r, t, n, u = string.find(s, self._DTD5, pos)
        if m then
            return m, e, {_root=r, _type=t, _name=n, _uri=u}
        end

        return nil
    end

    ---Parses a string representing a tag
    --@param s String containing tag text
    --@return The tag name and a table with the tag's attributes (keys are
    --lower-cased); the table is nil when the tag has no attributes
    obj._parseTag = function(self, s)
        local attrs = {}
        local tagname = string.gsub(s, self._TAG, '%1')
        -- attrs._ is a temporary marker recording that an attribute was seen
        string.gsub(s, self._ATTR1, function(k, v)
            attrs[string.lower(k)] = self:_parseEntities(v)
            attrs._ = 1
        end)
        string.gsub(s, self._ATTR2, function(k, v)
            attrs[string.lower(k)] = self:_parseEntities(v)
            attrs._ = 1
        end)
        if attrs._ then
            attrs._ = nil
        else
            attrs = nil
        end
        return tagname, attrs
    end

    return obj
end
-- Module exports: the parser constructor.
return { xmlParser = xmlParser }

@ -0,0 +1,180 @@
local WidgetContainer = require("ui/widget/container/widgetcontainer")
local InfoMessage = require("ui/widget/infomessage")
local UIManager = require("ui/uimanager")
local DataStorage = require("datastorage")
local FFIUtil = require("ffi/util")
local util = require("util")
local T = FFIUtil.template
local _ = require("gettext")
local logger = require("logger")
local ffi = require("ffi")
local http = require("socket.http")
local ltn12 = require("ltn12")
-- Plugin object, created from WidgetContainer.
local NewsDownloader = WidgetContainer:new{}
local initialized = false -- for only once lazy initialization
-- Name of the feed configuration file kept inside the news directory.
local FEED_CONFIG_FILE = "feed_config.lua"
-- Extension given to every downloaded news item.
local FILE_EXTENSION = ".html"
-- Name of the news directory, created below the data directory.
local NEWS_DL_DIR_NAME = "news"
-- Full paths of the news directory (with trailing "/") and of the feed
-- config file; both are filled in lazily by addToMainMenu.
local NEWS_DL_DIR, FEED_CONFIG_PATH
--- Parses an XML document into a Lua table tree.
-- Uses LuaXML https://github.com/manoelcampos/LuaXML
-- The MIT License (MIT)
-- Copyright (c) 2016 Manoel Campos da Silva Filho
-- @param xml_str XML document as a single string
-- @return Root table of the parsed tree, or nil when parsing fails
local function deserializeXMLString(xml_str)
    local treehdl = require("lib/handler")
    local libxml = require("lib/xml")
    -- Handler that collects the XML content as a Lua table
    local xmlhandler = treehdl.simpleTreeHandler()
    -- The parser raises on malformed input, so run it protected
    local ok, err = pcall(function()
        libxml.xmlParser(xmlhandler):parse(xml_str)
    end)
    if not ok then
        -- keep the reason visible instead of failing silently
        logger.warn("NewsDownloader: failed to parse XML:", err)
        return
    end
    return xmlhandler.root
end
--- Registers this plugin with the main menu of the hosting UI.
function NewsDownloader:init()
    local menu = self.ui.menu
    menu:registerToMainMenu(self)
end
-- Resolves the news directory and feed config paths once and creates the
-- news directory when it does not exist yet.
local function initNewsDir()
    if initialized then
        return
    end
    NEWS_DL_DIR = ("%s/%s/"):format(DataStorage:getDataDir(), NEWS_DL_DIR_NAME)
    if not lfs.attributes(NEWS_DL_DIR, "mode") then
        lfs.mkdir(NEWS_DL_DIR)
    end
    FEED_CONFIG_PATH = NEWS_DL_DIR .. FEED_CONFIG_FILE
    initialized = true
end

-- Opens the news directory in the file manager.
local function showNewsDir()
    local FileManager = require("apps/filemanager/filemanager")
    if FileManager.instance then
        FileManager.instance:reinit(NEWS_DL_DIR)
    else
        FileManager:showFiles(NEWS_DL_DIR)
    end
end

-- Purges all downloaded news files and folders, but keeps the feed config.
local function removeDownloadedNews()
    for entry in lfs.dir(NEWS_DL_DIR) do
        if entry ~= "." and entry ~= ".." and entry ~= FEED_CONFIG_FILE then
            -- NEWS_DL_DIR already ends with "/", no extra separator needed
            local entry_path = NEWS_DL_DIR .. entry
            local entry_mode = lfs.attributes(entry_path, "mode")
            if entry_mode == "file" then
                ffi.C.remove(entry_path)
            elseif entry_mode == "directory" then
                FFIUtil.purgeDir(entry_path)
            end
        end
    end
    UIManager:show(InfoMessage:new{
        text = _("All news removed.")
    })
end

-- Shows where the feed config lives and where news items are stored.
local function showHelp()
    UIManager:show(InfoMessage:new{
        text = T(_("News downloader can be configured in the feeds config file:\n%1\n\nIt downloads news items to:\n%2.\n\nTo set your own news sources edit foregoing feeds config file. Items download limit can be set there."),
                 FEED_CONFIG_PATH,
                 NEWS_DL_DIR)
    })
end

--- Adds the news downloader entry and its sub-items to the main menu.
-- The news directory is prepared on the first call.
-- @param menu_items Table of menu entries; `rss_news_downloader` is set on it
function NewsDownloader:addToMainMenu(menu_items)
    initNewsDir()
    menu_items.rss_news_downloader = {
        text = _("News (RSS/Atom) downloader"),
        sub_item_table = {
            {
                text = _("Download news"),
                callback = function() self:loadConfigAndProcessFeeds() end,
            },
            {
                text = _("Go to news folder"),
                callback = showNewsDir,
            },
            {
                text = _("Remove news"),
                callback = removeDownloadedNews,
            },
            {
                text = _("Help"),
                callback = showHelp,
            },
        },
    }
end
--- Loads the feed config and downloads news for every configured feed.
-- When no config exists yet, the example config shipped in the plugin
-- directory is copied into place first. Entries without a URL or without a
-- numeric `limit` are skipped with a warning.
function NewsDownloader:loadConfigAndProcessFeeds()
    local info = InfoMessage:new{ text = _("Loading news feed config…") }
    UIManager:show(info)
    -- force repaint due to upcoming blocking calls
    UIManager:forceRePaint()
    UIManager:close(info)

    if not lfs.attributes(FEED_CONFIG_PATH, "mode") then
        logger.dbg("NewsDownloader: Creating initial feed config.")
        FFIUtil.copyFile(FFIUtil.joinPath(self.path, FEED_CONFIG_FILE),
                         FEED_CONFIG_PATH)
    end

    local ok, feed_config = pcall(dofile, FEED_CONFIG_PATH)
    if not ok then
        -- on failure pcall hands back the error message instead of the config
        logger.warn("NewsDownloader: Could not load feed config:", feed_config)
        return
    end
    if type(feed_config) ~= "table" then
        -- the config file must return the list of feeds
        logger.info("NewsDownloader: Feed config not found.")
        return
    end
    if #feed_config <= 0 then
        logger.info('NewsDownloader: empty feed list.', FEED_CONFIG_PATH)
        return
    end

    for _idx, feed in ipairs(feed_config) do
        local url = type(feed) == "table" and feed[1] or nil
        -- a limit that is not a number would silently mean "no limit"
        local limit = type(feed) == "table" and tonumber(feed.limit) or nil
        if url and limit then
            info = InfoMessage:new{ text = T(_("Processing: %1"), url) }
            UIManager:show(info)
            -- processFeedSource is a blocking call, so manually force a UI refresh beforehand
            UIManager:forceRePaint()
            self:processFeedSource(url, limit)
            UIManager:close(info)
        else
            logger.warn('NewsDownloader: invalid feed config entry', feed)
        end
    end

    UIManager:show(InfoMessage:new{
        text = _("Downloading news finished."),
        timeout = 1,
    })
end
--- Downloads the items of one RSS feed into a folder named after the feed.
-- Each item is fetched from its own `link` and stored as
-- "<item title><FILE_EXTENSION>" below "<NEWS_DL_DIR><channel title>/".
-- @param url URL of the RSS feed
-- @param limit Maximum number of items to download; 0 downloads nothing,
-- a negative value (or nil) means no limit
function NewsDownloader:processFeedSource(url, limit)
    if limit == 0 then
        -- nothing would be downloaded, so skip fetching the feed at all
        return
    end

    local resp_lines = {}
    local ok, code = http.request({ url = url, sink = ltn12.sink.table(resp_lines), })
    if not ok then
        -- on failure the second result is the error message
        logger.warn("NewsDownloader: Could not fetch feed", url, code)
        return
    end
    if code ~= 200 then
        logger.warn("NewsDownloader: Unexpected HTTP status for feed", url, code)
    end

    local feeds = deserializeXMLString(table.concat(resp_lines))
    if not feeds then return end

    local channel = feeds.rss and feeds.rss.channel
    if not channel or type(channel.title) ~= "string"
            or type(channel.item) ~= "table" then
        logger.info('NewsDownloader: Got invalid feeds', feeds)
        return
    end

    local feed_output_dir = ("%s%s/"):format(
        NEWS_DL_DIR, util.replaceInvalidChars(channel.title))
    if not lfs.attributes(feed_output_dir, "mode") then
        lfs.mkdir(feed_output_dir)
    end

    local items = channel.item
    if type(items[1]) ~= "table" then
        -- the tree handler collapses a lone <item> into the item itself
        -- instead of a one-element list
        items = { items }
    end

    -- ipairs keeps feed order, which the limit check relies on
    for index, feed in ipairs(items) do
        if index - 1 == limit then
            break
        end
        if type(feed) == "table" and type(feed.title) == "string"
                and type(feed.link) == "string" then
            local news_dl_path = ("%s%s%s"):format(feed_output_dir,
                                                   util.replaceInvalidChars(feed.title),
                                                   FILE_EXTENSION)
            logger.dbg("NewsDownloader: News file will be stored to :", news_dl_path)
            local file, open_err = io.open(news_dl_path, 'w')
            if file then
                -- fetch the news item itself, not the feed it was listed in
                local dl_ok, dl_err = http.request({ url = feed.link,
                                                     sink = ltn12.sink.file(file), })
                if not dl_ok then
                    logger.warn("NewsDownloader: Could not download", feed.link, dl_err)
                end
                -- the sink closes the file when the transfer completes; make
                -- sure it is closed when the request failed before that
                if io.type(file) == "file" then
                    file:close()
                end
            else
                logger.warn("NewsDownloader: Could not open file for writing", news_dl_path, open_err)
            end
        else
            logger.warn("NewsDownloader: skipping feed item without title or link", feed)
        end
    end
end
return NewsDownloader
Loading…
Cancel
Save