mirror of https://github.com/koreader/koreader
Simple News (RSS/Atom) downloader plugin (#2592)
* Simple News (RSS/Atom) downloader plugin * Limit news from sources * new News Downloader menu entry - goto news dir * Initial configuration logic * Download each feed source to separate folder * 2 feed sources in example configpull/2795/head
parent
a1ec64b3ed
commit
d6c81f5097
@ -0,0 +1,9 @@
|
||||
return {
|
||||
-- list your feeds here:
|
||||
-- only supports http URL for now
|
||||
-- Atom is currently not supported, only RSS
|
||||
{ "http://www.pcworld.com/index.rss", limit = 1 },
|
||||
{ "http://www.economist.com/sections/science-technology/rss.xml", limit = 2},
|
||||
-- Setting limit to 0 disables downloading for that feed; -1 means no limit.
|
||||
{ "http://www.economist.com/sections/culture/rss.xml", limit = 0 },
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Manoel Campos da Silva Filho
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,346 @@
|
||||
---
|
||||
-- Overview:
|
||||
-- =========
|
||||
-- Standard XML event handler(s) for XML parser module (xml.lua)
|
||||
--
|
||||
-- Features:
|
||||
-- =========
|
||||
-- printHandler - Generate XML event trace
|
||||
-- domHandler - Generate DOM-like node tree
|
||||
-- simpleTreeHandler - Generate 'simple' node tree
|
||||
--
|
||||
-- API:
|
||||
-- ====
|
||||
-- Must be called as handler function from xmlParser
|
||||
-- and implement XML event callbacks (see xmlParser.lua
|
||||
-- for callback API definition)
|
||||
--
|
||||
-- printHandler:
|
||||
-- -------------
|
||||
--
|
||||
-- printHandler prints event trace for debugging
|
||||
--
|
||||
-- domHandler:
|
||||
-- -----------
|
||||
--
|
||||
-- domHandler generates a DOM-like node tree structure with
|
||||
-- a single ROOT node parent - each node is a table comprising
|
||||
-- fields below.
|
||||
--
|
||||
-- node = { _name = <Element Name>,
|
||||
-- _type = ROOT|ELEMENT|TEXT|COMMENT|PI|DECL|DTD,
|
||||
-- _attr = { Node attributes - see callback API },
|
||||
-- _parent = <Parent Node>
|
||||
-- _children = { List of child nodes - ROOT/NODE only }
|
||||
-- }
|
||||
--
|
||||
-- The dom structure is capable of representing any valid XML document
|
||||
--
|
||||
-- simpleTreeHandler
|
||||
-- -----------------
|
||||
--
|
||||
-- simpleTreeHandler is a simplified handler which attempts
|
||||
-- to generate a more 'natural' table based structure which
|
||||
-- supports many common XML formats.
|
||||
--
|
||||
-- The XML tree structure is mapped directly into a recursive
|
||||
-- table structure with node names as keys and child elements
|
||||
-- as either a table of values or directly as a string value
|
||||
-- for text. Where there is only a single child element this
|
||||
-- is inserted as a named key - if there are multiple
|
||||
-- elements these are inserted as a vector (in some cases it
|
||||
-- may be preferable to always insert elements as a vector
|
||||
-- which can be specified on a per element basis in the
|
||||
-- options). Attributes are inserted as a child element with
|
||||
-- a key of '_attr'.
|
||||
--
|
||||
-- Only Tag/Text & CDATA elements are processed - all others
|
||||
-- are ignored.
|
||||
--
|
||||
-- This format has some limitations - primarily
|
||||
--
|
||||
-- * Mixed-Content behaves unpredictably - the relationship
|
||||
-- between text elements and embedded tags is lost and
|
||||
-- multiple levels of mixed content does not work
|
||||
-- * If a leaf element has both a text element and attributes
|
||||
-- then the text must be accessed through a vector (to
|
||||
-- provide a container for the attribute)
|
||||
--
|
||||
-- In general however this format is relatively useful.
|
||||
--
|
||||
-- It is much easier to understand by running some test
|
||||
-- data through 'textxml.lua -simpletree' than to read this)
|
||||
--
|
||||
-- Options
|
||||
-- =======
|
||||
-- simpleTreeHandler.options.noreduce = { <tag> = bool,.. }
|
||||
--
|
||||
-- - Nodes not to reduce children vector even if only
|
||||
-- one child
|
||||
--
|
||||
-- domHandler.options.(comment|pi|dtd|decl)Node = bool
|
||||
--
|
||||
-- - Include/exclude given node types
|
||||
--
|
||||
-- Usage
|
||||
-- =====
|
||||
-- Passed as delegate in xmlParser constructor and called
|
||||
-- as callback by xmlParser:parse(xml) method.
|
||||
--
|
||||
-- See textxml.lua for examples
|
||||
-- License:
|
||||
-- ========
|
||||
--
|
||||
-- This code is freely distributable under the terms of the Lua license
|
||||
-- (<a href="http://www.lua.org/copyright.html">http://www.lua.org/copyright.html</a>)
|
||||
--
|
||||
-- History
|
||||
-- =======
|
||||
-- $Id: handler.lua,v 1.1.1.1 2001/11/28 06:11:33 paulc Exp $
|
||||
--
|
||||
-- $Log: handler.lua,v $
|
||||
-- Revision 1.1.1.1 2001/11/28 06:11:33 paulc
|
||||
-- Initial Import
|
||||
--@author Paul Chakravarti (paulc@passtheaardvark.com)<p/>
|
||||
|
||||
|
||||
---Build a string representation of a table.
--Convenience function for printHandler. Nested tables are rendered
--recursively, so tables that contain cycles are not supported.
--@param t Table to be rendered
--@return A string of the form "{k=v,...}" when t is a table (entries appear
--in pairs() order); any non-table value is returned unchanged
local function showTable(t)
    if type(t) ~= 'table' then
        return t
    end
    local parts = {}
    for k,v in pairs(t) do
        if type(v) == 'table' then
            v = showTable(v)
        end
        -- tostring() keeps booleans and other non-string values from
        -- tripping string.format("%s") on Lua 5.1.
        parts[#parts + 1] = string.format("%s=%s",tostring(k),tostring(v))
    end
    return '{'..table.concat(parts,',')..'}'
end
|
||||
|
||||
---Create a handler that writes a plain-text trace of every parser event
--to standard output.
--@return Handler table with starttag/endtag/text/cdata/comment/dtd/pi/decl callbacks
local printHandler = function()
    -- Write one header line, followed by one line per attribute (if any).
    local function emit(label, value, attrs)
        io.write(label..value.."\n")
        if attrs then
            for name,val in pairs(attrs) do
                io.write(string.format(" + %s='%s'\n",name,val))
            end
        end
    end

    local handler = {}

    function handler.starttag(self,t,a,s,e)
        emit("Start : ",t,a)
    end

    function handler.endtag(self,t,s,e)
        emit("End : ",t)
    end

    function handler.text(self,t,s,e)
        emit("Text : ",t)
    end

    function handler.cdata(self,t,s,e)
        emit("CDATA : ",t)
    end

    function handler.comment(self,t,s,e)
        emit("Comment : ",t)
    end

    function handler.dtd(self,t,a,s,e)
        emit("DTD : ",t,a)
    end

    function handler.pi(self,t,a,s,e)
        emit("PI : ",t,a)
    end

    function handler.decl(self,t,a,s,e)
        emit("XML Decl : ",t,a)
    end

    return handler
end
|
||||
|
||||
|
||||
--Get the first key of a table.
--@param tb Table whose first key should be returned
--@return The first key yielded by pairs(tb), or nil when the table is
--empty; a non-table argument is returned unchanged
local function getFirstKey(tb)
    if type(tb) ~= "table" then
        return tb
    end
    -- The original author noted that calling next(tb) directly did not work
    -- here (it hung), hence a pairs() loop that returns on its first step.
    for key in pairs(tb) do
        return key
    end
    return nil
end
|
||||
|
||||
---Handler to generate a lua table from a XML content string.
--Each element becomes a table stored in its parent under the tag name;
--text and CDATA are appended to the enclosing element's array part.
--When the outermost element closes, vectors holding a single child are
--collapsed into that child (see reduce).
--@return Handler object; the parsed tree is available in its root field
local function simpleTreeHandler()
    local obj = {}

    obj.root = {}
    obj.stack = {obj.root;n=1}
    obj.options = {noreduce = {}}

    ---Recursively remove redundant vectors for nodes with single child
    --elements.
    --@param node Table to reduce
    --@param key Key under which node is stored in parent
    --@param parent Table containing node (nil for the root)
    obj.reduce = function(self,node,key,parent)
        for k,v in pairs(node) do
            if type(v) == 'table' then
                self:reduce(v,k,node)
            end
        end
        -- The root has no parent to collapse into, even when it holds
        -- exactly one stray text entry.
        if parent ~= nil and #node == 1 and not self.options.noreduce[key] and
           node._attr == nil then
            parent[key] = node[1]
        else
            node.n = nil
        end
    end

    ---Open a new element below the element currently being built.
    --@param t Tag name
    --@param a Attributes table; stored as _attr only when
    --self.parseAttributes is true
    obj.starttag = function(self,t,a)
        local node = {}
        if self.parseAttributes == true then
            node._attr=a
        end

        local current = self.stack[#self.stack]
        if current[t] then
            table.insert(current[t],node)
        else
            current[t] = {node;n=1}
        end
        table.insert(self.stack,node)
    end

    ---Close the element currently being built.
    --@param t Tag name
    --@param s Extra value used only in the error message (may be nil)
    obj.endtag = function(self,t,s)
        -- Table representing the tag currently being processed
        local current = self.stack[#self.stack]
        -- Table representing the tag that contains the current tag
        local prev = self.stack[#self.stack-1]
        if not prev or not prev[t] then
            error("XML Error - Unmatched Tag ["..tostring(s)..":"..t.."]\n")
        end

        -- A tag table without any key has no value associated with it
        -- (for instance a self-closed tag such as <senha />). Store an
        -- empty string in its place so callers get "" instead of an empty
        -- table. Only this element's slot is replaced: overwriting the
        -- whole prev[t] vector would discard earlier siblings of the same
        -- name and make the next starttag fail on table.insert.
        if getFirstKey(current) == nil then
            local siblings = prev[t]
            if type(siblings) == 'table' and siblings[#siblings] == current then
                siblings[#siblings] = ""
            end
        end

        if prev == self.root then
            -- Once parsing complete recursively reduce tree
            self:reduce(prev,nil,nil)
        end

        table.remove(self.stack)
    end

    ---Append text to the element currently being built.
    --@param t Text content
    obj.text = function(self,t)
        local current = self.stack[#self.stack]
        table.insert(current,t)
    end

    -- CDATA sections are stored exactly like text
    obj.cdata = obj.text

    return obj
end
|
||||
|
||||
--- domHandler
-- Handler that builds a DOM-like node tree below a single ROOT node.
-- Every node records its _type and _parent; ELEMENT nodes additionally
-- carry _name, _attr and _children.
-- @return Handler object; the tree is available in its root field
local function domHandler()
    local obj = {}
    -- Setting an entry to nil disables creation of that node type
    obj.options = {commentNode=1,piNode=1,dtdNode=1,declNode=1}
    obj.root = { _children = {n=0}, _type = "ROOT" }
    obj.current = obj.root

    -- Attach node as the last child of the element currently being built.
    local function appendChild(self,node)
        node._parent = self.current
        table.insert(self.current._children,node)
        return node
    end

    --- Open a new ELEMENT node and make it the current node.
    obj.starttag = function(self,t,a)
        self.current = appendChild(self,{ _type = 'ELEMENT',
                                          _name = t,
                                          _attr = a,
                                          _children = {n=0} })
    end

    --- Close the current ELEMENT node and step back to its parent.
    -- Raises an error when t is not the name of the current node.
    obj.endtag = function(self,t,s)
        if t ~= self.current._name then
            -- s may be nil, so it must not be concatenated directly
            error("XML Error - Unmatched Tag ["..tostring(s)..":"..t.."]\n")
        end
        self.current = self.current._parent
    end

    --- Add a TEXT node below the current node.
    obj.text = function(self,t)
        appendChild(self,{ _type = "TEXT", _text = t })
    end

    --- Add a COMMENT node (when options.commentNode is set).
    obj.comment = function(self,t)
        if self.options.commentNode then
            appendChild(self,{ _type = "COMMENT", _text = t })
        end
    end

    --- Add a PI node (when options.piNode is set).
    obj.pi = function(self,t,a)
        if self.options.piNode then
            appendChild(self,{ _type = "PI", _name = t, _attr = a })
        end
    end

    --- Add a DECL node (when options.declNode is set).
    obj.decl = function(self,t,a)
        if self.options.declNode then
            appendChild(self,{ _type = "DECL", _name = t, _attr = a })
        end
    end

    --- Add a DTD node (when options.dtdNode is set).
    obj.dtd = function(self,t,a)
        if self.options.dtdNode then
            appendChild(self,{ _type = "DTD", _name = t, _attr = a })
        end
    end

    -- CDATA sections are stored exactly like text
    obj.cdata = obj.text

    return obj
end
|
||||
|
||||
return { simpleTreeHandler = simpleTreeHandler }
|
@ -0,0 +1,483 @@
|
||||
---
|
||||
-- Overview:
|
||||
-- =========
|
||||
--
|
||||
-- This module provides a non-validating XML stream parser in Lua.
|
||||
--
|
||||
-- Features:
|
||||
-- =========
|
||||
--
|
||||
-- * Tokenises well-formed XML (relatively robustly)
|
||||
-- * Flexible handler based event API (see below)
|
||||
-- * Parses all XML Infoset elements - ie.
|
||||
-- - Tags
|
||||
-- - Text
|
||||
-- - Comments
|
||||
-- - CDATA
|
||||
-- - XML Decl
|
||||
-- - Processing Instructions
|
||||
-- - DOCTYPE declarations
|
||||
-- * Provides limited well-formedness checking
|
||||
-- (checks for basic syntax & balanced tags only)
|
||||
-- * Flexible whitespace handling (selectable)
|
||||
-- * Entity Handling (selectable)
|
||||
--
|
||||
-- Limitations:
|
||||
-- ============
|
||||
--
|
||||
-- * Non-validating
|
||||
-- * No charset handling
|
||||
-- * No namespace support
|
||||
-- * Shallow well-formedness checking only (fails
|
||||
-- to detect most semantic errors)
|
||||
--
|
||||
-- API:
|
||||
-- ====
|
||||
--
|
||||
-- The parser provides a partially object-oriented API with
|
||||
-- functionality split into tokeniser and handler components.
|
||||
--
|
||||
-- The handler instance is passed to the tokeniser and receives
|
||||
-- callbacks for each XML element processed (if a suitable handler
|
||||
-- function is defined). The API is conceptually similar to the
|
||||
-- SAX API but implemented differently.
|
||||
--
|
||||
-- The following events are generated by the tokeniser
|
||||
--
|
||||
-- handler:starttag - Start Tag
|
||||
-- handler:endtag - End Tag
|
||||
-- handler:text - Text
|
||||
-- handler:decl - XML Declaration
|
||||
-- handler:pi - Processing Instruction
|
||||
-- handler:comment - Comment
|
||||
-- handler:dtd - DOCTYPE definition
|
||||
-- handler:cdata - CDATA
|
||||
--
|
||||
-- The function prototype for all the callback functions is
|
||||
--
|
||||
-- callback(val,attrs,start,end)
|
||||
--
|
||||
-- where attrs is a table and val/attrs are overloaded for
|
||||
-- specific callbacks - ie.
|
||||
--
|
||||
-- Callback val attrs (table)
|
||||
-- -------- --- -------------
|
||||
-- start name { attributes (name=val).. }
|
||||
-- end name nil
|
||||
-- text <text> nil
|
||||
-- cdata <text> nil
|
||||
-- decl "xml" { attributes (name=val).. }
|
||||
-- pi pi name { attributes (if present)..
|
||||
-- _text = <PI Text>
|
||||
-- }
|
||||
-- comment <text> nil
|
||||
-- dtd root element { _root = <Root Element>,
|
||||
-- _type = SYSTEM|PUBLIC,
|
||||
-- _name = <name>,
|
||||
-- _uri = <uri>,
|
||||
-- _internal = <internal dtd>
|
||||
-- }
|
||||
--
|
||||
-- (start & end provide the character positions of the start/end
|
||||
-- of the element)
|
||||
--
|
||||
-- XML data is passed to the parser instance through the 'parse'
|
||||
-- method (Note: must be passed a single string currently)
|
||||
--
|
||||
-- Options
|
||||
-- =======
|
||||
--
|
||||
-- Parser options are controlled through the 'self.options' table.
|
||||
-- Available options are -
|
||||
--
|
||||
-- * stripWS
|
||||
--
|
||||
-- Strip non-significant whitespace (leading/trailing)
|
||||
-- and do not generate events for empty text elements
|
||||
--
|
||||
-- * expandEntities
|
||||
--
|
||||
-- Expand entities (standard entities + single char
|
||||
-- numeric entities only currently - could be extended
|
||||
-- at runtime if suitable DTD parser added elements
|
||||
-- to table (see obj._ENTITIES). May also be possible
|
||||
-- to expand multibyte entities for UTF-8 only)
|
||||
--
|
||||
-- * errorHandler
|
||||
--
|
||||
-- Custom error handler function
|
||||
--
|
||||
-- NOTE: Boolean options must be set to 'nil' not '0'
|
||||
--
|
||||
-- Usage
|
||||
-- =====
|
||||
--
|
||||
-- Create a handler instance -
|
||||
--
|
||||
-- h = { start = function(t,a,s,e) .... end,
|
||||
-- end = function(t,a,s,e) .... end,
|
||||
-- text = function(t,a,s,e) .... end,
|
||||
-- cdata = text }
|
||||
--
|
||||
-- (or use predefined handler - see handler.lua)
|
||||
--
|
||||
-- Create parser instance -
|
||||
--
|
||||
-- p = xmlParser(h)
|
||||
--
|
||||
-- Set options -
|
||||
--
|
||||
-- p.options.xxxx = nil
|
||||
--
|
||||
-- Parse XML data -
|
||||
--
|
||||
-- xmlParser:parse("<?xml... ")
|
||||
-- License:
|
||||
-- ========
|
||||
--
|
||||
-- This code is freely distributable under the terms of the Lua license
|
||||
-- (http://www.lua.org/copyright.html)
|
||||
--
|
||||
-- History
|
||||
-- =======
|
||||
-- Added parameter parseAttributes (boolean) in xmlParser.parse method
|
||||
-- If true (default value), tag attributes are parsed.
|
||||
-- by Manoel Campos da Silva Filho
|
||||
-- http://manoelcampos.com
|
||||
-- http://about.me/manoelcampos
|
||||
|
||||
--
|
||||
-- $Id: xml.lua,v 1.1.1.1 2001/11/28 06:11:33 paulc Exp $
|
||||
--
|
||||
-- $Log: xml.lua,v $
|
||||
-- Revision 1.1.1.1 2001/11/28 06:11:33 paulc
|
||||
-- Initial Import
|
||||
--
|
||||
--@author Paul Chakravarti (paulc@passtheaardvark.com)<p/>
|
||||
|
||||
|
||||
---Parses a XML string
--@param handler Handler object that receives the parse events and converts
--the XML string to another format. @see handler.lua
--@return Parser object; pass a document to its parse method
local xmlParser = function(handler)
    local obj = {}

    -- Public attributes

    obj.options = {
        stripWS = 1,
        expandEntities = 1,
        errorHandler = function(err,pos)
            error(string.format("%s [char=%d]\n",
                                err or "Parse Error",pos))
        end,
    }

    -- Public methods

    ---Tokenise an XML document and dispatch the events to the handler.
    --@param str The complete XML document as a single string
    --@param parseAttributes Stored on the handler as parseAttributes;
    --defaults to true when nil
    obj.parse = function(self, str, parseAttributes)
        if parseAttributes == nil then
            parseAttributes = true
        end
        self._handler.parseAttributes = parseAttributes

        local match,endmatch,pos = 0,0,1
        local text,endt1,endt2,tagstr,tagname,attrs,starttext,endtext
        local errstart,errend,extstart,extend
        while match do
            -- Get next tag (first pass - fix exceptions below)
            match,endmatch,text,endt1,tagstr,endt2 = string.find(str,self._XML,pos)
            if not match then
                if string.find(str, self._WS,pos) then
                    -- No more text - check document complete
                    if #self._stack ~= 0 then
                        self:_err(self._errstr.incompleteXmlErr,pos)
                    end
                else
                    -- Unparsable text
                    self:_err(self._errstr.xmlErr,pos)
                end
                -- Nothing left to tokenise, even if a custom error
                -- handler chose not to raise.
                break
            end
            -- Handle leading text
            starttext = match
            endtext = match + string.len(text) - 1
            match = match + string.len(text)
            text = self:_parseEntities(self:_stripWS(text))
            if text ~= "" and self._handler.text then
                self._handler:text(text,nil,starttext,endtext)
            end
            -- Test for tag type
            if string.find(string.sub(tagstr,1,5),"?xml%s") then
                -- XML Declaration
                match,endmatch,text = string.find(str,self._PI,pos)
                if not match then
                    self:_err(self._errstr.declErr,pos)
                end
                if match ~= 1 then
                    -- Must be at start of doc if present
                    self:_err(self._errstr.declStartErr,pos)
                end
                tagname,attrs = self:_parseTag(text)
                -- TODO: Check attributes are valid
                -- Check for version (mandatory); attrs is nil when the
                -- declaration carries no attributes at all
                if not attrs or attrs.version == nil then
                    self:_err(self._errstr.declAttrErr,pos)
                end
                if self._handler.decl then
                    self._handler:decl(tagname,attrs,match,endmatch)
                end
            elseif string.sub(tagstr,1,1) == "?" then
                -- Processing Instruction
                match,endmatch,text = string.find(str,self._PI,pos)
                if not match then
                    self:_err(self._errstr.piErr,pos)
                end
                if self._handler.pi then
                    -- Parse PI attributes & text
                    tagname,attrs = self:_parseTag(text)
                    local pi = string.sub(text,string.len(tagname)+1)
                    if pi ~= "" then
                        if attrs then
                            attrs._text = pi
                        else
                            attrs = { _text = pi }
                        end
                    end
                    self._handler:pi(tagname,attrs,match,endmatch)
                end
            elseif string.sub(tagstr,1,3) == "!--" then
                -- Comment
                match,endmatch,text = string.find(str,self._COMMENT,pos)
                if not match then
                    self:_err(self._errstr.commentErr,pos)
                end
                if self._handler.comment then
                    text = self:_parseEntities(self:_stripWS(text))
                    -- Comments have no attributes
                    self._handler:comment(text,nil,match,endmatch)
                end
            elseif string.sub(tagstr,1,8) == "!DOCTYPE" then
                -- DTD
                match,endmatch,attrs = self:_parseDTD(str,pos)
                if not match then
                    self:_err(self._errstr.dtdErr,pos)
                end
                if self._handler.dtd then
                    self._handler:dtd(attrs._root,attrs,match,endmatch)
                end
            elseif string.sub(tagstr,1,8) == "![CDATA[" then
                -- CDATA
                match,endmatch,text = string.find(str,self._CDATA,pos)
                if not match then
                    self:_err(self._errstr.cdataErr,pos)
                end
                if self._handler.cdata then
                    self._handler:cdata(text,nil,match,endmatch)
                end
            else
                -- Normal tag

                -- Need check for embedded '>' in attribute value and extend
                -- match recursively if necessary eg. <tag attr="123>456">
                while 1 do
                    errstart,errend = string.find(tagstr,self._ATTRERR1)
                    if errend == nil then
                        errstart,errend = string.find(tagstr,self._ATTRERR2)
                        if errend == nil then
                            break
                        end
                    end
                    extstart,extend,endt2 = string.find(str,self._TAGEXT,endmatch+1)
                    if not extstart then
                        -- Quote never closed before the end of the document
                        self:_err(self._errstr.xmlErr,pos)
                        return
                    end
                    tagstr = tagstr .. string.sub(str,endmatch,extend-1)
                    endmatch = extend
                end

                -- Extract tagname/attrs
                tagname,attrs = self:_parseTag(tagstr)

                if (endt1=="/") then
                    -- End tag
                    if attrs then
                        -- Shouldnt have any attributes in endtag
                        self:_err(string.format("%s (/%s)",
                                                self._errstr.endTagErr,
                                                tagname)
                                  ,pos)
                    end
                    -- The stack is maintained whether or not the handler
                    -- defines endtag, since start tags are always pushed.
                    if table.remove(self._stack) ~= tagname then
                        self:_err(string.format("%s (/%s)",
                                                self._errstr.unmatchedTagErr,
                                                tagname)
                                  ,pos)
                    end
                    if self._handler.endtag then
                        self._handler:endtag(tagname,nil,match,endmatch)
                    end
                else
                    -- Start Tag
                    table.insert(self._stack,tagname)
                    if self._handler.starttag then
                        self._handler:starttag(tagname,attrs,match,endmatch)
                    end
                    -- TODO: Self-closing tags are being returned as a
                    -- table, which complicates handling in the NCLua app.
                    -- An empty string field should be returned instead.
                    -- Self-Closing Tag
                    if (endt2=="/") then
                        table.remove(self._stack)
                        if self._handler.endtag then
                            self._handler:endtag(tagname,nil,match,endmatch)
                        end
                    end
                end
            end
            pos = endmatch + 1
        end
    end

    -- Private attributes/functions

    obj._handler = handler
    obj._stack = {}

    obj._XML = '^([^<]*)<(%/?)([^>]-)(%/?)>'
    obj._ATTR1 = '([%w-:_]+)%s*=%s*"(.-)"'
    obj._ATTR2 = '([%w-:_]+)%s*=%s*\'(.-)\''
    obj._CDATA = '<%!%[CDATA%[(.-)%]%]>'
    obj._PI = '<%?(.-)%?>'
    obj._COMMENT = '<!%-%-(.-)%-%->'
    obj._TAG = '^(.-)%s.*'
    obj._LEADINGWS = '^%s+'
    obj._TRAILINGWS = '%s+$'
    obj._WS = '^%s*$'
    obj._DTD1 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*(%b[])%s*>'
    obj._DTD2 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*(%b[])%s*>'
    obj._DTD3 = '<!DOCTYPE%s+(.-)%s*(%b[])%s*>'
    obj._DTD4 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*>'
    obj._DTD5 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*>'

    obj._ATTRERR1 = '=%s*"[^"]*$'
    obj._ATTRERR2 = '=%s*\'[^\']*$'
    obj._TAGEXT = '(%/?)>'

    -- Maps a Lua pattern to its replacement (string or function), as
    -- consumed by string.gsub in _parseEntities.
    obj._ENTITIES = { ["&lt;"] = "<",
                      ["&gt;"] = ">",
                      ["&amp;"] = "&",
                      ["&quot;"] = '"',
                      ["&apos;"] = "'",
                      ["&#(%d+);"] = function (x)
                                        local d = tonumber(x)
                                        if d >= 0 and d < 256 then
                                            return string.char(d)
                                        else
                                            -- leave the reference untouched
                                            return "&#"..x..";"
                                        end
                                     end,
                      ["&#x(%x+);"] = function (x)
                                        local d = tonumber(x,16)
                                        if d >= 0 and d < 256 then
                                            return string.char(d)
                                        else
                                            return "&#x"..x..";"
                                        end
                                      end,
                    }

    ---Report an error through options.errorHandler (if one is set).
    obj._err = function(self,err,pos)
        if self.options.errorHandler then
            self.options.errorHandler(err,pos)
        end
    end

    obj._errstr = { xmlErr = "Error Parsing XML",
                    declErr = "Error Parsing XMLDecl",
                    declStartErr = "XMLDecl not at start of document",
                    declAttrErr = "Invalid XMLDecl attributes",
                    piErr = "Error Parsing Processing Instruction",
                    commentErr = "Error Parsing Comment",
                    cdataErr = "Error Parsing CDATA",
                    dtdErr = "Error Parsing DTD",
                    endTagErr = "End Tag Attributes Invalid",
                    unmatchedTagErr = "Unbalanced Tag",
                    incompleteXmlErr = "Incomplete XML Document",
                  }

    ---Strip leading/trailing whitespace when options.stripWS is set.
    obj._stripWS = function(self,s)
        if self.options.stripWS then
            s = string.gsub(s,'^%s+','')
            s = string.gsub(s,'%s+$','')
        end
        return s
    end

    ---Expand the entities listed in _ENTITIES when options.expandEntities
    --is set.
    obj._parseEntities = function(self,s)
        if self.options.expandEntities then
            for k,v in pairs(self._ENTITIES) do
                if k ~= "&amp;" then
                    s = string.gsub(s,k,v)
                end
            end
            -- pairs() order is unspecified, so "&amp;" is expanded last;
            -- otherwise "&amp;lt;" could be decoded twice into "<".
            local amp = self._ENTITIES["&amp;"]
            if amp ~= nil then
                s = string.gsub(s,"&amp;",amp)
            end
        end
        return s
    end

    ---Match a DOCTYPE declaration against the known _DTD1.._DTD5 forms.
    --@param s Document string
    --@param pos Position to start searching from
    --@return match start, match end and a table with the captured
    --_root/_type/_name/_uri/_internal fields, or nil when nothing matches
    obj._parseDTD = function(self,s,pos)
        -- match,endmatch,root,type,name,uri,internal
        local m,e,r,t,n,u,i
        m,e,r,t,u,i = string.find(s,self._DTD1,pos)
        if m then
            return m,e,{_root=r,_type=t,_uri=u,_internal=i}
        end
        m,e,r,t,n,u,i = string.find(s,self._DTD2,pos)
        if m then
            return m,e,{_root=r,_type=t,_name=n,_uri=u,_internal=i}
        end
        m,e,r,i = string.find(s,self._DTD3,pos)
        if m then
            return m,e,{_root=r,_internal=i}
        end
        m,e,r,t,u = string.find(s,self._DTD4,pos)
        if m then
            return m,e,{_root=r,_type=t,_uri=u}
        end
        m,e,r,t,n,u = string.find(s,self._DTD5,pos)
        if m then
            return m,e,{_root=r,_type=t,_name=n,_uri=u}
        end
        return nil
    end

    ---Parses a string representing a tag
    --@param s String containing tag text
    --@return Returns a string containing the tagname and a table attrs
    --containing the attributes of the tag (keys lower-cased), or nil in
    --place of the table when the tag has no attributes
    obj._parseTag = function(self,s)
        local attrs = {}
        local tagname = string.gsub(s,self._TAG,'%1')
        -- attrs._ is a temporary marker recording that at least one
        -- attribute was found
        string.gsub(s,self._ATTR1,function (k,v)
                                attrs[string.lower(k)]=self:_parseEntities(v)
                                attrs._ = 1
                           end)
        string.gsub(s,self._ATTR2,function (k,v)
                                attrs[string.lower(k)]=self:_parseEntities(v)
                                attrs._ = 1
                           end)
        if attrs._ then
            attrs._ = nil
        else
            attrs = nil
        end
        return tagname,attrs
    end

    return obj

end
|
||||
|
||||
return { xmlParser = xmlParser }
|
@ -0,0 +1,180 @@
|
||||
local WidgetContainer = require("ui/widget/container/widgetcontainer")
|
||||
local InfoMessage = require("ui/widget/infomessage")
|
||||
local UIManager = require("ui/uimanager")
|
||||
local DataStorage = require("datastorage")
|
||||
local FFIUtil = require("ffi/util")
|
||||
local util = require("util")
|
||||
local T = FFIUtil.template
|
||||
local _ = require("gettext")
|
||||
local logger = require("logger")
|
||||
local ffi = require("ffi")
|
||||
local http = require("socket.http")
|
||||
local ltn12 = require("ltn12")
|
||||
|
||||
|
||||
local NewsDownloader = WidgetContainer:new{}
|
||||
|
||||
local initialized = false -- for only once lazy initialization
|
||||
local FEED_CONFIG_FILE = "feed_config.lua"
|
||||
local FILE_EXTENSION = ".html"
|
||||
local NEWS_DL_DIR_NAME = "news"
|
||||
local NEWS_DL_DIR, FEED_CONFIG_PATH
|
||||
|
||||
--- Parse an XML document into a Lua table tree.
-- @param xml_str XML document text
-- @return The root table built by the tree handler, or nil when parsing
-- raised an error (the error is logged)
local function deserializeXMLString(xml_str)
    -- uses LuaXML https://github.com/manoelcampos/LuaXML
    -- The MIT License (MIT)
    -- Copyright (c) 2016 Manoel Campos da Silva Filho
    local treehdl = require("lib/handler")
    local libxml = require("lib/xml")

    -- Handler that accumulates the parsed document as a Lua table
    local xmlhandler = treehdl.simpleTreeHandler()
    -- The parser raises on malformed input, so run it in protected mode
    local ok, err = pcall(function()
        libxml.xmlParser(xmlhandler):parse(xml_str)
    end)
    if not ok then
        logger.warn("NewsDownloader: failed to parse feed XML:", err)
        return
    end
    return xmlhandler.root
end
|
||||
|
||||
--- Plugin init hook: registers this plugin with the UI's main menu.
function NewsDownloader:init()
    local main_menu = self.ui.menu
    main_menu:registerToMainMenu(self)
end
|
||||
|
||||
--- Add the news downloader entry and its sub-items to the main menu.
-- The first call also resolves the news directory and the feed config path,
-- creating the directory when lfs.attributes reports nothing at that path.
-- @param menu_items Menu table; the entry is stored under rss_news_downloader
function NewsDownloader:addToMainMenu(menu_items)
    if not initialized then
        NEWS_DL_DIR = ("%s/%s/"):format(DataStorage:getDataDir(), NEWS_DL_DIR_NAME)
        local dir_exists = lfs.attributes(NEWS_DL_DIR, "mode")
        if not dir_exists then
            lfs.mkdir(NEWS_DL_DIR)
        end

        FEED_CONFIG_PATH = NEWS_DL_DIR .. FEED_CONFIG_FILE
        initialized = true
    end

    -- Point the file manager at the news directory.
    local function open_news_folder()
        local FileManager = require("apps/filemanager/filemanager")
        if FileManager.instance then
            FileManager.instance:reinit(NEWS_DL_DIR)
        else
            FileManager:showFiles(NEWS_DL_DIR)
        end
    end

    -- Purge all downloaded news files, but keep the feed config.
    local function remove_news()
        for entry in lfs.dir(NEWS_DL_DIR) do
            local is_special = entry == "." or entry == ".."
            if not is_special and entry ~= FEED_CONFIG_FILE then
                local entry_path = NEWS_DL_DIR .. "/" .. entry
                local entry_mode = lfs.attributes(entry_path, "mode")
                if entry_mode == "file" then
                    ffi.C.remove(entry_path)
                elseif entry_mode == "directory" then
                    FFIUtil.purgeDir(entry_path)
                end
            end
        end
        UIManager:show(InfoMessage:new{
            text = _("All news removed.")
        })
    end

    -- Tell the user where the config file and the downloads live.
    local function show_help()
        UIManager:show(InfoMessage:new{
            text = T(_("News downloader can be configured in the feeds config file:\n%1\n\nIt downloads news items to:\n%2.\n\nTo set you own news sources edit foregoing feeds config file. Items download limit can be set there."),
                     FEED_CONFIG_PATH,
                     NEWS_DL_DIR)
        })
    end

    menu_items.rss_news_downloader = {
        text = _("News (RSS/Atom) downloader"),
        sub_item_table = {
            {
                text = _("Download news"),
                callback = function() self:loadConfigAndProcessFeeds() end,
            },
            {
                text = _("Go to news folder"),
                callback = open_news_folder,
            },
            {
                text = _("Remove news"),
                callback = remove_news,
            },
            {
                text = _("Help"),
                callback = show_help,
            },
        },
    }
end
|
||||
|
||||
--- Load the feed config file and download every feed listed in it.
-- When no config exists at FEED_CONFIG_PATH, the file of the same name in
-- the plugin directory (self.path) is copied there first. Each config entry
-- is expected to look like { "<url>", limit = <number> }; entries without a
-- URL or a numeric limit are logged and skipped.
function NewsDownloader:loadConfigAndProcessFeeds()
    local info = InfoMessage:new{ text = _("Loading news feed config…") }
    UIManager:show(info)
    -- force repaint due to upcoming blocking calls
    UIManager:forceRePaint()
    UIManager:close(info)

    if not lfs.attributes(FEED_CONFIG_PATH, "mode") then
        logger.dbg("NewsDownloader: Creating initial feed config.")
        FFIUtil.copyFile(FFIUtil.joinPath(self.path, FEED_CONFIG_FILE),
                         FEED_CONFIG_PATH)
    end
    local ok, feed_config = pcall(dofile, FEED_CONFIG_PATH)
    if not ok or not feed_config then
        -- On failure the second pcall result is the error message
        logger.info("NewsDownloader: Feed config not found.", feed_config)
        return
    end
    -- The config is a user-edited Lua file and may return anything
    if type(feed_config) ~= "table" then
        logger.warn('NewsDownloader: feed config does not return a table.', FEED_CONFIG_PATH)
        return
    end

    if #feed_config <= 0 then
        logger.info('NewsDownloader: empty feed list.', FEED_CONFIG_PATH)
        return
    end

    for idx, feed in ipairs(feed_config) do
        local url, limit
        if type(feed) == "table" then
            url = feed[1]
            -- tonumber() yields nil for anything that is not a number
            limit = tonumber(feed.limit)
        end
        if url and limit then
            info = InfoMessage:new{ text = T(_("Processing: %1"), url) }
            UIManager:show(info)
            -- processFeedSource is a blocking call, so manually force a UI refresh beforehand
            UIManager:forceRePaint()
            self:processFeedSource(url, limit)
            UIManager:close(info)
        else
            logger.warn('NewsDownloader: invalid feed config entry', feed)
        end
    end

    UIManager:show(InfoMessage:new{
        text = _("Downloading news finished."),
        timeout = 1,
    })
end
|
||||
|
||||
--- Download one RSS feed and store its items as files.
-- The feed is fetched and parsed, a sub-directory named after the channel
-- title is created below NEWS_DL_DIR, and the page each item links to is
-- saved there as "<item title>.html".
-- @param url Feed URL
-- @param limit Number of items to download: 0 downloads nothing, a
-- negative value (or nil) downloads every item
function NewsDownloader:processFeedSource(url, limit)
    local resp_lines = {}
    http.request({ url = url, sink = ltn12.sink.table(resp_lines), })
    local feeds = deserializeXMLString(table.concat(resp_lines))
    if not feeds then return end
    if not feeds.rss or not feeds.rss.channel
        or type(feeds.rss.channel.title) ~= "string"
        or type(feeds.rss.channel.item) ~= "table" then
        logger.info('NewsDownloader: Got invalid feeds', feeds)
        return
    end

    local feed_output_dir = ("%s%s/"):format(
        NEWS_DL_DIR, util.replaceInvalidChars(feeds.rss.channel.title))
    if not lfs.attributes(feed_output_dir, "mode") then
        lfs.mkdir(feed_output_dir)
    end

    local items = feeds.rss.channel.item
    -- The tree handler collapses a one-element vector into the element
    -- itself, so a feed with a single <item> yields the item table directly.
    if items[1] == nil then
        items = { items }
    end

    for index, feed in ipairs(items) do
        if index - 1 == limit then
            break
        end
        if type(feed) == "table" and type(feed.title) == "string"
            and type(feed.link) == "string" then
            local news_dl_path = ("%s%s%s"):format(feed_output_dir,
                                                   util.replaceInvalidChars(feed.title),
                                                   FILE_EXTENSION)
            logger.dbg("NewsDownloader: News file will be stored to :", news_dl_path)
            local file, open_err = io.open(news_dl_path, 'w')
            if file then
                -- Fetch the article the item links to, not the feed itself
                local ok, status = http.request({ url = feed.link, sink = ltn12.sink.file(file), })
                -- The file sink normally closes the handle at end of
                -- stream; close it here if the transfer stopped early.
                if io.type(file) == "file" then
                    file:close()
                end
                if not ok then
                    logger.warn('NewsDownloader: download failed', feed.link, status)
                end
            else
                logger.warn('NewsDownloader: cannot open file for writing', news_dl_path, open_err)
            end
        else
            logger.warn('NewsDownloader: skipping feed item without title or link', feed)
        end
    end
end
|
||||
|
||||
return NewsDownloader
|
Loading…
Reference in New Issue