diff --git a/frontend/luxl.lua b/frontend/luxl.lua
index 5eebda773..ce7d0142d 100644
--- a/frontend/luxl.lua
+++ b/frontend/luxl.lua
@@ -8,6 +8,7 @@
Pure Lua Version written by: William A Adams
Dramatic Speed Improvements by: Robert G Jakabosky
+ https://github.com/Wiladams/LAPHLibs/blob/master/laphlibs/luxl.lua
References
@@ -20,10 +21,15 @@ local ffi = require "ffi"
local bit = require "bit"
local band = bit.band
+
--[[
- Types of characters; 0 is not valid, 1 is letters, 2 are digits
- (including '.') and 3 whitespace.
+ Types of characters:
+ 0 is not valid,
+ 1 is letters,
+ 2 is digits (including '.'),
+ 3 is whitespace.
--]]
+
local char_type = ffi.new("const int[256]", {
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 3, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -43,22 +49,24 @@ local char_type = ffi.new("const int[256]", {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
});
+
+
-- Internal states that the parser can be in at any given time.
-local ST_START = 0; -- starting base state; default state
+local ST_START = 0; -- starting base state; default state
local ST_TEXT =1; -- text state
-local ST_START_TAG = 2; -- start tag state
+local ST_START_TAG = 2; -- start tag state
local ST_START_TAGNAME =3; -- start tagname state
local ST_START_TAGNAME_END =4; -- start tagname ending state
local ST_END_TAG =5; -- end tag state
-local ST_END_TAGNAME=6; -- end tag tagname state
-local ST_END_TAGNAME_END=7; -- end tag tagname ending
-local ST_EMPTY_TAG=8; -- empty tag state
-local ST_SPACE=9; -- linear whitespace state
+local ST_END_TAGNAME=6; -- end tag tagname state
+local ST_END_TAGNAME_END=7; -- end tag tagname ending
+local ST_EMPTY_TAG=8; -- empty tag state
+local ST_SPACE=9; -- linear whitespace state
local ST_ATTR_NAME=10; -- attribute name state
local ST_ATTR_NAME_END=11; -- attribute name ending state
local ST_ATTR_VAL=12; -- attribute value starting state
local ST_ATTR_VAL2=13; -- attribute value state
-local ST_ERROR=14; -- error state
+local ST_ERROR=14; -- error state
-- character classes that we will match against; This could be expanded if
-- need be, however, we are aiming for simple.
@@ -74,14 +82,14 @@ local CCLASS_ANY=8; -- matches any ASCII character; will match all ab
-- Types of events: start element, end element, text, attr name, attr
-- val and start/end document. Other events can be ignored!
-local EVENT_START = 0; -- Start tag
-local EVENT_END = 1; -- End tag
-local EVENT_TEXT = 2; -- Text
-local EVENT_ATTR_NAME = 3; -- Attribute name
-local EVENT_ATTR_VAL = 4; -- Attribute value
-local EVENT_END_DOC = 5; -- End of document
-local EVENT_MARK = 6; -- Internal only; notes position in buffer
-local EVENT_NONE = 7; -- Internal only; should never see this event
+local EVENT_START = 0; -- Start tag
+local EVENT_END = 1; -- End tag
+local EVENT_TEXT = 2; -- Text
+local EVENT_ATTR_NAME = 3; -- Attribute name
+local EVENT_ATTR_VAL = 4; -- Attribute value
+local EVENT_END_DOC = 5; -- End of document
+local EVENT_MARK = 6; -- Internal only; notes position in buffer
+local EVENT_NONE = 7; -- Internal only; should never see this event
local entity_refs = {
["<"] = '<',
@@ -133,7 +141,7 @@ local LEXER_STATES = {
-- [6-8] handle start tag
{ state = ST_START_TAG, cclass = CCLASS_LETTERS, next_state = ST_START_TAGNAME, event = EVENT_MARK },
{ state = ST_START_TAG, cclass = CCLASS_SLASH, next_state = ST_END_TAG, event = EVENT_MARK },
- { state = ST_START_TAG, cclass = CCLASS_SPACE, next_state = ST_START_TAG, event = EVENT_NONE }, -- < tag >
+ { state = ST_START_TAG, cclass = CCLASS_SPACE, next_state = ST_START_TAG, event = EVENT_NONE }, -- < tag >
-- [9-12] handle start tag name
{ state = ST_START_TAGNAME, cclass = CCLASS_LETTERS, next_state = ST_START_TAGNAME, event = EVENT_NONE },
@@ -145,10 +153,10 @@ local LEXER_STATES = {
{ state = ST_START_TAGNAME_END, cclass = CCLASS_LETTERS, next_state = ST_ATTR_NAME, event = EVENT_MARK },
{ state = ST_START_TAGNAME_END, cclass = CCLASS_SPACE, next_state = ST_START_TAGNAME_END, event = EVENT_NONE },
{ state = ST_START_TAGNAME_END, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_START },
- { state = ST_START_TAGNAME_END, cclass = CCLASS_SLASH, next_state = ST_EMPTY_TAG, event = EVENT_MARK }, -- Empty tag
+ { state = ST_START_TAGNAME_END, cclass = CCLASS_SLASH, next_state = ST_EMPTY_TAG, event = EVENT_MARK }, -- Empty tag
-- [17] handle empty tags, e.g.,
- { state = ST_EMPTY_TAG, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_END }, -- Empty tag
+ { state = ST_EMPTY_TAG, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_END }, -- Empty tag
-- [18] handle end tag, e.g.,
{ state = ST_END_TAG, cclass = CCLASS_LETTERS, next_state = ST_END_TAGNAME, event = EVENT_NONE },
@@ -156,7 +164,7 @@ local LEXER_STATES = {
-- [19-21] handle end tag name
{ state = ST_END_TAGNAME, cclass = CCLASS_LETTERS, next_state = ST_END_TAGNAME, event = EVENT_NONE },
{ state = ST_END_TAGNAME, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_END },
- { state = ST_END_TAGNAME, cclass = CCLASS_SPACE, next_state = ST_END_TAGNAME_END, event = EVENT_END }, -- space after end tag name
+ { state = ST_END_TAGNAME, cclass = CCLASS_SPACE, next_state = ST_END_TAGNAME_END, event = EVENT_END }, -- space after end tag name
-- [22-23] handle ending of end tag name
{ state = ST_END_TAGNAME_END, cclass = CCLASS_SPACE, next_state = ST_END_TAGNAME_END, event = EVENT_NONE },
@@ -169,8 +177,8 @@ local LEXER_STATES = {
-- [27-29] handle attribute names
{ state = ST_ATTR_NAME, cclass = CCLASS_LETTERS, next_state = ST_ATTR_NAME, event = EVENT_MARK },
- { state = ST_ATTR_NAME, cclass = CCLASS_SPACE, next_state = ST_ATTR_NAME_END, event = EVENT_ATTR_NAME }, -- space before '=' sign
- { state = ST_ATTR_NAME, cclass = CCLASS_EQUALS, next_state = ST_ATTR_VAL, event = EVENT_ATTR_NAME }, --
+ { state = ST_ATTR_NAME, cclass = CCLASS_SPACE, next_state = ST_ATTR_NAME_END, event = EVENT_ATTR_NAME }, -- space before '=' sign
+ { state = ST_ATTR_NAME, cclass = CCLASS_EQUALS, next_state = ST_ATTR_VAL, event = EVENT_ATTR_NAME }, --
-- [30-32] attribute name end
{ state = ST_ATTR_NAME_END, cclass = CCLASS_SPACE, next_state = ST_ATTR_NAME_END, event = EVENT_NONE },
@@ -198,6 +206,7 @@ struct parse_state {
int i;
int ix; /* index into buffer */
};
+
]]
local cclass_match = {
@@ -320,28 +329,28 @@ fsm_code = nil
local luxl = {
- EVENT_START = EVENT_START; -- Start tag
- EVENT_END = EVENT_END; -- End tag
- EVENT_TEXT = EVENT_TEXT; -- Text
+ EVENT_START = EVENT_START; -- Start tag
+ EVENT_END = EVENT_END; -- End tag
+ EVENT_TEXT = EVENT_TEXT; -- Text
EVENT_ATTR_NAME = EVENT_ATTR_NAME; -- Attribute name
- EVENT_ATTR_VAL = EVENT_ATTR_VAL; -- Attribute value
- EVENT_END_DOC = EVENT_END_DOC; -- End of document
- EVENT_MARK = EVENT_MARK; -- Internal only; notes position in buffer
- EVENT_NONE = EVENT_NONE; -- Internal only; should never see this event
+ EVENT_ATTR_VAL = EVENT_ATTR_VAL; -- Attribute value
+ EVENT_END_DOC = EVENT_END_DOC; -- End of document
+ EVENT_MARK = EVENT_MARK; -- Internal only; notes position in buffer
+ EVENT_NONE = EVENT_NONE; -- Internal only; should never see this event
}
local luxl_mt = { __index = luxl }
function luxl.new(buffer, bufflen)
local newone = {
- buf = ffi.cast("const uint8_t *", buffer); -- pointer to "uint8_t *" buffer (0 based)
- bufsz = bufflen; -- size of input buffer
- state = ST_START; -- current state
- event = EVENT_NONE; -- current event
- err = 0; -- number of errors thus far
- markix = 0; -- offset of current item of interest
- marksz = 0; -- size of current item of interest
- MsgHandler = nil; -- Routine to handle messages
- ErrHandler = nil; -- Routine to call when there's an error
+ buf = ffi.cast("const uint8_t *", buffer); -- pointer to "uint8_t *" buffer (0 based)
+ bufsz = bufflen; -- size of input buffer
+ state = ST_START; -- current state
+ event = EVENT_NONE; -- current event
+ err = 0; -- number of errors thus far
+ markix = 0; -- offset of current item of interest
+ marksz = 0; -- size of current item of interest
+ MsgHandler = nil; -- Routine to handle messages
+ ErrHandler = nil; -- Routine to call when there's an error
EventHandler = nil;
ps = ffi.new('struct parse_state', {
buf = buffer,
@@ -357,13 +366,13 @@ function luxl.new(buffer, bufflen)
end
function luxl:Reset(buffer, bufflen)
- self.buf = buffer -- pointer to "uint8_t *" buffer (0 based)
- self.bufsz = bufflen -- size of input buffer
- self.state = ST_START -- current state
- self.event = EVENT_NONE -- current event
- self.err = 0 -- number of errors thus far
- self.markix = 0 -- offset of current item of interest
- self.marksz = 0 -- size of current item of interest
+ self.buf = buffer -- pointer to "uint8_t *" buffer (0 based)
+ self.bufsz = bufflen -- size of input buffer
+ self.state = ST_START -- current state
+ self.event = EVENT_NONE -- current event
+ self.err = 0 -- number of errors thus far
+ self.markix = 0 -- offset of current item of interest
+ self.marksz = 0 -- size of current item of interest
local ps = self.ps
ps.buf = buffer
ps.bufsz = bufflen
diff --git a/frontend/ui/opdsparser.lua b/frontend/ui/opdsparser.lua
index f415c294a..a78eadcf7 100644
--- a/frontend/ui/opdsparser.lua
+++ b/frontend/ui/opdsparser.lua
@@ -79,6 +79,9 @@ function OPDSParser:parse(text)
-- but will kick the ass of luxl
text = text:gsub("
", "
")
text = text:gsub("
", "
")
+ -- Same deal with hr
+ text = text:gsub("
", "
")
+ text = text:gsub("
", "
")
-- some OPDS catalogs wrap text in a CDATA section, remove it as it causes parsing problems
text = text:gsub("", function (s)
return s:gsub( "%p", {["&"] = "&", ["<"] = "<", [">"] = ">" } )