From 098c1a7844937b8d72c9e7525ddb17216d7b8f78 Mon Sep 17 00:00:00 2001 From: NiLuJe Date: Sat, 14 Mar 2020 21:17:42 +0100 Subject: [PATCH] [fix] Don't break OPDS parsing on HR tags (#5949) Apply the same crappy workaround as for BR. Fix #5948 --- frontend/luxl.lua | 101 ++++++++++++++++++++----------------- frontend/ui/opdsparser.lua | 3 ++ 2 files changed, 58 insertions(+), 46 deletions(-) diff --git a/frontend/luxl.lua b/frontend/luxl.lua index 5eebda773..ce7d0142d 100644 --- a/frontend/luxl.lua +++ b/frontend/luxl.lua @@ -8,6 +8,7 @@ Pure Lua Version written by: William A Adams Dramatic Speed Improvements by: Robert G Jakabosky + https://github.com/Wiladams/LAPHLibs/blob/master/laphlibs/luxl.lua References @@ -20,10 +21,15 @@ local ffi = require "ffi" local bit = require "bit" local band = bit.band + --[[ - Types of characters; 0 is not valid, 1 is letters, 2 are digits - (including '.') and 3 whitespace. + Types of characters; + 0 is not valid + 1 is letters, + 2 are digits (including '.') + 3 whitespace --]] + local char_type = ffi.new("const int[256]", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -43,22 +49,24 @@ local char_type = ffi.new("const int[256]", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }); + + -- Internal states that the parser can be in at any given time. 
-local ST_START = 0; -- starting base state; default state +local ST_START = 0; -- starting base state; default state local ST_TEXT =1; -- text state -local ST_START_TAG = 2; -- start tag state +local ST_START_TAG = 2; -- start tag state local ST_START_TAGNAME =3; -- start tagname state local ST_START_TAGNAME_END =4; -- start tagname ending state local ST_END_TAG =5; -- end tag state -local ST_END_TAGNAME=6; -- end tag tagname state -local ST_END_TAGNAME_END=7; -- end tag tagname ending -local ST_EMPTY_TAG=8; -- empty tag state -local ST_SPACE=9; -- linear whitespace state +local ST_END_TAGNAME=6; -- end tag tagname state +local ST_END_TAGNAME_END=7; -- end tag tagname ending +local ST_EMPTY_TAG=8; -- empty tag state +local ST_SPACE=9; -- linear whitespace state local ST_ATTR_NAME=10; -- attribute name state local ST_ATTR_NAME_END=11; -- attribute name ending state local ST_ATTR_VAL=12; -- attribute value starting state local ST_ATTR_VAL2=13; -- attribute value state -local ST_ERROR=14; -- error state +local ST_ERROR=14; -- error state -- character classes that we will match against; This could be expanded if -- need be, however, we are aiming for simple. @@ -74,14 +82,14 @@ local CCLASS_ANY=8; -- matches any ASCII character; will match all ab -- Types of events: start element, end element, text, attr name, attr -- val and start/end document. Other events can be ignored! 
-local EVENT_START = 0; -- Start tag -local EVENT_END = 1; -- End tag -local EVENT_TEXT = 2; -- Text -local EVENT_ATTR_NAME = 3; -- Attribute name -local EVENT_ATTR_VAL = 4; -- Attribute value -local EVENT_END_DOC = 5; -- End of document -local EVENT_MARK = 6; -- Internal only; notes position in buffer -local EVENT_NONE = 7; -- Internal only; should never see this event +local EVENT_START = 0; -- Start tag +local EVENT_END = 1; -- End tag +local EVENT_TEXT = 2; -- Text +local EVENT_ATTR_NAME = 3; -- Attribute name +local EVENT_ATTR_VAL = 4; -- Attribute value +local EVENT_END_DOC = 5; -- End of document +local EVENT_MARK = 6; -- Internal only; notes position in buffer +local EVENT_NONE = 7; -- Internal only; should never see this event local entity_refs = { ["<"] = '<', @@ -133,7 +141,7 @@ local LEXER_STATES = { -- [6-8] handle start tag { state = ST_START_TAG, cclass = CCLASS_LETTERS, next_state = ST_START_TAGNAME, event = EVENT_MARK }, { state = ST_START_TAG, cclass = CCLASS_SLASH, next_state = ST_END_TAG, event = EVENT_MARK }, - { state = ST_START_TAG, cclass = CCLASS_SPACE, next_state = ST_START_TAG, event = EVENT_NONE }, -- < tag > + { state = ST_START_TAG, cclass = CCLASS_SPACE, next_state = ST_START_TAG, event = EVENT_NONE }, -- < tag > -- [9-12] handle start tag name { state = ST_START_TAGNAME, cclass = CCLASS_LETTERS, next_state = ST_START_TAGNAME, event = EVENT_NONE }, @@ -145,10 +153,10 @@ local LEXER_STATES = { { state = ST_START_TAGNAME_END, cclass = CCLASS_LETTERS, next_state = ST_ATTR_NAME, event = EVENT_MARK }, { state = ST_START_TAGNAME_END, cclass = CCLASS_SPACE, next_state = ST_START_TAGNAME_END, event = EVENT_NONE }, { state = ST_START_TAGNAME_END, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_START }, - { state = ST_START_TAGNAME_END, cclass = CCLASS_SLASH, next_state = ST_EMPTY_TAG, event = EVENT_MARK }, -- Empty tag
+ { state = ST_START_TAGNAME_END, cclass = CCLASS_SLASH, next_state = ST_EMPTY_TAG, event = EVENT_MARK }, -- Empty tag <br />
-- [17] handle empty tags, e.g., <br />
- { state = ST_EMPTY_TAG, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_END }, -- Empty tag <br />
+ { state = ST_EMPTY_TAG, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_END }, -- Empty tag <br />
-- [18] handle end tag, e.g., { state = ST_END_TAG, cclass = CCLASS_LETTERS, next_state = ST_END_TAGNAME, event = EVENT_NONE }, @@ -156,7 +164,7 @@ local LEXER_STATES = { -- [19-21] handle end tag name { state = ST_END_TAGNAME, cclass = CCLASS_LETTERS, next_state = ST_END_TAGNAME, event = EVENT_NONE }, { state = ST_END_TAGNAME, cclass = CCLASS_RIGHT_ANGLE, next_state = ST_START, event = EVENT_END }, - { state = ST_END_TAGNAME, cclass = CCLASS_SPACE, next_state = ST_END_TAGNAME_END, event = EVENT_END }, -- space after end tag name
+ { state = ST_END_TAGNAME, cclass = CCLASS_SPACE, next_state = ST_END_TAGNAME_END, event = EVENT_END }, -- space after end tag name
-- [22-23] handle ending of end tag name { state = ST_END_TAGNAME_END, cclass = CCLASS_SPACE, next_state = ST_END_TAGNAME_END, event = EVENT_NONE }, @@ -169,8 +177,8 @@ local LEXER_STATES = { -- [27-29] handle attribute names { state = ST_ATTR_NAME, cclass = CCLASS_LETTERS, next_state = ST_ATTR_NAME, event = EVENT_MARK }, - { state = ST_ATTR_NAME, cclass = CCLASS_SPACE, next_state = ST_ATTR_NAME_END, event = EVENT_ATTR_NAME }, -- space before '=' sign - { state = ST_ATTR_NAME, cclass = CCLASS_EQUALS, next_state = ST_ATTR_VAL, event = EVENT_ATTR_NAME }, -- + { state = ST_ATTR_NAME, cclass = CCLASS_SPACE, next_state = ST_ATTR_NAME_END, event = EVENT_ATTR_NAME }, -- space before '=' sign + { state = ST_ATTR_NAME, cclass = CCLASS_EQUALS, next_state = ST_ATTR_VAL, event = EVENT_ATTR_NAME }, -- -- [30-32] attribute name end { state = ST_ATTR_NAME_END, cclass = CCLASS_SPACE, next_state = ST_ATTR_NAME_END, event = EVENT_NONE }, @@ -198,6 +206,7 @@ struct parse_state { int i; int ix; /* index into buffer */ }; + ]] local cclass_match = { @@ -320,28 +329,28 @@ fsm_code = nil local luxl = { - EVENT_START = EVENT_START; -- Start tag - EVENT_END = EVENT_END; -- End tag - EVENT_TEXT = EVENT_TEXT; -- Text + EVENT_START = EVENT_START; -- Start tag + EVENT_END = EVENT_END; -- End tag + EVENT_TEXT = EVENT_TEXT; -- Text EVENT_ATTR_NAME = EVENT_ATTR_NAME; -- Attribute name - EVENT_ATTR_VAL = EVENT_ATTR_VAL; -- Attribute value - EVENT_END_DOC = EVENT_END_DOC; -- End of document - EVENT_MARK = EVENT_MARK; -- Internal only; notes position in buffer - EVENT_NONE = EVENT_NONE; -- Internal only; should never see this event + EVENT_ATTR_VAL = EVENT_ATTR_VAL; -- Attribute value + EVENT_END_DOC = EVENT_END_DOC; -- End of document + EVENT_MARK = EVENT_MARK; -- Internal only; notes position in buffer + EVENT_NONE = EVENT_NONE; -- Internal only; should never see this event } local luxl_mt = { __index = luxl } function luxl.new(buffer, bufflen) local newone = { - buf = ffi.cast("const uint8_t *", 
buffer); -- pointer to "uint8_t *" buffer (0 based) - bufsz = bufflen; -- size of input buffer - state = ST_START; -- current state - event = EVENT_NONE; -- current event - err = 0; -- number of errors thus far - markix = 0; -- offset of current item of interest - marksz = 0; -- size of current item of interest - MsgHandler = nil; -- Routine to handle messages - ErrHandler = nil; -- Routine to call when there's an error + buf = ffi.cast("const uint8_t *", buffer); -- pointer to "uint8_t *" buffer (0 based) + bufsz = bufflen; -- size of input buffer + state = ST_START; -- current state + event = EVENT_NONE; -- current event + err = 0; -- number of errors thus far + markix = 0; -- offset of current item of interest + marksz = 0; -- size of current item of interest + MsgHandler = nil; -- Routine to handle messages + ErrHandler = nil; -- Routine to call when there's an error EventHandler = nil; ps = ffi.new('struct parse_state', { buf = buffer, @@ -357,13 +366,13 @@ function luxl.new(buffer, bufflen) end function luxl:Reset(buffer, bufflen) - self.buf = buffer -- pointer to "uint8_t *" buffer (0 based) - self.bufsz = bufflen -- size of input buffer - self.state = ST_START -- current state - self.event = EVENT_NONE -- current event - self.err = 0 -- number of errors thus far - self.markix = 0 -- offset of current item of interest - self.marksz = 0 -- size of current item of interest + self.buf = buffer -- pointer to "uint8_t *" buffer (0 based) + self.bufsz = bufflen -- size of input buffer + self.state = ST_START -- current state + self.event = EVENT_NONE -- current event + self.err = 0 -- number of errors thus far + self.markix = 0 -- offset of current item of interest + self.marksz = 0 -- size of current item of interest local ps = self.ps ps.buf = buffer ps.bufsz = bufflen diff --git a/frontend/ui/opdsparser.lua b/frontend/ui/opdsparser.lua index f415c294a..a78eadcf7 100644 --- a/frontend/ui/opdsparser.lua +++ b/frontend/ui/opdsparser.lua @@ -79,6 +79,9 @@ function 
OPDSParser:parse(text) -- but will kick the ass of luxl text = text:gsub("<br>", "<br />") text = text:gsub("<br/>", "<br />") + -- Same deal with hr + text = text:gsub("<hr>", "<hr />") + text = text:gsub("<hr/>", "<hr />") -- some OPDS catalogs wrap text in a CDATA section, remove it as it causes parsing problems text = text:gsub("<!%[CDATA%[(.-)%]%]>", function (s) return s:gsub( "%p", {["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;" } )