diff --git a/plugins/opds.koplugin/opdsparser.lua b/plugins/opds.koplugin/opdsparser.lua
index 20ee727ed..a1c247a99 100644
--- a/plugins/opds.koplugin/opdsparser.lua
+++ b/plugins/opds.koplugin/opdsparser.lua
@@ -72,10 +72,6 @@ function OPDSParser:createFlatXTable(xlex, curr_element)
end
function OPDSParser:parse(text)
- -- Murder Calibre's whole "content" block, because luxl doesn't really deal well with various XHTML quirks,
- -- as the list of crappy replacements below attests to...
- -- There's also a high probability of finding orphaned tags or badly nested ones in there, which will screw everything up.
- text = text:gsub('.-', '')
-- luxl doesn't handle XML comments, so strip them
text = text:gsub("", "")
-- luxl is also particular about the syntax for self-closing, empty & orphaned tags...
@@ -84,8 +80,18 @@ function OPDSParser:parse(text)
text = text:gsub("<([bh]r)>", "<%1 />")
-- Some OPDS catalogs wrap text in a CDATA section, remove it as it causes parsing problems
text = text:gsub("", function (s)
- return s:gsub( "%p", {["&"] = "&", ["<"] = "<", [">"] = ">" } )
+ return s:gsub("%p", {["&"] = "&", ["<"] = "<", [">"] = ">"})
end )
+
+ -- NOTE: OPDS content tags are likely to contain a bunch of HTML or XHTML. We do *NOT* want to let luxl parse that,
+ -- because it doesn't really deal well with various XHTML quirks, as the list of crappy replacements above attests to...
+ -- There's also a high probability of finding orphaned tags or badly nested ones in there, which would screw everything up.
+ -- We only want to treat the whole thing as a single text node anyway, so just mangle the markup to force luxl's hand.
+ text = text:gsub('', "")
+ text = text:gsub("(.-)", function (s)
+ return '' .. s:gsub("%p", {["<"] = "<", [">"] = ">", ['"'] = """, ["'"] = "'"}) .. ""
+ end )
+
local xlex = luxl.new(text, #text)
return assert(self:createFlatXTable(xlex))
end