OPDS*: Mangle Calibre feeds some more so that they don't confuse luxl/us (#6902)

This is done by essentially dropping the whole XHTML "content" block, instead of trying to salvage each and every tag one by one as we did before.

Also, since a nil URL is usually the result of broken parsing, handle nil URLs slightly better in the frontend, so that they get caught and reported properly instead of silently doing nothing and/or crashing half the time.
pull/6756/head
NiLuJe 3 years ago committed by GitHub
parent dde497a0df
commit 960b2ae62a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -33,13 +33,12 @@ end
function OPDSParser:createFlatXTable(xlex, curr_element)
curr_element = curr_element or {}
local curr_attr_name;
local attr_count = 0;
local curr_attr_name
local attr_count = 0
-- start reading the thing
local txt
for event, offset, size in xlex:Lexemes() do
txt = ffi.string(xlex.buf + offset, size)
local txt = ffi.string(xlex.buf + offset, size)
if event == luxl.EVENT_START then
if txt ~= "xml" then
-- does current element already have something
@ -61,7 +60,7 @@ function OPDSParser:createFlatXTable(xlex, curr_element)
curr_attr_name = unescape(txt)
elseif event == luxl.EVENT_ATTR_VAL then
curr_element[curr_attr_name] = unescape(txt)
attr_count = attr_count + 1;
attr_count = attr_count + 1
curr_attr_name = nil
elseif event == luxl.EVENT_TEXT then
curr_element = unescape(txt)
@ -73,16 +72,23 @@ function OPDSParser:createFlatXTable(xlex, curr_element)
end
function OPDSParser:parse(text)
-- luxl cannot properly handle xml comments and we need first remove them
text = text:gsub("<!--.--->", "")
-- luxl prefers <br />, other two forms are valid in HTML,
-- but will kick the ass of luxl
-- Murder Calibre's whole "content" block, because luxl doesn't really deal well with various XHTML quirks,
-- as the list of crappy replacements below attests to...
-- There's also a high probability of finding orphaned tags or badly nested ones in there, which will screw everything up.
text = text:gsub('<content type="xhtml">.-</content>', '')
-- luxl doesn't handle XML comments, so strip them
text = text:gsub("<!%-%-.-%-%->", "")
-- luxl prefers <br />, the other two forms are valid in HTML, but will kick luxl's ass
text = text:gsub("<br>", "<br />")
text = text:gsub("<br/>", "<br />")
-- Same deal with hr
text = text:gsub("<hr>", "<hr />")
text = text:gsub("<hr/>", "<hr />")
-- some OPDS catalogs wrap text in a CDATA section, remove it as it causes parsing problems
-- It's also allergic to orphaned <em/> (As opposed to a balanced <em></em> pair)...
text = text:gsub("<em/>", "")
-- Let's assume it might also happen to strong...
text = text:gsub("<strong/>", "")
-- Some OPDS catalogs wrap text in a CDATA section, remove it as it causes parsing problems
text = text:gsub("<!%[CDATA%[(.-)%]%]>", function (s)
return s:gsub( "%p", {["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;" } )
end )

@ -78,7 +78,7 @@ function OPDSBrowser:init()
servers = {
{
title = "Project Gutenberg",
url = "http://m.gutenberg.org/ebooks.opds/?format=opds",
url = "https://m.gutenberg.org/ebooks.opds/?format=opds",
},
{
title = "Project Gutenberg [Searchable]",
@ -87,11 +87,11 @@ function OPDSBrowser:init()
},
{
title = "Feedbooks",
url = "http://www.feedbooks.com/publicdomain/catalog.atom",
url = "https://catalog.feedbooks.com/catalog/public_domain.atom",
},
{
title = "ManyBooks",
url = "http://manybooks.net/opds/index.php",
url = "https://manybooks.net/opds/index.php",
},
{
title = "Internet Archive",
@ -99,11 +99,11 @@ function OPDSBrowser:init()
},
{
title = "Flibusta (Russian)",
url = "http://www.flibusta.is/opds",
url = "https://www.flibusta.is/opds",
},
{
title = "Flibusta [Ru] [Searchable]",
url = "http://www.flibusta.is/opds/search?searchTerm=%s",
url = "https://www.flibusta.is/opds/search?searchTerm=%s",
searchable = true,
},
{
@ -388,9 +388,9 @@ end
function OPDSBrowser:getCatalog(item_url, username, password)
local ok, catalog = pcall(self.parseFeed, self, item_url, username, password)
if not ok and catalog then
logger.info("cannot get catalog info from", item_url, catalog)
logger.info("cannot get catalog info from", item_url or "nil", catalog)
UIManager:show(InfoMessage:new{
text = T(_("Cannot get catalog info from %1"), (BD.url(item_url) or "")),
text = T(_("Cannot get catalog info from %1"), (item_url and BD.url(item_url) or "nil")),
})
return
end
@ -498,7 +498,7 @@ function OPDSBrowser:genItemTableFromCatalog(catalog, item_url, username, passwo
end
end
if author then
item.text = title .. "\n" .. author
item.text = title .. " - " .. author
end
end
item.title = title
@ -676,7 +676,7 @@ function OPDSBrowser:showDownloads(item)
end
function OPDSBrowser:browse(browse_url, username, password)
logger.dbg("Browse opds url", browse_url)
logger.dbg("Browse opds url", browse_url or "nil")
table.insert(self.paths, {
url = browse_url,
username = username,

Loading…
Cancel
Save