diff --git a/frontend/apps/reader/modules/readerwikipedia.lua b/frontend/apps/reader/modules/readerwikipedia.lua index d42733d8c..ad2305281 100644 --- a/frontend/apps/reader/modules/readerwikipedia.lua +++ b/frontend/apps/reader/modules/readerwikipedia.lua @@ -6,6 +6,7 @@ local KeyValuePage = require("ui/widget/keyvaluepage") local LuaData = require("luadata") local NetworkMgr = require("ui/network/manager") local ReaderDictionary = require("apps/reader/modules/readerdictionary") +local Trapper = require("ui/trapper") local Translator = require("ui/translator") local UIManager = require("ui/uimanager") local Wikipedia = require("ui/wikipedia") @@ -21,7 +22,6 @@ local ReaderWikipedia = ReaderDictionary:extend{ -- identify itself is_wiki = true, wiki_languages = {}, - no_page = _("No wiki page found."), disable_history = G_reader_settings:isTrue("wikipedia_disable_history"), } @@ -271,7 +271,29 @@ function ReaderWikipedia:addToMainMenu(menu_items) end, }) end, - } + separator = true, + }, + { -- setting used in wikipedia.lua + text = _("Show image in search results"), + checked_func = function() + return G_reader_settings:nilOrTrue("wikipedia_show_image") + end, + callback = function() + G_reader_settings:flipNilOrTrue("wikipedia_show_image") + end, + }, + { -- setting used in wikipedia.lua + text = _("Show more images in full article"), + enabled_func = function() + return G_reader_settings:nilOrTrue("wikipedia_show_image") + end, + checked_func = function() + return G_reader_settings:nilOrTrue("wikipedia_show_more_images") and G_reader_settings:nilOrTrue("wikipedia_show_image") + end, + callback = function() + G_reader_settings:flipNilOrTrue("wikipedia_show_more_images") + end, + }, } } end @@ -319,6 +341,14 @@ function ReaderWikipedia:initLanguages(word) end function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang) + -- Wrapped through Trapper, as we may be using Trapper:dismissableRunInSubprocess() in it + Trapper:wrap(function() + self:lookupWikipedia(word, box, get_fullpage, forced_lang) + end) + return true +end + +function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang) if not NetworkMgr:isOnline() then NetworkMgr:promptWifiOn() return @@ -358,19 +388,35 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang) }) end - -- Fix lookup message to include lang + -- Fix lookup message to include lang and set appropriate error texts + local no_result_text, req_failure_text if get_fullpage then - self.lookup_msg = T(_("Getting Wikipedia %2 page:\n%1"), "%1", lang:upper()) + self.lookup_msg = T(_("Retrieving Wikipedia %2 article:\n%1"), "%1", lang:upper()) + req_failure_text = _("Failed to retrieve Wikipedia article.") + no_result_text = _("Wikipedia article not found.") else self.lookup_msg = T(_("Searching Wikipedia %2 for:\n%1"), "%1", lang:upper()) + req_failure_text = _("Failed searching Wikipedia.") + no_result_text = _("No Wikipedia articles matching search term.") end self:showLookupInfo(display_word) + local results = {} local ok, pages + local lookup_cancelled = false + Wikipedia:setTrapWidget(self.lookup_progress_msg) if get_fullpage then - ok, pages = pcall(Wikipedia.wikifull, Wikipedia, word, lang) + ok, pages = pcall(Wikipedia.getFullPage, Wikipedia, word, lang) else - ok, pages = pcall(Wikipedia.wikintro, Wikipedia, word, lang) + ok, pages = pcall(Wikipedia.searchAndGetIntros, Wikipedia, word, lang) + end + Wikipedia:resetTrapWidget() + if not ok and pages and string.find(pages, 
Wikipedia.dismissed_error_code) then + -- So we can display an alternate dummy result + lookup_cancelled = true + -- Or we could just not show anything with: + -- self:dismissLookupInfo() + -- return end if ok and pages then -- sort pages according to 'index' attribute if present (not present @@ -387,14 +433,14 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang) pages = sorted_pages end for pageid, page in pairs(pages) do - local definition = page.extract or self.no_page + local definition = page.extract or no_result_text if page.length then -- we get 'length' only for intro results -- let's append it to definition so we know -- how big/valuable the full page is local fullkb = math.ceil(page.length/1024) local more_factor = math.ceil( page.length / (1+definition:len()) ) -- +1 just in case len()=0 - definition = definition .. "\n" .. T(_("(full page : %1 kB, = %2 x this intro length)"), fullkb, more_factor) + definition = definition .. "\n" .. T(_("(full article : %1 kB, = %2 x this intro length)"), fullkb, more_factor) end local result = { dict = T(_("Wikipedia %1"), lang:upper()), @@ -402,18 +448,27 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang) definition = definition, is_fullpage = get_fullpage, lang = lang, + images = page.images, } table.insert(results, result) end -- logger.dbg of results will be done by ReaderDictionary:showDict() else - logger.dbg("error:", pages) -- dummy results + local definition + if lookup_cancelled then + definition = _("Wikipedia request canceled.") + elseif ok then + definition = no_result_text + else + definition = req_failure_text + logger.dbg("error:", pages) + end results = { { dict = T(_("Wikipedia %1"), lang:upper()), word = word, - definition = self.no_page, + definition = definition, is_fullpage = get_fullpage, lang = lang, } diff --git a/frontend/ui/wikipedia.lua b/frontend/ui/wikipedia.lua index 09e0141bb..4d8253098 100644 --- a/frontend/ui/wikipedia.lua +++ b/frontend/ui/wikipedia.lua @@ -1,8 +1,10 @@ local JSON = require("json") +local Screen = require("device").screen +local ffiutil = require("ffi/util") local logger = require("logger") -local util = require("ffi/util") +local util = require("util") local _ = require("gettext") -local T = require("ffi/util").template +local T = ffiutil.template --[[ -- Query wikipedia using Wikimedia Web API. @@ -18,18 +20,9 @@ local T = require("ffi/util").template local Wikipedia = { wiki_server = "https://%s.wikipedia.org", wiki_path = "/w/api.php", - wiki_params = { - action = "query", - prop = "extracts", - format = "json", - -- exintro = nil, -- get more than only the intro - explaintext = "", - redirects = "", - -- title = nil, -- text to lookup, will be added below - }, default_lang = "en", - -- Search query for better results - -- see https://www.mediawiki.org/wiki/API:Main_page + -- See https://www.mediawiki.org/wiki/API:Main_page for details. 
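+    -- (Illustrative example: with the search params below, a lookup of "ereader"
+    -- on lang "en" ends up requesting a URL like
+    -- https://en.wikipedia.org/w/api.php?action=query&generator=search&gsrlimit=20&exlimit=max&prop=extracts|info|pageimages&format=json&explaintext=&exintro=&gsrsearch=ereader
+    -- with the exact parameter order depending on Lua's pairs() traversal.)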
+    -- Search query, returns introductory texts (+ main thumbnail image)
     wiki_search_params = {
         action = "query",
         generator = "search",
         -- gsrsearch = nil, -- text to lookup, will be added below
         gsrlimit = 20, -- max nb of results to get
         exlimit = "max",
-        prop = "extracts|info", -- 'extracts' to get text, 'info' to get full page length
+        prop = "extracts|info|pageimages", -- 'extracts' to get text, 'info' to get full page length
         format = "json",
         explaintext = "",
         exintro = "",
@@ -45,6 +38,17 @@ local Wikipedia = {
         -- (otherwise, we get the full text for only the first result, and
         -- no text at all for the others
     },
+    -- Full article, parsed to output text (+ main thumbnail image)
+    wiki_full_params = {
+        action = "query",
+        prop = "extracts|pageimages",
+        format = "json",
+        -- exintro = nil, -- get more than only the intro
+        explaintext = "",
+        redirects = "",
+        -- title = nil, -- text to lookup, will be added below
+    },
+    -- Full article, parsed to output HTML, for Save as EPUB
     wiki_phtml_params = {
         action = "parse",
         format = "json",
@@ -55,32 +59,152 @@ local Wikipedia = {
         disablelimitreport = "",
         disableeditsection = "",
     },
-    -- allow for disabling prettifying full page text
+    -- Full article, parsed to output HTML, for image extraction
+    -- (used with full article as text, if "show more images" enabled)
+    wiki_images_params = { -- same as previous one, with just text html
+        action = "parse",
+        format = "json",
+        -- we only need the following information
+        prop = "text",
+        -- page = nil, -- text to lookup, will be added below
+        redirects = "",
+        disabletoc = "", -- remove toc in html
+        disablelimitreport = "",
+        disableeditsection = "",
+    },
+    -- There is an alternative way of obtaining a page's images:
+    -- prop=imageinfo&action=query&iiprop=url|dimensions|mime|extmetadata&generator=images&pageids=49448&iiurlwidth=100&iiextmetadatafilter=ImageDescription
+    -- but it gives all images (including wikipedia icons) in any order, without
+    -- any score or information that would help in deciding whether they matter or not
+    --
+
+    -- Allow for disabling prettifying full page text
     wiki_prettify = G_reader_settings:nilOrTrue("wikipedia_prettify"),
+
+    -- Can be set so HTTP requests will be done under Trapper and
+    -- be interruptible
+    trap_widget = nil,
+    -- For actions done with Trapper:dismissable methods, we may throw
+    -- an error() with this code. We make the value of this error
+    -- accessible here so that the caller can know it's a user dismiss.
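+    -- (e.g., after a pcall() around a Wikipedia method, a caller can check:
+    --   if not ok and err and string.find(err, Wikipedia.dismissed_error_code) then ...
+    -- as readerwikipedia.lua does above)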
+ dismissed_error_code = "Interrupted by user", } function Wikipedia:getWikiServer(lang) return string.format(self.wiki_server, lang or self.default_lang) end +-- Codes that getUrlContent may get from requester.request() +local TIMEOUT_CODE = "timeout" -- from socket.lua +local MAXTIME_CODE = "maxtime reached" -- from sink_table_with_maxtime + +-- Sink that stores into a table, aborting if maxtime has elapsed +local function sink_table_with_maxtime(t, maxtime) + -- Start counting as soon as this sink is created + local start_secs, start_usecs = ffiutil.gettime() + local starttime = start_secs + start_usecs/1000000 + t = t or {} + local f = function(chunk, err) + local secs, usecs = ffiutil.gettime() + if secs + usecs/1000000 - starttime > maxtime then + return nil, MAXTIME_CODE + end + if chunk then table.insert(t, chunk) end + return 1 + end + return f, t +end + +-- Get URL content +local function getUrlContent(url, timeout, maxtime) + local socket = require('socket') + local ltn12 = require('ltn12') + local http = require('socket.http') + local https = require('ssl.https') + + local requester + if url:sub(1,7) == "http://" then + requester = http + elseif url:sub(1,8) == "https://" then + requester = https + else + return false, "Unsupported protocol" + end + if not timeout then timeout = 10 end + -- timeout needs to be set to 'http', even if we use 'https' + http.TIMEOUT, https.TIMEOUT = timeout, timeout + + local request = {} + local sink = {} + request['url'] = url + request['method'] = 'GET' + -- 'timeout' delay works on socket, and is triggered when + -- that time has passed trying to connect, or after connection + -- when no data has been read for this time. + -- On a slow connection, it may not be triggered (as we could read + -- 1 byte every 1 second, not triggering any timeout). + -- 'maxtime' can be provided to overcome that, and we start counting + -- as soon as the first content byte is received (but it is checked + -- for only when data is received). + -- Setting 'maxtime' and 'timeout' gives more chance to abort the request when + -- it takes too much time (in the worst case: in timeout+maxtime seconds). + -- But time taken by DNS lookup cannot easily be accounted for, so + -- a request may (when dns lookup takes time) exceed timeout and maxtime... 
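+    -- (For example, with hypothetical values, getUrlContent(some_url, 10, 60) would
+    -- return false plus "timeout" if connecting or a single read stalls for 10 seconds,
+    -- and false plus "maxtime reached" if the whole download takes more than 60 seconds.)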
+ if maxtime then + request['sink'] = sink_table_with_maxtime(sink, maxtime) + else + request['sink'] = ltn12.sink.table(sink) + end + + local code, headers, status = socket.skip(1, requester.request(request)) + local content = table.concat(sink) -- empty or content accumulated till now + -- logger.dbg("code:", code) + -- logger.dbg("headers:", headers) + -- logger.dbg("status:", status) + -- logger.dbg("#content:", #content) + + if code == TIMEOUT_CODE or code == MAXTIME_CODE then + logger.warn("request interrupted:", code) + return false, code + end + if headers == nil then + logger.warn("No HTTP headers:", code, status) + return false, "Network or remote server unavailable" + end + if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK + logger.warn("HTTP status not okay:", code, status) + return false, "Remote server error or unavailable" + end + if headers and headers["content-length"] then + -- Check we really got the announced content size + local content_length = tonumber(headers["content-length"]) + if #content ~= content_length then + return false, "Incomplete content received" + end + end + return true, content +end + +function Wikipedia:setTrapWidget(trap_widget) + self.trap_widget = trap_widget +end + +function Wikipedia:resetTrapWidget() + self.trap_widget = nil +end + -- Possible values for page_type parameter to loadPage() local WIKIPEDIA_INTRO = 1 local WIKIPEDIA_FULL = 2 local WIKIPEDIA_PHTML = 3 +local WIKIPEDIA_IMAGES = 4 --[[ -- return decoded JSON table from Wikipedia --]] function Wikipedia:loadPage(text, lang, page_type, plain) - local socket = require('socket') local url = require('socket.url') - local http = require('socket.http') - local https = require('ssl.https') - local ltn12 = require('ltn12') - - local request, sink = {}, {} local query = "" - local parsed = url.parse(self:getWikiServer(lang)) parsed.path = self.wiki_path if page_type == WIKIPEDIA_INTRO then -- search query @@ -90,8 +214,8 @@ function Wikipedia:loadPage(text, lang, page_type, plain) end parsed.query = query .. "gsrsearch=" .. url.escape(text) elseif page_type == WIKIPEDIA_FULL then -- full page content - self.wiki_params.explaintext = plain and "" or nil - for k,v in pairs(self.wiki_params) do + self.wiki_full_params.explaintext = plain and "" or nil + for k,v in pairs(self.wiki_full_params) do query = string.format("%s%s=%s&", query, k, v) end parsed.query = query .. "titles=" .. url.escape(text) @@ -100,66 +224,90 @@ function Wikipedia:loadPage(text, lang, page_type, plain) query = string.format("%s%s=%s&", query, k, v) end parsed.query = query .. "page=" .. url.escape(text) + elseif page_type == WIKIPEDIA_IMAGES then -- images found in page html + for k,v in pairs(self.wiki_images_params) do + query = string.format("%s%s=%s&", query, k, v) + end + parsed.query = query .. "page=" .. 
url.escape(text) else return end - -- HTTP request - request['url'] = url.build(parsed) - request['method'] = 'GET' - request['sink'] = ltn12.sink.table(sink) - http.TIMEOUT, https.TIMEOUT = 10, 10 - local httpRequest = parsed.scheme == 'http' and http.request or https.request - -- first argument returned by skip is code - local _, headers, status = socket.skip(1, httpRequest(request)) - - -- raise error message when network is unavailable - if headers == nil then - error("Network is unreachable") + local built_url = url.build(parsed) + local completed, success, content + if self.trap_widget then -- if previously set with Wikipedia:setTrapWidget() + local Trapper = require("ui/trapper") + local timeout, maxtime = 30, 60 + -- We use dismissableRunInSubprocess with complex return values: + completed, success, content = Trapper:dismissableRunInSubprocess(function() + return getUrlContent(built_url, timeout, maxtime) + end, self.trap_widget) + if not completed then + error(self.dismissed_error_code) -- "Interrupted by user" + end + else + local timeout, maxtime = 10, 60 + success, content = getUrlContent(built_url, timeout, maxtime) end - - if status ~= "HTTP/1.1 200 OK" then - logger.warn("HTTP status not okay:", status) - return + if not success then + error(content) end - local content = table.concat(sink) if content ~= "" and string.sub(content, 1,1) == "{" then local ok, result = pcall(JSON.decode, content) if ok and result then - logger.dbg("wiki result", result) + logger.dbg("wiki result json:", result) return result else - logger.warn("wiki error:", result) + logger.warn("wiki result json decoding error:", result) + error("Failed decoding JSON") end else - logger.warn("not JSON from wiki response:", content) + logger.warn("wiki response is not json:", content) + error("Response is not JSON") end end -- search wikipedia and get intros for results -function Wikipedia:wikintro(text, lang) +function Wikipedia:searchAndGetIntros(text, lang) local result = self:loadPage(text, lang, WIKIPEDIA_INTRO, true) if result then local query = result.query if query then + local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image") + -- Scale wikipedia normalized (we hope) thumbnail by 2 (adjusted + -- to screen size/dpi) for intros (and x8 more for highres image) + local image_size_factor = Screen:scaleBySize(200)/100.0 + if show_image then + for pageid, page in pairs(query.pages) do + self:addImages(page, lang, false, image_size_factor, 8) + end + end return query.pages end end end -- get full content of a wiki page -function Wikipedia:wikifull(text, lang) - local result = self:loadPage(text, lang, WIKIPEDIA_FULL, true) +function Wikipedia:getFullPage(wiki_title, lang) + local result = self:loadPage(wiki_title, lang, WIKIPEDIA_FULL, true) if result then local query = result.query if query then - if self.wiki_prettify then - -- Prettification of the plain text full page + local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image") + local show_more_images = G_reader_settings:nilOrTrue("wikipedia_show_more_images") + -- Scale wikipedia normalized (we hope) thumbnails by 4 (adjusted + -- to screen size/dpi) for full page (and this *4 for highres image) + local image_size_factor = Screen:scaleBySize(400)/100.0 + if self.wiki_prettify or show_image then for pageid, page in pairs(query.pages) do - if page.extract then + if self.wiki_prettify and page.extract then + -- Prettification of the plain text full page page.extract = self:prettifyText(page.extract) end + if show_image then + 
self:addImages(page, lang, show_more_images, image_size_factor, 4) + end end end return query.pages @@ -168,8 +316,8 @@ function Wikipedia:wikifull(text, lang) end -- get parsed html content and other infos of a wiki page -function Wikipedia:wikiphtml(text, lang) - local result = self:loadPage(text, lang, WIKIPEDIA_PHTML, true) +function Wikipedia:getFullPageHtml(wiki_title, lang) + local result = self:loadPage(wiki_title, lang, WIKIPEDIA_PHTML, true) if result and result.parse then return result.parse end @@ -178,6 +326,247 @@ function Wikipedia:wikiphtml(text, lang) end end +-- get images extracted from parsed html +function Wikipedia:getFullPageImages(wiki_title, lang) + local images = {} -- will be returned, each in a format similar to page.thumbnail + local result = self:loadPage(wiki_title, lang, WIKIPEDIA_IMAGES, true) + if result and result.parse and result.parse.text and result.parse.text["*"] then + local html = result.parse.text["*"] -- html content + local url = require('socket.url') + local wiki_base_url = self:getWikiServer(lang) + + local thumbs = {} -- bits of HTML containing an image + -- We first try to catch images in
<div class=thumbinner>, which should exclude
+        -- wikipedia icons, flags... These seem to all end with a double </div>.
+        for thtml in html:gmatch([[<div class="thumbinner"%s*.-</div>%s*</div>]]) do
+            table.insert(thumbs, thtml)
+        end
+        -- We then also try to catch images in galleries (which often are less
+        -- interesting than those in thumbinner) as a 2nd set.
+        for thtml in html:gmatch([[<li class="gallerybox"%s*.-</li>%s*]]) do
+            table.insert(thumbs, thtml)
+        end
+        -- We may miss some interesting images in the page's top right table, but
+        -- there's no easy way to distinguish them from icons/flags in this table...
+
+        for _, thtml in ipairs(thumbs) do
+            -- We get <a href="/wiki/File:the_image_file_name.jpg">, then the
+            -- <img> tag, then what remains after it (the caption)
+            local filename = thtml:match([[<a [^>]*href="/wiki/File:([^"]*)"]])
+            if filename then filename = url.unescape(filename) end
+            local timg, tremain = thtml:match([[(<img [^>]*>)(.*)]])
+            if timg and tremain then
+                -- (Should we discard those without caption?)
+                local caption = tremain and util.htmlToPlainText(tremain)
+                if caption == "" then caption = nil end
+                logger.dbg(" caption:", caption)
+                -- logger.dbg(timg)
+                local src = timg:match([[src="([^"]*)"]])
+                if src and src ~= "" then
+                    if src:sub(1,2) == "//" then
+                        src = "https:" .. src
+                    elseif src:sub(1,1) == "/" then -- non absolute url
+                        src = wiki_base_url .. src
+                    end
+                    local width = tonumber(timg:match([[width="([^"]*)"]]))
+                    local height = tonumber(timg:match([[height="([^"]*)"]]))
+                    -- Ignore img without width and height, which should exclude
+                    -- javascript maps and other unsupported stuff
+                    if width and height then
+                        -- Images in the html we got seem to be x4.5 the size of
+                        -- the thumbnail we get with searchAndGetIntros() or
+                        -- getFullPage(). Normalize them to the size of the thumbnail,
+                        -- so we can resize them all later with the same rules.
+                        width = math.ceil(width/4.5)
+                        height = math.ceil(height/4.5)
+                        -- No need to adjust width in src url here, as it will be
+                        -- done in addImages() anyway
+                        -- src = src:gsub("(.*/)%d+(px-[^/]*)", "%1"..width.."%2")
+                        logger.dbg(" size:", width, "x", height, "url:", src)
+                        table.insert(images, {
+                            source = src,
+                            width = width,
+                            height = height,
+                            filename = filename,
+                            caption = caption,
+                        })
+                    end
+                end
+            end
+        end
+    end
+    return images
+end
+
+-- Function wrapped and plugged to image objects returned by :addImages()
+local function image_load_bb_func(image, highres)
+    local source, trap_widget
+    if not highres then
+        -- We use an invisible widget that will resend the dismiss event,
+        -- so that image loading in TextBoxWidget is unobtrusive and
+        -- interruptible
+        trap_widget = false
+        source = image.source
+    else
+        -- We need to let the user know image loading is happening,
+        -- with a discreet TrapWidget
+        trap_widget = _("Loading high-res image… (tap to cancel)")
+        source = image.hi_source
+    end
+    -- Image may be big or take some time to be resized on wikipedia servers.
+    -- As we use dismissableRunInSubprocess and can interrupt this loading,
+    -- we can use quite high timeouts
+    local timeout, maxtime = 60, 120
+
+    logger.dbg("fetching", source)
+    local Trapper = require("ui/trapper")
+    -- We use dismissableRunInSubprocess with simple string return value to
+    -- avoid dump()/load() a long string of image bytes
+    local completed, data = Trapper:dismissableRunInSubprocess(function()
+        local success, data = getUrlContent(source, timeout, maxtime)
+        -- With simple string value, we're not able to return the failure
+        -- reason, so log it here
+        if not success then
+            logger.warn("failed fetching image from", source, ":", data)
+        end
+        return success and data or nil
+    end, trap_widget, true) -- task_returns_simple_string=true
+
+    local success = data and true or false -- guess success from data
+
+    if not completed then
+        logger.dbg("image fetching interrupted by user")
+        return true -- let caller know it was interrupted
+    end
+    if not success then
+        -- log it again (on Android, logs from the sub-process seem to not work)
+        logger.warn("failed fetching image from", source)
+        return
+    end
+    logger.dbg(" fetched", #data)
+
+    -- Use mupdf to render image to blitbuffer
+    local mupdf = require("ffi/mupdf")
+    local ok, bb_or_error
+    if not highres then
+        -- For low-res, we should ensure the image we got from wikipedia is
+        -- the right size, so it does not overflow our reserved area
+        -- (TextBoxWidget may have adjusted image.width and height)
+        ok, bb_or_error = pcall(mupdf.renderImage, data, #data, image.width, image.height)
+    else
+        -- No need for width and height for high-res
+        ok, bb_or_error = pcall(mupdf.renderImage, data, #data)
+    end
+    if not ok then
+        logger.warn("failed building image from", source, ":", bb_or_error)
+        return
+    end
+    if not highres then
+        image.bb = bb_or_error
+    else
+        image.hi_bb = bb_or_error
+    end
+end
+
+function Wikipedia:addImages(page, lang, more_images, image_size_factor, hi_image_size_factor)
+    -- List of images, table with keys as expected by TextBoxWidget
+    page.images = {}
+    -- List of wikipedia images data structures (page.thumbnail and images
+    -- extracted from html) made to have the same keys for common processing
+    local wimages = {}
+
+    -- We got what Wikipedia scored as the most interesting image for this
+    -- page in page.thumbnail, and its filename in page.pageimage, i.e.:
+    --   "thumbnail": {
+    --       "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/Reading_on_the_bus_train_or_transit.jpg/37px-Reading_on_the_bus_train_or_transit.jpg",
+    --       "width": 37,
+    --       "height": 50
+    --   },
+    --   "pageimage": "Reading_on_the_bus_train_or_transit.jpg"
+    --
+    local first_image_filename = nil
+    if page.thumbnail and page.thumbnail.source then
+        page.thumbnail.filename = page.pageimage
+        first_image_filename = page.pageimage
+        table.insert(wimages, page.thumbnail)
+    end
+    -- To get more images, we need to make a second request to wikipedia
+    if more_images then
+        local ok, images_or_err = pcall(Wikipedia.getFullPageImages, Wikipedia, page.title, lang)
+        if not ok then
+            logger.warn("error getting more images", images_or_err)
+        else
+            for _, wimage in ipairs(images_or_err) do
+                if first_image_filename and wimage.filename == first_image_filename then
+                    -- We got the same image as the thumbnail one, but it may have
+                    -- a caption: replace thumbnail one with this one
+                    table.remove(wimages, 1)
+                    table.insert(wimages, 1, wimage)
                else
+                    table.insert(wimages, wimage)
+                end
+            end
+        end
+    end
+
+    -- All our wimages now have the keys: source, width, height, filename, caption
+    for _, wimage in ipairs(wimages) do
+        -- We trust wikipedia, and our x4.5 factor in :getFullPageImages(), for adequate
+        -- and homogeneous image sizes. We'll just scale them according to the
+        -- provided 'image_size_factor' (which should account for screen size/DPI)
+        local width = wimage.width or 100 -- in case we don't get any width or height
+        local height = wimage.height or 100
+        -- Give a little boost in size to thin images
+        if width < height / 2 or height < width / 2 then
+            width = width * 1.3
+            height = height * 1.3
+        end
+        width = math.ceil(width * image_size_factor)
+        height = math.ceil(height * image_size_factor)
+        -- All wikipedia image urls like .../wikipedia/commons/A/BC/<filename>
+        -- or .../wikipedia/commons/thumb/A/BC/<filename>/<width>px-<filename>
+        -- can be transformed to another url with a requested new_width with the form:
+        -- .../wikipedia/commons/thumb/A/BC/<filename>/<new_width>px-<filename>
+        -- (Additionally, the image format can be changed by appending .png,
+        -- .jpg or .gif to it)
+        -- The resize is thus done on Wikipedia servers from the source image for
+        -- the best quality.
+        local source = wimage.source:gsub("(.*/)%d+(px-[^/]*)", "%1"..width.."%2")
+        -- We build values for a high resolution version of the image, to be displayed
+        -- with ImageViewer (x4 by default)
+        local hi_width = width * (hi_image_size_factor or 4)
+        local hi_height = height * (hi_image_size_factor or 4)
+        local hi_source = wimage.source:gsub("(.*/)%d+(px-[^/]*)", "%1"..hi_width.."%2")
+        local title = wimage.filename
+        if title then
+            title = title:gsub("_", " ")
+        end
+        local image = {
+            -- As expected by TextBoxWidget (with additional source and
+            -- hi_source, that will be used by load_bb_func)
+            title = title,
+            caption = wimage.caption,
+            source = source,
+            width = width,
+            height = height,
+            bb = nil, -- will be loaded and built only if needed
+            hi_source = hi_source,
+            hi_width = hi_width,
+            hi_height = hi_height,
+            hi_bb = nil, -- will be loaded and built only if needed
+        }
+        -- If bb or hi_bb is nil, TextBoxWidget will call a method named "load_bb_func"
+        image.load_bb_func = function(highres)
+            return image_load_bb_func(image, highres)
+        end
+        table.insert(page.images, image)
+    end
+end
+
 -- UTF8 of unicode geometrical shapes we can use to replace
 -- the "=== title ===" of wikipedia plaintext pages
 -- These chosen ones are available in most fonts (prettier symbols
@@ -218,38 +607,6 @@ function Wikipedia:prettifyText(text)
 end
 
-local function getUrlContent(url, timeout)
-    local socket = require('socket')
-    local ltn12 = require('ltn12')
-    local requester
-    if url:sub(1,7) == "http://" then
-        requester = require('socket.http')
-    elseif url:sub(1,8) == "https://" then
-        requester = require('ssl.https')
-    else
-        return false, "Unsupported protocol"
-    end
-    requester.TIMEOUT = timeout or 10
-    local request = {}
-    local sink = {}
-    request['url'] = url
-    request['method'] = 'GET'
-    request['sink'] = ltn12.sink.table(sink)
-    -- first argument returned by skip is code
-    local _, headers, status = socket.skip(1, requester.request(request))
-
-    if headers == nil then
-        logger.warn("No HTTP headers")
-        return false, "Network unavailable"
-    end
-    if status ~= "HTTP/1.1 200 OK" then
-        logger.warn("HTTP status not okay:", status)
-        return false, "Network unavailable"
-    end
-
-    return true, table.concat(sink)
-end
-
 -- UTF8 of unicode geometrical shapes we'll prepend to wikipedia section headers,
 -- to help identifying hierarchy (otherwise, the small font-size differences help).
 -- Best if identical to the ones used above for prettifying full plain text page.
@@ -292,12 +649,12 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
     -- Trapper:info() and Trapper:confirm() will just use logger.
     local UI = require("ui/trapper")
 
-    UI:info(_("Fetching Wikipedia page…"))
-    local ok, phtml = pcall(self.wikiphtml, self, page, lang)
+    UI:info(_("Retrieving Wikipedia article…"))
+    local ok, phtml = pcall(self.getFullPageHtml, self, page, lang)
     if not ok then
         UI:info(phtml) -- display error in InfoMessage
         -- Sleep a bit to make that error seen
-        util.sleep(2)
+        ffiutil.sleep(2)
         UI:reset()
         return false
     end
@@ -403,13 +760,13 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
     if with_images then
         -- If no UI (Trapper:wrap() not called), UI:confirm() will answer true
         if #images > 0 then
-            include_images = UI:confirm(T(_("The page contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
+            include_images = UI:confirm(T(_("This article contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
             if include_images then
                 use_img_2x = UI:confirm(_("Would you like to use slightly higher quality images? This will result in a bigger file size."), _("Standard quality"), _("Higher quality"))
             end
         else
-            UI:info(_("The page does not contain any images."))
+            UI:info(_("This article does not contain any images."))
-            util.sleep(1) -- Let the user see that
+            ffiutil.sleep(1) -- Let the user see that
         end
     end
     if not include_images then
@@ -568,6 +925,10 @@ div.thumb {
 ul, ol {
     margin-left: 0em;
 }
+/* avoid a line with a standalone bullet */
+li.gallerybox {
+    display: inline;
+}
 /* helps crengine to not display them as block elements */
 time, abbr, sup {
     display: inline;
@@ -655,6 +1016,10 @@ time, abbr, sup {
     -- external link for us, so let's remove this link.
     html = html:gsub("<a[^>]*>%s*(<%s*img [^>]*>)%s*</a>", "%1")
 
+    -- TODO: do something for <li class="gallerybox"...> so they are no more
+    -- a <li> (crengine displays them one above the other) and can be displayed
+    -- side by side
+
     -- For some <div class="thumb"...>, which include nested divs, although
     -- perfectly balanced, crengine seems to miss some closing </div> and we
     -- end up having our image bordered box including the remaining main wiki text.
@@ -771,7 +1136,7 @@ time, abbr, sup {
     -- Process can be interrupted at this point between each image download
     -- by tapping while the InfoMessage is displayed
     -- We use the fast_refresh option from image #2 for a quicker download
-    local go_on = UI:info(T(_("Fetching image %1 / %2 …"), inum, nb_images), inum >= 2)
+    local go_on = UI:info(T(_("Retrieving image %1 / %2 …"), inum, nb_images), inum >= 2)
     if not go_on then
         cancelled = true
         break
@@ -813,7 +1178,7 @@ time, abbr, sup {
     end
     epub:close()
     -- This was nearly a no-op, so sleep a bit to make that progress step seen
-    util.usleep(300000)
+    ffiutil.usleep(300000)
     UI:reset() -- close last InfoMessage
     if cancelled then
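Note: a minimal sketch of how the pieces introduced above fit together, seen from ReaderWikipedia's side (illustrative call, not part of the patch; the search term and language are made up, and self.lookup_progress_msg is the InfoMessage set by showLookupInfo()):

    local Trapper = require("ui/trapper")
    Trapper:wrap(function()
        -- While the lookup InfoMessage is displayed, a user tap dismisses it,
        -- interrupts the HTTP subprocess, and makes loadPage() throw
        -- Wikipedia.dismissed_error_code
        Wikipedia:setTrapWidget(self.lookup_progress_msg)
        local ok, pages = pcall(Wikipedia.searchAndGetIntros, Wikipedia, "e-reader", "en")
        Wikipedia:resetTrapWidget()
        if not ok and pages and string.find(pages, Wikipedia.dismissed_error_code) then
            -- user cancelled: show the "Wikipedia request canceled." dummy result
        end
    end)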