Wikipedia: show images and allow interrupting queries

Reword some English messages
6 years ago · 775b7a364f
parent 43ad2cef99
commit 775b7a364f
2 changed files with 522 additions and 102 deletions
--- a/frontend/apps/reader/modules/readerwikipedia.lua
+++ b/frontend/apps/reader/modules/readerwikipedia.lua
@ -6,6 +6,7 @@ local KeyValuePage = require("ui/widget/keyvaluepage")
 local LuaData = require("luadata")
 local NetworkMgr = require("ui/network/manager")
 local ReaderDictionary = require("apps/reader/modules/readerdictionary")
+local Trapper = require("ui/trapper")
 local Translator = require("ui/translator")
 local UIManager = require("ui/uimanager")
 local Wikipedia = require("ui/wikipedia")
@ -21,7 +22,6 @@ local ReaderWikipedia = ReaderDictionary:extend{
    -- identify itself
    is_wiki = true,
    wiki_languages = {},
-    no_page = _("No wiki page found."),
    disable_history = G_reader_settings:isTrue("wikipedia_disable_history"),
 }

@ -271,7 +271,29 @@ function ReaderWikipedia:addToMainMenu(menu_items)
                        end,
                    })
                end,
-            }
+                separator = true,
+            },
+            { -- setting used in wikipedia.lua
+                text = _("Show image in search results"),
+                checked_func = function()
+                    return G_reader_settings:nilOrTrue("wikipedia_show_image")
+                end,
+                callback = function()
+                    G_reader_settings:flipNilOrTrue("wikipedia_show_image")
+                end,
+            },
+            { -- setting used in wikipedia.lua
+                text = _("Show more images in full article"),
+                enabled_func = function()
+                    return G_reader_settings:nilOrTrue("wikipedia_show_image")
+                end,
+                checked_func = function()
+                    return G_reader_settings:nilOrTrue("wikipedia_show_more_images") and G_reader_settings:nilOrTrue("wikipedia_show_image")
+                end,
+                callback = function()
+                    G_reader_settings:flipNilOrTrue("wikipedia_show_more_images")
+                end,
+            },
        }
    }
 end
@ -319,6 +341,14 @@ function ReaderWikipedia:initLanguages(word)
 end

 function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
+    -- Wrapped through Trapper, as we may be using Trapper:dismissableRunInSubprocess() in it
+    Trapper:wrap(function()
+        self:lookupWikipedia(word, box, get_fullpage, forced_lang)
+    end)
+    return true
+end
+
+function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang)
    if not NetworkMgr:isOnline() then
        NetworkMgr:promptWifiOn()
        return
@ -358,19 +388,35 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
        })
    end

-    -- Fix lookup message to include lang
+    -- Fix lookup message to include lang and set appropriate error texts
+    local no_result_text, req_failure_text
    if get_fullpage then
-        self.lookup_msg = T(_("Getting Wikipedia %2 page:\n%1"), "%1", lang:upper())
+        self.lookup_msg = T(_("Retrieving Wikipedia %2 article:\n%1"), "%1", lang:upper())
+        req_failure_text = _("Failed to retrieve Wikipedia article.")
+        no_result_text = _("Wikipedia article not found.")
    else
        self.lookup_msg = T(_("Searching Wikipedia %2 for:\n%1"), "%1", lang:upper())
+        req_failure_text = _("Failed searching Wikipedia.")
+        no_result_text = _("No Wikipedia articles matching search term.")
    end
    self:showLookupInfo(display_word)
+
    local results = {}
    local ok, pages
+    local lookup_cancelled = false
+    Wikipedia:setTrapWidget(self.lookup_progress_msg)
    if get_fullpage then
-        ok, pages = pcall(Wikipedia.wikifull, Wikipedia, word, lang)
+        ok, pages = pcall(Wikipedia.getFullPage, Wikipedia, word, lang)
    else
-        ok, pages = pcall(Wikipedia.wikintro, Wikipedia, word, lang)
+        ok, pages = pcall(Wikipedia.searchAndGetIntros, Wikipedia, word, lang)
+    end
+    Wikipedia:resetTrapWidget()
+    if not ok and pages and string.find(pages, Wikipedia.dismissed_error_code) then
+        -- So we can display an alternate dummy result
+        lookup_cancelled = true
+        -- Or we could just not show anything with:
+        -- self:dismissLookupInfo()
+        -- return
    end
    if ok and pages then
        -- sort pages according to 'index' attribute if present (not present
@ -387,14 +433,14 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
            pages = sorted_pages
        end
        for pageid, page in pairs(pages) do
-            local definition = page.extract or self.no_page
+            local definition = page.extract or no_result_text
            if page.length then
                -- we get 'length' only for intro results
                -- let's append it to definition so we know
                -- how big/valuable the full page is
                local fullkb = math.ceil(page.length/1024)
                local more_factor = math.ceil( page.length / (1+definition:len()) ) -- +1 just in case len()=0
-                definition = definition .. "\n" .. T(_("(full page : %1 kB, = %2 x this intro length)"), fullkb, more_factor)
+                definition = definition .. "\n" .. T(_("(full article : %1 kB, = %2 x this intro length)"), fullkb, more_factor)
            end
            local result = {
                dict = T(_("Wikipedia %1"), lang:upper()),
@ -402,18 +448,27 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
                definition = definition,
                is_fullpage = get_fullpage,
                lang = lang,
+                images = page.images,
            }
            table.insert(results, result)
        end
        -- logger.dbg of results will be done by ReaderDictionary:showDict()
    else
-        logger.dbg("error:", pages)
        -- dummy results
+        local definition
+        if lookup_cancelled then
+            definition = _("Wikipedia request canceled.")
+        elseif ok then
+            definition = no_result_text
+        else
+            definition = req_failure_text
+            logger.dbg("error:", pages)
+        end
        results = {
            {
                dict = T(_("Wikipedia %1"), lang:upper()),
                word = word,
-                definition = self.no_page,
+                definition = definition,
                is_fullpage = get_fullpage,
                lang = lang,
            }
--- a/frontend/ui/wikipedia.lua
+++ b/frontend/ui/wikipedia.lua
@ -1,8 +1,10 @@
 local JSON = require("json")
+local Screen = require("device").screen
+local ffiutil = require("ffi/util")
 local logger = require("logger")
-local util = require("ffi/util")
+local util = require("util")
 local _ = require("gettext")
-local T = require("ffi/util").template
+local T = ffiutil.template

 --[[
 -- Query wikipedia using Wikimedia Web API.
@ -18,18 +20,9 @@ local T = require("ffi/util").template
 local Wikipedia = {
   wiki_server = "https://%s.wikipedia.org",
   wiki_path = "/w/api.php",
-   wiki_params = {
-       action = "query",
-       prop = "extracts",
-       format = "json",
-       -- exintro = nil, -- get more than only the intro
-       explaintext = "",
-       redirects = "",
-       -- title = nil, -- text to lookup, will be added below
-   },
   default_lang = "en",
-   -- Search query for better results
-   -- see https://www.mediawiki.org/wiki/API:Main_page
+   -- See https://www.mediawiki.org/wiki/API:Main_page for details.
+   -- Search query, returns introductory texts (+ main thumbnail image)
   wiki_search_params = {
       action = "query",
       generator = "search",
@ -37,7 +30,7 @@ local Wikipedia = {
       -- gsrsearch = nil, -- text to lookup, will be added below
       gsrlimit = 20, -- max nb of results to get
       exlimit = "max",
-       prop = "extracts|info", -- 'extracts' to get text, 'info' to get full page length
+       prop = "extracts|info|pageimages", -- 'extracts' to get text, 'info' to get full page length
       format = "json",
       explaintext = "",
       exintro = "",
@ -45,6 +38,17 @@ local Wikipedia = {
       -- (otherwise, we get the full text for only the first result, and
       -- no text at all for the others
   },
+   -- Full article, parsed to output text (+ main thumbnail image)
+   wiki_full_params = {
+       action = "query",
+       prop = "extracts|pageimages",
+       format = "json",
+       -- exintro = nil, -- get more than only the intro
+       explaintext = "",
+       redirects = "",
+       -- title = nil, -- text to lookup, will be added below
+   },
+   -- Full article, parsed to output HTML, for Save as EPUB
   wiki_phtml_params = {
       action = "parse",
       format = "json",
@ -55,32 +59,152 @@ local Wikipedia = {
       disablelimitreport = "",
       disableeditsection = "",
   },
-   -- allow for disabling prettifying full page text
+   -- Full article, parsed to output HTML, for images extraction
+   -- (used with full article as text, if "show more images" enabled)
+   wiki_images_params = { -- same as previous one, with just text html
+       action = "parse",
+       format = "json",
+       -- we only need the following information
+       prop = "text",
+       -- page = nil, -- text to lookup, will be added below
+       redirects = "",
+       disabletoc = "", -- remove toc in html
+       disablelimitreport = "",
+       disableeditsection = "",
+   },
+   -- There is an alternative for obtaining page's images:
+   -- prop=imageinfo&action=query&iiprop=url|dimensions|mime|extmetadata&generator=images&pageids=49448&iiurlwidth=100&iiextmetadatafilter=ImageDescription
+   -- but it gives all images (including wikipedia icons) in any order, without
+   -- any score or information that would help considering if they matter or not
+   --
+
+   -- Allow for disabling prettifying full page text
   wiki_prettify = G_reader_settings:nilOrTrue("wikipedia_prettify"),
+
+   -- Can be set so HTTP requests will be done under Trapper and
+   -- be interruptible
+   trap_widget = nil,
+   -- For actions done with Trapper:dismissable methods, we may throw
+   -- an error() with this code. We make the value of this error
+   -- accessible here so that the caller can know it's a user dismiss.
+   dismissed_error_code = "Interrupted by user",
 }

 function Wikipedia:getWikiServer(lang)
    return string.format(self.wiki_server, lang or self.default_lang)
 end

+-- Codes that getUrlContent may get from requester.request()
+local TIMEOUT_CODE = "timeout" -- from socket.lua
+local MAXTIME_CODE = "maxtime reached" -- from sink_table_with_maxtime
+
+-- Sink that stores into a table, aborting if maxtime has elapsed
+local function sink_table_with_maxtime(t, maxtime)
+    -- Start counting as soon as this sink is created
+    local start_secs, start_usecs = ffiutil.gettime()
+    local starttime = start_secs + start_usecs/1000000
+    t = t or {}
+    local f = function(chunk, err)
+        local secs, usecs = ffiutil.gettime()
+        if secs + usecs/1000000 - starttime > maxtime then
+            return nil, MAXTIME_CODE
+        end
+        if chunk then table.insert(t, chunk) end
+        return 1
+    end
+    return f, t
+end
+
+-- Get URL content
+local function getUrlContent(url, timeout, maxtime)
+    local socket = require('socket')
+    local ltn12 = require('ltn12')
+    local http = require('socket.http')
+    local https = require('ssl.https')
+
+    local requester
+    if url:sub(1,7) == "http://" then
+        requester = http
+    elseif url:sub(1,8) == "https://" then
+        requester = https
+    else
+        return false, "Unsupported protocol"
+    end
+    if not timeout then timeout = 10 end
+    -- timeout needs to be set to 'http', even if we use 'https'
+    http.TIMEOUT, https.TIMEOUT = timeout, timeout
+
+    local request = {}
+    local sink = {}
+    request['url'] = url
+    request['method'] = 'GET'
+    -- 'timeout' delay works on socket, and is triggered when
+    -- that time has passed trying to connect, or after connection
+    -- when no data has been read for this time.
+    -- On a slow connection, it may not be triggered (as we could read
+    -- 1 byte every 1 second, not triggering any timeout).
+    -- 'maxtime' can be provided to overcome that: we start counting
+    -- as soon as the sink is created, just before the request is sent
+    -- (but it is checked for only when data is received).
+    -- Setting 'maxtime' and 'timeout' gives more chance to abort the request when
+    -- it takes too much time (in the worst case: in timeout+maxtime seconds).
+    -- But time taken by DNS lookup cannot easily be accounted for, so
+    -- a request may (when dns lookup takes time) exceed timeout and maxtime...
+    if maxtime then
+        request['sink'] = sink_table_with_maxtime(sink, maxtime)
+    else
+        request['sink'] = ltn12.sink.table(sink)
+    end
+
+    local code, headers, status = socket.skip(1, requester.request(request))
+    local content = table.concat(sink) -- empty or content accumulated till now
+    -- logger.dbg("code:", code)
+    -- logger.dbg("headers:", headers)
+    -- logger.dbg("status:", status)
+    -- logger.dbg("#content:", #content)
+
+    if code == TIMEOUT_CODE or code == MAXTIME_CODE then
+        logger.warn("request interrupted:", code)
+        return false, code
+    end
+    if headers == nil then
+        logger.warn("No HTTP headers:", code, status)
+        return false, "Network or remote server unavailable"
+    end
+    if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK
+        logger.warn("HTTP status not okay:", code, status)
+        return false, "Remote server error or unavailable"
+    end
+    if headers and headers["content-length"] then
+        -- Check we really got the announced content size
+        local content_length = tonumber(headers["content-length"])
+        if #content ~= content_length then
+            return false, "Incomplete content received"
+        end
+    end
+    return true, content
+end
+
+function Wikipedia:setTrapWidget(trap_widget)
+    self.trap_widget = trap_widget
+end
+
+function Wikipedia:resetTrapWidget()
+    self.trap_widget = nil
+end
+
 -- Possible values for page_type parameter to loadPage()
 local WIKIPEDIA_INTRO = 1
 local WIKIPEDIA_FULL = 2
 local WIKIPEDIA_PHTML = 3
+local WIKIPEDIA_IMAGES = 4

 --[[
 --  return decoded JSON table from Wikipedia
 --]]
 function Wikipedia:loadPage(text, lang, page_type, plain)
-    local socket = require('socket')
    local url = require('socket.url')
-    local http = require('socket.http')
-    local https = require('ssl.https')
-    local ltn12 = require('ltn12')
-
-    local request, sink = {}, {}
    local query = ""
-
    local parsed = url.parse(self:getWikiServer(lang))
    parsed.path = self.wiki_path
    if page_type == WIKIPEDIA_INTRO then -- search query
@ -90,8 +214,8 @@ function Wikipedia:loadPage(text, lang, page_type, plain)
        end
        parsed.query = query .. "gsrsearch=" .. url.escape(text)
    elseif page_type == WIKIPEDIA_FULL then -- full page content
-        self.wiki_params.explaintext = plain and "" or nil
-        for k,v in pairs(self.wiki_params) do
+        self.wiki_full_params.explaintext = plain and "" or nil
+        for k,v in pairs(self.wiki_full_params) do
            query = string.format("%s%s=%s&", query, k, v)
        end
        parsed.query = query .. "titles=" .. url.escape(text)
@ -100,66 +224,90 @@ function Wikipedia:loadPage(text, lang, page_type, plain)
            query = string.format("%s%s=%s&", query, k, v)
        end
        parsed.query = query .. "page=" .. url.escape(text)
+    elseif page_type == WIKIPEDIA_IMAGES then -- images found in page html
+        for k,v in pairs(self.wiki_images_params) do
+            query = string.format("%s%s=%s&", query, k, v)
+        end
+        parsed.query = query .. "page=" .. url.escape(text)
    else
        return
    end

-    -- HTTP request
-    request['url'] = url.build(parsed)
-    request['method'] = 'GET'
-    request['sink'] = ltn12.sink.table(sink)
-    http.TIMEOUT, https.TIMEOUT = 10, 10
-    local httpRequest = parsed.scheme == 'http' and http.request or https.request
-    -- first argument returned by skip is code
-    local _, headers, status = socket.skip(1, httpRequest(request))
-
-    -- raise error message when network is unavailable
-    if headers == nil then
-        error("Network is unreachable")
+    local built_url = url.build(parsed)
+    local completed, success, content
+    if self.trap_widget then -- if previously set with Wikipedia:setTrapWidget()
+        local Trapper = require("ui/trapper")
+        local timeout, maxtime = 30, 60
+        -- We use dismissableRunInSubprocess with complex return values:
+        completed, success, content = Trapper:dismissableRunInSubprocess(function()
+            return getUrlContent(built_url, timeout, maxtime)
+        end, self.trap_widget)
+        if not completed then
+            error(self.dismissed_error_code) -- "Interrupted by user"
        end
-
-    if status ~= "HTTP/1.1 200 OK" then
-        logger.warn("HTTP status not okay:", status)
-        return
+    else
+        local timeout, maxtime = 10, 60
+        success, content = getUrlContent(built_url, timeout, maxtime)
+    end
+    if not success then
+        error(content)
    end

-    local content = table.concat(sink)
    if content ~= "" and string.sub(content, 1,1) == "{" then
        local ok, result = pcall(JSON.decode, content)
        if ok and result then
-            logger.dbg("wiki result", result)
+            logger.dbg("wiki result json:", result)
            return result
        else
-            logger.warn("wiki error:", result)
+            logger.warn("wiki result json decoding error:", result)
+            error("Failed decoding JSON")
        end
    else
-        logger.warn("not JSON from wiki response:", content)
+        logger.warn("wiki response is not json:", content)
+        error("Response is not JSON")
    end
 end

 -- search wikipedia and get intros for results
-function Wikipedia:wikintro(text, lang)
+function Wikipedia:searchAndGetIntros(text, lang)
    local result = self:loadPage(text, lang, WIKIPEDIA_INTRO, true)
    if result then
        local query = result.query
        if query then
+            local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image")
+            -- Scale wikipedia normalized (we hope) thumbnail by 2 (adjusted
+            -- to screen size/dpi) for intros (and x8 more for highres image)
+            local image_size_factor = Screen:scaleBySize(200)/100.0
+            if show_image then
+                for pageid, page in pairs(query.pages) do
+                    self:addImages(page, lang, false, image_size_factor, 8)
+                end
+            end
            return query.pages
        end
    end
 end

 -- get full content of a wiki page
-function Wikipedia:wikifull(text, lang)
-    local result = self:loadPage(text, lang, WIKIPEDIA_FULL, true)
+function Wikipedia:getFullPage(wiki_title, lang)
+    local result = self:loadPage(wiki_title, lang, WIKIPEDIA_FULL, true)
    if result then
        local query = result.query
        if query then
-            if self.wiki_prettify then
-                -- Prettification of the plain text full page
+            local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image")
+            local show_more_images = G_reader_settings:nilOrTrue("wikipedia_show_more_images")
+            -- Scale wikipedia normalized (we hope) thumbnails by 4 (adjusted
+            -- to screen size/dpi) for full page (and this *4 for highres image)
+            local image_size_factor = Screen:scaleBySize(400)/100.0
+            if self.wiki_prettify or show_image then
                for pageid, page in pairs(query.pages) do
-                    if page.extract then
+                    if self.wiki_prettify and page.extract then
+                        -- Prettification of the plain text full page
                        page.extract = self:prettifyText(page.extract)
                    end
+                    if show_image then
+                        self:addImages(page, lang, show_more_images, image_size_factor, 4)
+                    end
                end
            end
            return query.pages
@ -168,8 +316,8 @@ function Wikipedia:wikifull(text, lang)
 end

 -- get parsed html content and other infos of a wiki page
-function Wikipedia:wikiphtml(text, lang)
-    local result = self:loadPage(text, lang, WIKIPEDIA_PHTML, true)
+function Wikipedia:getFullPageHtml(wiki_title, lang)
+    local result = self:loadPage(wiki_title, lang, WIKIPEDIA_PHTML, true)
    if result and result.parse then
        return result.parse
    end
@ -178,6 +326,247 @@ function Wikipedia:wikiphtml(text, lang)
    end
 end

+-- get images extracted from parsed html
+function Wikipedia:getFullPageImages(wiki_title, lang)
+    local images = {} -- will be returned, each in a format similar to page.thumbnail
+    local result = self:loadPage(wiki_title, lang, WIKIPEDIA_IMAGES, true)
+    if result and result.parse and result.parse.text and result.parse.text["*"] then
+        local html = result.parse.text["*"] -- html content
+        local url = require('socket.url')
+        local wiki_base_url = self:getWikiServer(lang)
+
+        local thumbs = {} -- bits of HTML containing an image
+        -- We first try to catch images in <div class=thumbinner>, which should exclude
+        -- wikipedia icons, flags... These seem to all end with a double </div>.
+        for thtml in html:gmatch([[<div class="thumbinner".-</div>%s*</div>]]) do
+            table.insert(thumbs, thtml)
+        end
+        -- We then also try to catch images in galleries (which often are less
+        -- interesting than those in thumbinner) as a 2nd set.
+        for thtml in html:gmatch([[<li class="gallerybox".-<div class="thumb".-</div>%s*</div>%s*<div class="gallerytext">.-</div>%s*</div>]]) do
+            table.insert(thumbs, thtml)
+        end
+        -- We may miss some interesting images in the page's top right table, but
+        -- there's no easy way to distinguish them from icons/flags in this table...
+
+        for _, thtml in ipairs(thumbs) do
+            -- We get <a href="/wiki/File:real_file_name.jpg (or /wiki/Fichier:real_file_name.jpg
+            -- depending on Wikipedia lang)
+            local filename = thtml:match([[<a href="/wiki/[^:]*:([^"]*)" class="image"]])
+            if filename then
+                filename = url.unescape(filename)
+            end
+            logger.dbg("found image with filename:", filename)
+            -- logger.dbg(thtml)
+            local timg, tremain = thtml:match([[(<img .->)(.*)]])
+            if timg and tremain then
+                -- (Should we discard those without caption ?)
+                local caption = tremain and util.htmlToPlainText(tremain)
+                if caption == "" then caption = nil end
+                logger.dbg("  caption:", caption)
+                -- logger.dbg(timg)
+                local src = timg:match([[src="([^"]*)"]])
+                if src and src ~= "" then
+                    if src:sub(1,2) == "//" then
+                        src = "https:" .. src
+                    elseif src:sub(1,1) == "/" then -- non absolute url
+                        src = wiki_base_url .. src
+                    end
+                    local width = tonumber(timg:match([[width="([^"]*)"]]))
+                    local height = tonumber(timg:match([[height="([^"]*)"]]))
+                    -- Ignore img without width and height, which should exclude
+                    -- javascript maps and other unsupported stuff
+                    if width and height then
+                        -- Images in the html we got seem to be x4.5 the size of
+                        -- the thumbnail we get with searchAndGetIntros() or
+                        -- getFullPage(). Normalize them to the size of the thumbnail,
+                        -- so we can resize them all later with the same rules.
+                        width = math.ceil(width/4.5)
+                        height = math.ceil(height/4.5)
+                        -- No need to adjust width in src url here, as it will be
+                        -- done in addImages() anyway
+                        -- src = src:gsub("(.*/)%d+(px-[^/]*)", "%1"..width.."%2")
+                        logger.dbg("  size:", width, "x", height, "url:", src)
+                        table.insert(images, {
+                            source = src,
+                            width = width,
+                            height = height,
+                            filename = filename,
+                            caption = caption,
+                        })
+                    end
+                end
+            end
+        end
+    end
+    return images
+end
+
+-- Function wrapped and plugged to image objects returned by :addImages()
+local function image_load_bb_func(image, highres)
+    local source, trap_widget
+    if not highres then
+        -- We use an invisible widget that will resend the dismiss event,
+        -- so that image loading in TextBoxWidget is unobtrusive and
+        -- interruptible
+        trap_widget = false
+        source = image.source
+    else
+        -- We need to let the user know image loading is happening,
+        -- with a discreet TrapWidget
+        trap_widget = _("Loading high-res image… (tap to cancel)")
+        source = image.hi_source
+    end
+    -- Image may be big or take some time to be resized on wikipedia servers.
+    -- As we use dismissableRunInSubprocess and can interrupt this loading,
+    -- we can use quite high timeouts
+    local timeout, maxtime = 60, 120
+
+    logger.dbg("fetching", source)
+    local Trapper = require("ui/trapper")
+    -- We use dismissableRunInSubprocess with simple string return value to
+    -- avoid dump()/load() a long string of image bytes
+    local completed, data = Trapper:dismissableRunInSubprocess(function()
+        local success, data = getUrlContent(source, timeout, maxtime)
+        -- With simple string value, we're not able to return the failure
+        -- reason, so log it here
+        if not success then
+            logger.warn("failed fetching image from", source, ":", data)
+        end
+        return success and data or nil
+    end, trap_widget, true) -- task_returns_simple_string=true
+
+    local success = data and true or false -- guess success from data
+
+    if not completed then
+        logger.dbg("image fetching interrupted by user")
+        return true -- let caller know it was interrupted
+    end
+    if not success then
+        -- log it again (on Android, log from sub-process seem to not work)
+        logger.warn("failed fetching image from", source)
+        return
+    end
+    logger.dbg(" fetched", #data)
+
+    -- Use mupdf to render image to blitbuffer
+    local mupdf = require("ffi/mupdf")
+    local ok, bb_or_error
+    if not highres then
+        -- For low-res, we should ensure the image we got from wikipedia is
+        -- the right size, so it does not overflow our reserved area
+        -- (TextBoxWidget may have adjusted image.width and height)
+        ok, bb_or_error = pcall(mupdf.renderImage, data, #data, image.width, image.height)
+    else
+        -- No need for width and height for high-res
+        ok, bb_or_error = pcall(mupdf.renderImage, data, #data)
+    end
+    if not ok then
+        logger.warn("failed building image from", source, ":", bb_or_error)
+        return
+    end
+    if not highres then
+        image.bb = bb_or_error
+    else
+        image.hi_bb = bb_or_error
+    end
+end
+
+function Wikipedia:addImages(page, lang, more_images, image_size_factor, hi_image_size_factor)
+    -- List of images, table with keys as expected by TextBoxWidget
+    page.images = {}
+    -- List of wikipedia images data structures (page.thumbnail and images
+    -- extracted from html) made to have the same keys for common processing
+    local wimages = {}
+
+    -- We got what Wikipedia scored as the most interesting image for this
+    -- page in page.thumbnail, and its filename in page.pageimage, ie:
+    --  "thumbnail": {
+    --    "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/Reading_on_the_bus_train_or_transit.jpg/37px-Reading_on_the_bus_train_or_transit.jpg",
+    --    "width": 37,
+    --    "height": 50
+    --  },
+    --  "pageimage": "Reading_on_the_bus_train_or_transit.jpg"
+    --
+    local first_image_filename = nil
+    if page.thumbnail and page.thumbnail.source then
+        page.thumbnail.filename = page.pageimage
+        first_image_filename = page.pageimage
+        table.insert(wimages, page.thumbnail)
+    end
+    -- To get more images, we need to make a second request to wikipedia
+    if more_images then
+        local ok, images_or_err = pcall(Wikipedia.getFullPageImages, Wikipedia, page.title, lang)
+        if not ok then
+            logger.warn("error getting more images", images_or_err)
+        else
+            for _, wimage in ipairs(images_or_err) do
+                if first_image_filename and wimage.filename == first_image_filename then
+                    -- We got the same image as the thumbnail one, but it may have
+                    -- a caption: replace thumbnail one with this one
+                    table.remove(wimages, 1)
+                    table.insert(wimages, 1, wimage)
+                else
+                    table.insert(wimages, wimage)
+                end
+            end
+        end
+    end
+
+    -- All our wimages now have the keys: source, width, height, filename, caption
+    for _, wimage in ipairs(wimages) do
+        -- We trust wikipedia, and our x4.5 factor in :getFullPageImages(), for adequate
+        -- and homogeneous images' sizes. We'll just scale them according to the
+        -- provided 'image_size_factor' (which should account for screen size/DPI)
+        local width = wimage.width or 100 -- in case we don't get any width or height
+        local height = wimage.height or 100
+        -- Give a little boost in size to thin images
+        if width < height / 2 or height < width / 2 then
+            width = width * 1.3
+            height = height * 1.3
+        end
+        width = math.ceil(width * image_size_factor)
+        height = math.ceil(height * image_size_factor)
+        -- All wikipedia image urls like .../wikipedia/commons/A/BC/<filename>
+        -- or .../wikipedia/commons/thumb/A/BC/<filename>/<width>px-<filename>
+        -- can be transformed to another url with a requested new_width with the form:
+        --   /wikipedia/commons/thumb/A/BC/<filename>/<new_width>px-<filename>
+        -- (Additionally, the image format can be changed by appending .png,
+        -- .jpg or .gif to it)
+        -- The resize is thus done on Wikipedia servers from the source image, for
+        -- the best quality.
+        local source = wimage.source:gsub("(.*/)%d+(px-[^/]*)", "%1"..width.."%2")
+        -- We build values for a high resolution version of the image, to be displayed
+        -- with ImageViewer (x 4 by default)
+        local hi_width = width * (hi_image_size_factor or 4)
+        local hi_height = height * (hi_image_size_factor or 4)
+        local hi_source = wimage.source:gsub("(.*/)%d+(px-[^/]*)", "%1"..hi_width.."%2")
+        local title = wimage.filename
+        if title then
+            title = title:gsub("_", " ")
+        end
+        local image = {
+            -- As expected by TextBoxWidget (with additional source and
+            -- hi_source, that will be used by load_bb_func)
+            title = title,
+            caption = wimage.caption,
+            source = source,
+            width = width,
+            height = height,
+            bb = nil, -- will be loaded and build only if needed
+            hi_source = hi_source,
+            hi_width = hi_width,
+            hi_height = hi_height,
+            hi_bb = nil, -- will be loaded and build only if needed
+        }
+        -- If bb or hi_bb is nil, TextBoxWidget will call a method named "load_bb_func"
+        image.load_bb_func = function(highres)
+            return image_load_bb_func(image, highres)
+        end
+        table.insert(page.images, image)
+    end
+end
+
 -- UTF8 of unicode geometrical shapes we can use to replace
 -- the "=== title ===" of wkipedia plaintext pages
 -- These chosen ones are available in most fonts (prettier symbols
@ -218,38 +607,6 @@ function Wikipedia:prettifyText(text)
 end


-local function getUrlContent(url, timeout)
-    local socket = require('socket')
-    local ltn12 = require('ltn12')
-    local requester
-    if url:sub(1,7) == "http://" then
-        requester = require('socket.http')
-    elseif url:sub(1,8) == "https://" then
-        requester = require('ssl.https')
-    else
-        return false, "Unsupported protocol"
-    end
-    requester.TIMEOUT = timeout or 10
-    local request = {}
-    local sink = {}
-    request['url'] = url
-    request['method'] = 'GET'
-    request['sink'] = ltn12.sink.table(sink)
-    -- first argument returned by skip is code
-    local _, headers, status = socket.skip(1, requester.request(request))
-
-    if headers == nil then
-        logger.warn("No HTTP headers")
-        return false, "Network unavailable"
-    end
-    if status ~= "HTTP/1.1 200 OK" then
-        logger.warn("HTTP status not okay:", status)
-        return false, "Network unavailable"
-    end
-
-    return true, table.concat(sink)
-end
-
 -- UTF8 of unicode geometrical shapes we'll prepend to wikipedia section headers,
 -- to help identify hierarchy (otherwise, only the small font size differences help).
 -- Best if identical to the ones used above for prettifying full plain text page.
@ -292,12 +649,12 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
    -- Trapper:info() and Trapper:confirm() will just use logger.
    local UI = require("ui/trapper")

-    UI:info(_("Fetching Wikipedia page…"))
-    local ok, phtml = pcall(self.wikiphtml, self, page, lang)
+    UI:info(_("Retrieving Wikipedia article…"))
+    local ok, phtml = pcall(self.getFullPageHtml, self, page, lang)
    if not ok then
        UI:info(phtml) -- display error in InfoMessage
        -- Sleep a bit to make that error seen
-        util.sleep(2)
+        ffiutil.sleep(2)
        UI:reset()
        return false
    end
@ -403,13 +760,13 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
    if with_images then
        -- If no UI (Trapper:wrap() not called), UI:confirm() will answer true
        if #images > 0 then
-            include_images = UI:confirm(T(_("The page contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
+            include_images = UI:confirm(T(_("This article contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
            if include_images then
                use_img_2x = UI:confirm(_("Would you like to use slightly higher quality images? This will result in a bigger file size."), _("Standard quality"), _("Higher quality"))
            end
        else
-            UI:info(_("The page does not contain any images."))
-            util.sleep(1) -- Let the user see that
+            UI:info(_("This article does not contain any images."))
+            ffiutil.sleep(1) -- Let the user see that
        end
    end
    if not include_images then
@ -568,6 +925,10 @@ div.thumb {
 ul, ol {
    margin-left: 0em;
 }
+/* avoid a line with a standalone bullet */
+li.gallerybox {
+    display: inline;
+}
 /* helps crengine to not display them as block elements */
 time, abbr, sup {
    display: inline;
@ -655,6 +1016,10 @@ time, abbr, sup {
    -- external link for us, so let's remove this link.
    html = html:gsub("<a[^>]*>%s*(<%s*img [^>]*>)%s*</a>", "%1")

+    -- TODO: do something for <li class="gallerybox"...> so they are no longer
+    -- a <li> (crengine displays them one above the other) and can be displayed
+    -- side by side
+
    -- For some <div class="thumb tright"> , which include nested divs, although
    -- perfectly balanced, crengine seems to miss some closing </div> and we
    -- end up having our image bordered box including the remaining main wiki text.
@ -771,7 +1136,7 @@ time, abbr, sup {
            -- Process can be interrupted at this point between each image download
            -- by tapping while the InfoMessage is displayed
            -- We use the fast_refresh option from image #2 for a quicker download
-            local go_on = UI:info(T(_("Fetching image %1 / %2 …"), inum, nb_images), inum >= 2)
+            local go_on = UI:info(T(_("Retrieving image %1 / %2 …"), inum, nb_images), inum >= 2)
            if not go_on then
                cancelled = true
                break
@ -813,7 +1178,7 @@ time, abbr, sup {
    end
    epub:close()
    -- This was nearly a no-op, so sleep a bit to make that progress step seen
-    util.usleep(300000)
+    ffiutil.usleep(300000)
    UI:reset() -- close last InfoMessage

    if cancelled then