Wikipedia: show images and allow interrupting queries

Reword some English messages
pull/3617/head
poire-z 6 years ago
parent 43ad2cef99
commit 775b7a364f

@ -6,6 +6,7 @@ local KeyValuePage = require("ui/widget/keyvaluepage")
local LuaData = require("luadata")
local NetworkMgr = require("ui/network/manager")
local ReaderDictionary = require("apps/reader/modules/readerdictionary")
local Trapper = require("ui/trapper")
local Translator = require("ui/translator")
local UIManager = require("ui/uimanager")
local Wikipedia = require("ui/wikipedia")
@ -21,7 +22,6 @@ local ReaderWikipedia = ReaderDictionary:extend{
-- identify itself
is_wiki = true,
wiki_languages = {},
no_page = _("No wiki page found."),
disable_history = G_reader_settings:isTrue("wikipedia_disable_history"),
}
@ -271,7 +271,29 @@ function ReaderWikipedia:addToMainMenu(menu_items)
end,
})
end,
}
separator = true,
},
{ -- setting used in wikipedia.lua
text = _("Show image in search results"),
checked_func = function()
return G_reader_settings:nilOrTrue("wikipedia_show_image")
end,
callback = function()
G_reader_settings:flipNilOrTrue("wikipedia_show_image")
end,
},
{ -- setting used in wikipedia.lua
text = _("Show more images in full article"),
enabled_func = function()
return G_reader_settings:nilOrTrue("wikipedia_show_image")
end,
checked_func = function()
return G_reader_settings:nilOrTrue("wikipedia_show_more_images") and G_reader_settings:nilOrTrue("wikipedia_show_image")
end,
callback = function()
G_reader_settings:flipNilOrTrue("wikipedia_show_more_images")
end,
},
}
}
end
@ -319,6 +341,14 @@ function ReaderWikipedia:initLanguages(word)
end
function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
-- Wrapped through Trapper, as we may be using Trapper:dismissableRunInSubprocess() in it
Trapper:wrap(function()
self:lookupWikipedia(word, box, get_fullpage, forced_lang)
end)
return true
end
function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang)
if not NetworkMgr:isOnline() then
NetworkMgr:promptWifiOn()
return
@ -358,19 +388,35 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
})
end
-- Fix lookup message to include lang
-- Fix lookup message to include lang and set appropriate error texts
local no_result_text, req_failure_text
if get_fullpage then
self.lookup_msg = T(_("Getting Wikipedia %2 page:\n%1"), "%1", lang:upper())
self.lookup_msg = T(_("Retrieving Wikipedia %2 article:\n%1"), "%1", lang:upper())
req_failure_text = _("Failed to retrieve Wikipedia article.")
no_result_text = _("Wikipedia article not found.")
else
self.lookup_msg = T(_("Searching Wikipedia %2 for:\n%1"), "%1", lang:upper())
req_failure_text = _("Failed searching Wikipedia.")
no_result_text = _("No Wikipedia articles matching search term.")
end
self:showLookupInfo(display_word)
local results = {}
local ok, pages
local lookup_cancelled = false
Wikipedia:setTrapWidget(self.lookup_progress_msg)
if get_fullpage then
ok, pages = pcall(Wikipedia.wikifull, Wikipedia, word, lang)
ok, pages = pcall(Wikipedia.getFullPage, Wikipedia, word, lang)
else
ok, pages = pcall(Wikipedia.wikintro, Wikipedia, word, lang)
ok, pages = pcall(Wikipedia.searchAndGetIntros, Wikipedia, word, lang)
end
Wikipedia:resetTrapWidget()
if not ok and pages and string.find(pages, Wikipedia.dismissed_error_code) then
-- So we can display an alternate dummy result
lookup_cancelled = true
-- Or we could just not show anything with:
-- self:dismissLookupInfo()
-- return
end
if ok and pages then
-- sort pages according to 'index' attribute if present (not present
@ -387,14 +433,14 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
pages = sorted_pages
end
for pageid, page in pairs(pages) do
local definition = page.extract or self.no_page
local definition = page.extract or no_result_text
if page.length then
-- we get 'length' only for intro results
-- let's append it to definition so we know
-- how big/valuable the full page is
local fullkb = math.ceil(page.length/1024)
local more_factor = math.ceil( page.length / (1+definition:len()) ) -- +1 just in case len()=0
definition = definition .. "\n" .. T(_("(full page : %1 kB, = %2 x this intro length)"), fullkb, more_factor)
definition = definition .. "\n" .. T(_("(full article: %1 kB, = %2 x this intro length)"), fullkb, more_factor)
end
local result = {
dict = T(_("Wikipedia %1"), lang:upper()),
@ -402,18 +448,27 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
definition = definition,
is_fullpage = get_fullpage,
lang = lang,
images = page.images,
}
table.insert(results, result)
end
-- logger.dbg of results will be done by ReaderDictionary:showDict()
else
logger.dbg("error:", pages)
-- dummy results
local definition
if lookup_cancelled then
definition = _("Wikipedia request canceled.")
elseif ok then
definition = no_result_text
else
definition = req_failure_text
logger.dbg("error:", pages)
end
results = {
{
dict = T(_("Wikipedia %1"), lang:upper()),
word = word,
definition = self.no_page,
definition = definition,
is_fullpage = get_fullpage,
lang = lang,
}

@ -1,8 +1,10 @@
local JSON = require("json")
local Screen = require("device").screen
local ffiutil = require("ffi/util")
local logger = require("logger")
local util = require("ffi/util")
local util = require("util")
local _ = require("gettext")
local T = require("ffi/util").template
local T = ffiutil.template
--[[
-- Query wikipedia using Wikimedia Web API.
@ -18,18 +20,9 @@ local T = require("ffi/util").template
local Wikipedia = {
wiki_server = "https://%s.wikipedia.org",
wiki_path = "/w/api.php",
wiki_params = {
action = "query",
prop = "extracts",
format = "json",
-- exintro = nil, -- get more than only the intro
explaintext = "",
redirects = "",
-- title = nil, -- text to lookup, will be added below
},
default_lang = "en",
-- Search query for better results
-- see https://www.mediawiki.org/wiki/API:Main_page
-- See https://www.mediawiki.org/wiki/API:Main_page for details.
-- Search query, returns introductory texts (+ main thumbnail image)
wiki_search_params = {
action = "query",
generator = "search",
@ -37,7 +30,7 @@ local Wikipedia = {
-- gsrsearch = nil, -- text to lookup, will be added below
gsrlimit = 20, -- max nb of results to get
exlimit = "max",
prop = "extracts|info", -- 'extracts' to get text, 'info' to get full page length
prop = "extracts|info|pageimages", -- 'extracts' to get text, 'info' to get full page length
format = "json",
explaintext = "",
exintro = "",
@ -45,6 +38,17 @@ local Wikipedia = {
-- (otherwise, we get the full text for only the first result, and
-- no text at all for the others)
},
-- Full article, parsed to output text (+ main thumbnail image)
wiki_full_params = {
action = "query",
prop = "extracts|pageimages",
format = "json",
-- exintro = nil, -- get more than only the intro
explaintext = "",
redirects = "",
-- title = nil, -- text to lookup, will be added below
},
-- Full article, parsed to output HTML, for Save as EPUB
wiki_phtml_params = {
action = "parse",
format = "json",
@ -55,32 +59,152 @@ local Wikipedia = {
disablelimitreport = "",
disableeditsection = "",
},
-- allow for disabling prettifying full page text
-- Full article, parsed to output HTML, for images extraction
-- (used with full article as text, if "show more images" enabled)
wiki_images_params = { -- same as previous one, with just text html
action = "parse",
format = "json",
-- we only need the following information
prop = "text",
-- page = nil, -- text to lookup, will be added below
redirects = "",
disabletoc = "", -- remove toc in html
disablelimitreport = "",
disableeditsection = "",
},
-- There is an alternative for obtaining page's images:
-- prop=imageinfo&action=query&iiprop=url|dimensions|mime|extmetadata&generator=images&pageids=49448&iiurlwidth=100&iiextmetadatafilter=ImageDescription
-- but it gives all images (including wikipedia icons) in any order, without
-- any score or information that would help decide whether they matter
--
-- Allow for disabling prettifying full page text
wiki_prettify = G_reader_settings:nilOrTrue("wikipedia_prettify"),
-- Can be set so HTTP requests will be done under Trapper and
-- be interruptible
trap_widget = nil,
-- For actions done with Trapper:dismissable methods, we may throw
-- an error() with this code. We make the value of this error
-- accessible here so that the caller can know it was a user dismiss.
dismissed_error_code = "Interrupted by user",
}
function Wikipedia:getWikiServer(lang)
return string.format(self.wiki_server, lang or self.default_lang)
end
-- Codes that getUrlContent may get from requester.request()
local TIMEOUT_CODE = "timeout" -- from socket.lua
local MAXTIME_CODE = "maxtime reached" -- from sink_table_with_maxtime
-- Sink that stores into a table, aborting if maxtime has elapsed
local function sink_table_with_maxtime(t, maxtime)
-- Start counting as soon as this sink is created
local start_secs, start_usecs = ffiutil.gettime()
local starttime = start_secs + start_usecs/1000000
t = t or {}
local f = function(chunk, err)
local secs, usecs = ffiutil.gettime()
if secs + usecs/1000000 - starttime > maxtime then
return nil, MAXTIME_CODE
end
if chunk then table.insert(t, chunk) end
return 1
end
return f, t
end
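-- Illustrative sketch (not part of this commit; 'some_url' is a placeholder):
-- how this sink plugs into an ltn12-style request table.
--   local sink = {}
--   local request = {
--       url = some_url,
--       method = "GET",
--       -- only the first return value (the sink function) is kept here:
--       sink = sink_table_with_maxtime(sink, 5), -- abort after ~5 seconds
--   }
--   -- whether completed or aborted, the body gathered so far is:
--   local body = table.concat(sink)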
-- Get URL content
local function getUrlContent(url, timeout, maxtime)
local socket = require('socket')
local ltn12 = require('ltn12')
local http = require('socket.http')
local https = require('ssl.https')
local requester
if url:sub(1,7) == "http://" then
requester = http
elseif url:sub(1,8) == "https://" then
requester = https
else
return false, "Unsupported protocol"
end
if not timeout then timeout = 10 end
-- timeout needs to be set on 'http', even when we use 'https'
http.TIMEOUT, https.TIMEOUT = timeout, timeout
local request = {}
local sink = {}
request['url'] = url
request['method'] = 'GET'
-- The 'timeout' delay works at the socket level: it is triggered when
-- that much time has passed trying to connect, or, once connected,
-- when no data has been read for that long.
-- On a slow connection, it may never be triggered (reading 1 byte
-- every second would not trigger any timeout).
-- 'maxtime' can be provided to overcome that: we start counting as
-- soon as the first content byte is received (but it is only checked
-- when data is received).
-- Setting both 'timeout' and 'maxtime' gives a better chance to abort the
-- request when it takes too long (in the worst case: timeout+maxtime seconds).
-- But the time taken by the DNS lookup cannot easily be accounted for, so
-- a request may (when the DNS lookup is slow) exceed timeout and maxtime...
if maxtime then
request['sink'] = sink_table_with_maxtime(sink, maxtime)
else
request['sink'] = ltn12.sink.table(sink)
end
local code, headers, status = socket.skip(1, requester.request(request))
local content = table.concat(sink) -- empty or content accumulated till now
-- logger.dbg("code:", code)
-- logger.dbg("headers:", headers)
-- logger.dbg("status:", status)
-- logger.dbg("#content:", #content)
if code == TIMEOUT_CODE or code == MAXTIME_CODE then
logger.warn("request interrupted:", code)
return false, code
end
if headers == nil then
logger.warn("No HTTP headers:", code, status)
return false, "Network or remote server unavailable"
end
if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK
logger.warn("HTTP status not okay:", code, status)
return false, "Remote server error or unavailable"
end
if headers and headers["content-length"] then
-- Check we really got the announced content size
local content_length = tonumber(headers["content-length"])
if #content ~= content_length then
return false, "Incomplete content received"
end
end
return true, content
end
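-- A minimal usage sketch (hypothetical URL): getUrlContent() returns a
-- success boolean, plus either the content or a failure reason.
--   local success, content = getUrlContent(
--       "https://en.wikipedia.org/w/api.php?action=query&format=json",
--       10, -- timeout: abort when connect/read stalls for 10 seconds
--       60) -- maxtime: abort when the whole request exceeds 60 seconds
--   if not success then
--       logger.warn("request failed:", content) -- content holds the reason
--   end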
function Wikipedia:setTrapWidget(trap_widget)
self.trap_widget = trap_widget
end
function Wikipedia:resetTrapWidget()
self.trap_widget = nil
end
-- Possible values for page_type parameter to loadPage()
local WIKIPEDIA_INTRO = 1
local WIKIPEDIA_FULL = 2
local WIKIPEDIA_PHTML = 3
local WIKIPEDIA_IMAGES = 4
--[[
-- return decoded JSON table from Wikipedia
--]]
function Wikipedia:loadPage(text, lang, page_type, plain)
local socket = require('socket')
local url = require('socket.url')
local http = require('socket.http')
local https = require('ssl.https')
local ltn12 = require('ltn12')
local request, sink = {}, {}
local query = ""
local parsed = url.parse(self:getWikiServer(lang))
parsed.path = self.wiki_path
if page_type == WIKIPEDIA_INTRO then -- search query
@ -90,8 +214,8 @@ function Wikipedia:loadPage(text, lang, page_type, plain)
end
parsed.query = query .. "gsrsearch=" .. url.escape(text)
elseif page_type == WIKIPEDIA_FULL then -- full page content
self.wiki_params.explaintext = plain and "" or nil
for k,v in pairs(self.wiki_params) do
self.wiki_full_params.explaintext = plain and "" or nil
for k,v in pairs(self.wiki_full_params) do
query = string.format("%s%s=%s&", query, k, v)
end
parsed.query = query .. "titles=" .. url.escape(text)
@ -100,66 +224,90 @@ function Wikipedia:loadPage(text, lang, page_type, plain)
query = string.format("%s%s=%s&", query, k, v)
end
parsed.query = query .. "page=" .. url.escape(text)
elseif page_type == WIKIPEDIA_IMAGES then -- images found in page html
for k,v in pairs(self.wiki_images_params) do
query = string.format("%s%s=%s&", query, k, v)
end
parsed.query = query .. "page=" .. url.escape(text)
else
return
end
-- HTTP request
request['url'] = url.build(parsed)
request['method'] = 'GET'
request['sink'] = ltn12.sink.table(sink)
http.TIMEOUT, https.TIMEOUT = 10, 10
local httpRequest = parsed.scheme == 'http' and http.request or https.request
-- first argument returned by skip is code
local _, headers, status = socket.skip(1, httpRequest(request))
-- raise error message when network is unavailable
if headers == nil then
error("Network is unreachable")
local built_url = url.build(parsed)
local completed, success, content
if self.trap_widget then -- if previously set with Wikipedia:setTrapWidget()
local Trapper = require("ui/trapper")
local timeout, maxtime = 30, 60
-- We use dismissableRunInSubprocess with complex return values:
completed, success, content = Trapper:dismissableRunInSubprocess(function()
return getUrlContent(built_url, timeout, maxtime)
end, self.trap_widget)
if not completed then
error(self.dismissed_error_code) -- "Interrupted by user"
end
else
local timeout, maxtime = 10, 60
success, content = getUrlContent(built_url, timeout, maxtime)
end
if status ~= "HTTP/1.1 200 OK" then
logger.warn("HTTP status not okay:", status)
return
if not success then
error(content)
end
local content = table.concat(sink)
if content ~= "" and string.sub(content, 1,1) == "{" then
local ok, result = pcall(JSON.decode, content)
if ok and result then
logger.dbg("wiki result", result)
logger.dbg("wiki result json:", result)
return result
else
logger.warn("wiki error:", result)
logger.warn("wiki result json decoding error:", result)
error("Failed decoding JSON")
end
else
logger.warn("not JSON from wiki response:", content)
logger.warn("wiki response is not json:", content)
error("Response is not JSON")
end
end
-- search wikipedia and get intros for results
function Wikipedia:wikintro(text, lang)
function Wikipedia:searchAndGetIntros(text, lang)
local result = self:loadPage(text, lang, WIKIPEDIA_INTRO, true)
if result then
local query = result.query
if query then
local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image")
-- Scale wikipedia normalized (we hope) thumbnails by 2 (adjusted
-- to screen size/dpi) for intros (and x8 more for the highres image)
local image_size_factor = Screen:scaleBySize(200)/100.0
if show_image then
for pageid, page in pairs(query.pages) do
self:addImages(page, lang, false, image_size_factor, 8)
end
end
return query.pages
end
end
end
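-- Caller-side sketch (mirroring ReaderWikipedia:lookupWikipedia() above;
-- 'some_progress_msg' is a placeholder widget): run under Trapper so the
-- HTTP request is dismissable, and detect dismissal via dismissed_error_code.
--   Trapper:wrap(function()
--       Wikipedia:setTrapWidget(some_progress_msg)
--       local ok, pages = pcall(Wikipedia.searchAndGetIntros, Wikipedia, "whale", "en")
--       Wikipedia:resetTrapWidget()
--       if not ok and pages and string.find(pages, Wikipedia.dismissed_error_code) then
--           -- user tapped to cancel: show a dummy result, or just return
--       end
--   end)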
-- get full content of a wiki page
function Wikipedia:wikifull(text, lang)
local result = self:loadPage(text, lang, WIKIPEDIA_FULL, true)
function Wikipedia:getFullPage(wiki_title, lang)
local result = self:loadPage(wiki_title, lang, WIKIPEDIA_FULL, true)
if result then
local query = result.query
if query then
if self.wiki_prettify then
-- Prettification of the plain text full page
local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image")
local show_more_images = G_reader_settings:nilOrTrue("wikipedia_show_more_images")
-- Scale wikipedia normalized (we hope) thumbnails by 4 (adjusted
-- to screen size/dpi) for the full page (and x4 more for the highres image)
local image_size_factor = Screen:scaleBySize(400)/100.0
if self.wiki_prettify or show_image then
for pageid, page in pairs(query.pages) do
if page.extract then
if self.wiki_prettify and page.extract then
-- Prettification of the plain text full page
page.extract = self:prettifyText(page.extract)
end
if show_image then
self:addImages(page, lang, show_more_images, image_size_factor, 4)
end
end
end
return query.pages
@ -168,8 +316,8 @@ function Wikipedia:wikifull(text, lang)
end
-- get parsed html content and other infos of a wiki page
function Wikipedia:wikiphtml(text, lang)
local result = self:loadPage(text, lang, WIKIPEDIA_PHTML, true)
function Wikipedia:getFullPageHtml(wiki_title, lang)
local result = self:loadPage(wiki_title, lang, WIKIPEDIA_PHTML, true)
if result and result.parse then
return result.parse
end
@ -178,6 +326,247 @@ function Wikipedia:wikiphtml(text, lang)
end
end
-- get images extracted from parsed html
function Wikipedia:getFullPageImages(wiki_title, lang)
local images = {} -- will be returned, each in a format similar to page.thumbnail
local result = self:loadPage(wiki_title, lang, WIKIPEDIA_IMAGES, true)
if result and result.parse and result.parse.text and result.parse.text["*"] then
local html = result.parse.text["*"] -- html content
local url = require('socket.url')
local wiki_base_url = self:getWikiServer(lang)
local thumbs = {} -- bits of HTML containing an image
-- We first try to catch images in <div class=thumbinner>, which should exclude
-- wikipedia icons, flags... These seem to all end with a double </div>.
for thtml in html:gmatch([[<div class="thumbinner".-</div>%s*</div>]]) do
table.insert(thumbs, thtml)
end
-- We then also try to catch images in galleries (which often are less
-- interesting than those in thumbinner) as a 2nd set.
for thtml in html:gmatch([[<li class="gallerybox".-<div class="thumb".-</div>%s*</div>%s*<div class="gallerytext">.-</div>%s*</div>]]) do
table.insert(thumbs, thtml)
end
-- We may miss some interesting images in the page's top right table, but
-- there's no easy way to distinguish them from icons/flags in this table...
for _, thtml in ipairs(thumbs) do
-- We get <a href="/wiki/File:real_file_name.jpg (or /wiki/Fichier:real_file_name.jpg
-- depending on Wikipedia lang)
local filename = thtml:match([[<a href="/wiki/[^:]*:([^"]*)" class="image"]])
if filename then
filename = url.unescape(filename)
end
logger.dbg("found image with filename:", filename)
-- logger.dbg(thtml)
local timg, tremain = thtml:match([[(<img .->)(.*)]])
if timg and tremain then
-- (Should we discard those without a caption?)
local caption = tremain and util.htmlToPlainText(tremain)
if caption == "" then caption = nil end
logger.dbg(" caption:", caption)
-- logger.dbg(timg)
local src = timg:match([[src="([^"]*)"]])
if src and src ~= "" then
if src:sub(1,2) == "//" then
src = "https:" .. src
elseif src:sub(1,1) == "/" then -- non-absolute url
src = wiki_base_url .. src
end
local width = tonumber(timg:match([[width="([^"]*)"]]))
local height = tonumber(timg:match([[height="([^"]*)"]]))
-- Ignore img without width and height, which should exclude
-- javascript maps and other unsupported stuff
if width and height then
-- Images in the html we got seem to be x4.5 the size of
-- the thumbnail we get with searchAndGetIntros() or
-- getFullPage(). Normalize them to the size of the thumbnail,
-- so we can resize them all later with the same rules.
width = math.ceil(width/4.5)
height = math.ceil(height/4.5)
-- No need to adjust width in src url here, as it will be
-- done in addImages() anyway
-- src = src:gsub("(.*/)%d+(px-[^/]*)", "%1"..width.."%2")
logger.dbg(" size:", width, "x", height, "url:", src)
table.insert(images, {
source = src,
width = width,
height = height,
filename = filename,
caption = caption,
})
end
end
end
end
end
return images
end
-- Function wrapped and plugged into the image objects built by :addImages()
local function image_load_bb_func(image, highres)
local source, trap_widget
if not highres then
-- We use an invisible widget that will resend the dismiss event,
-- so that image loading in TextBoxWidget is unobtrusive and
-- interruptible
trap_widget = false
source = image.source
else
-- We need to let the user know image loading is happening,
-- with a discreet TrapWidget
trap_widget = _("Loading high-res image… (tap to cancel)")
source = image.hi_source
end
-- Image may be big or take some time to be resized on wikipedia servers.
-- As we use dismissableRunInSubprocess and can interrupt this loading,
-- we can use quite high timeouts
local timeout, maxtime = 60, 120
logger.dbg("fetching", source)
local Trapper = require("ui/trapper")
-- We use dismissableRunInSubprocess with simple string return value to
-- avoid dump()/load() a long string of image bytes
local completed, data = Trapper:dismissableRunInSubprocess(function()
local success, data = getUrlContent(source, timeout, maxtime)
-- With a simple string value, we're not able to return the failure
-- reason, so log it here
if not success then
logger.warn("failed fetching image from", source, ":", data)
end
return success and data or nil
end, trap_widget, true) -- task_returns_simple_string=true
local success = data and true or false -- guess success from data
if not completed then
logger.dbg("image fetching interrupted by user")
return true -- let caller know it was interrupted
end
if not success then
-- log it again (on Android, logs from the sub-process don't seem to work)
logger.warn("failed fetching image from", source)
return
end
logger.dbg(" fetched", #data)
-- Use mupdf to render image to blitbuffer
local mupdf = require("ffi/mupdf")
local ok, bb_or_error
if not highres then
-- For low-res, we should ensure the image we got from wikipedia is
-- the right size, so it does not overflow our reserved area
-- (TextBoxWidget may have adjusted image.width and height)
ok, bb_or_error = pcall(mupdf.renderImage, data, #data, image.width, image.height)
else
-- No need for width and height for high-res
ok, bb_or_error = pcall(mupdf.renderImage, data, #data)
end
if not ok then
logger.warn("failed building image from", source, ":", bb_or_error)
return
end
if not highres then
image.bb = bb_or_error
else
image.hi_bb = bb_or_error
end
end
function Wikipedia:addImages(page, lang, more_images, image_size_factor, hi_image_size_factor)
-- List of images, table with keys as expected by TextBoxWidget
page.images = {}
-- List of wikipedia images data structures (page.thumbnail and images
-- extracted from html) made to have the same keys for common processing
local wimages = {}
-- We got what Wikipedia scored as the most interesting image for this
-- page in page.thumbnail, and its filename in page.pageimage, i.e.:
-- "thumbnail": {
-- "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/Reading_on_the_bus_train_or_transit.jpg/37px-Reading_on_the_bus_train_or_transit.jpg",
-- "width": 37,
-- "height": 50
-- },
-- "pageimage": "Reading_on_the_bus_train_or_transit.jpg"
--
local first_image_filename = nil
if page.thumbnail and page.thumbnail.source then
page.thumbnail.filename = page.pageimage
first_image_filename = page.pageimage
table.insert(wimages, page.thumbnail)
end
-- To get more images, we need to make a second request to wikipedia
if more_images then
local ok, images_or_err = pcall(Wikipedia.getFullPageImages, Wikipedia, page.title, lang)
if not ok then
logger.warn("error getting more images", images_or_err)
else
for _, wimage in ipairs(images_or_err) do
if first_image_filename and wimage.filename == first_image_filename then
-- We got the same image as the thumbnail one, but it may have
-- a caption: replace thumbnail one with this one
table.remove(wimages, 1)
table.insert(wimages, 1, wimage)
else
table.insert(wimages, wimage)
end
end
end
end
-- All our wimages now have the keys: source, width, height, filename, caption
for _, wimage in ipairs(wimages) do
-- We trust wikipedia, and our x4.5 factor in :getFullPageImages(), for adequate
-- and homogeneous image sizes. We'll just scale them according to the
-- provided 'image_size_factor' (which should account for screen size/DPI)
local width = wimage.width or 100 -- in case we don't get any width or height
local height = wimage.height or 100
-- Give a little boost in size to thin images
if width < height / 2 or height < width / 2 then
width = width * 1.3
height = height * 1.3
end
width = math.ceil(width * image_size_factor)
height = math.ceil(height * image_size_factor)
-- All wikipedia image urls like .../wikipedia/commons/A/BC/<filename>
-- or .../wikipedia/commons/thumb/A/BC/<filename>/<width>px-<filename>
-- can be transformed to another url with a requested new_width with the form:
-- /wikipedia/commons/thumb/A/BC/<filename>/<new_width>px-<filename>
-- (Additionally, the image format can be changed by appending .png,
-- .jpg or .gif to it)
-- The resize is thus done on Wikipedia servers from the source image,
-- for the best quality.
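-- For example (illustrative values), the 37px thumbnail url quoted
-- earlier in this function:
--   .../thumb/4/45/Reading_on_the_bus_train_or_transit.jpg/37px-Reading_on_the_bus_train_or_transit.jpg
-- is rewritten, for a computed width of 400, to:
--   .../thumb/4/45/Reading_on_the_bus_train_or_transit.jpg/400px-Reading_on_the_bus_train_or_transit.jpg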
local source = wimage.source:gsub("(.*/)%d+(px-[^/]*)", "%1"..width.."%2")
-- We build values for a high resolution version of the image, to be displayed
-- with ImageViewer (x 4 by default)
local hi_width = width * (hi_image_size_factor or 4)
local hi_height = height * (hi_image_size_factor or 4)
local hi_source = wimage.source:gsub("(.*/)%d+(px-[^/]*)", "%1"..hi_width.."%2")
local title = wimage.filename
if title then
title = title:gsub("_", " ")
end
local image = {
-- As expected by TextBoxWidget (with additional source and
-- hi_source, which will be used by load_bb_func)
title = title,
caption = wimage.caption,
source = source,
width = width,
height = height,
bb = nil, -- will be loaded and built only if needed
hi_source = hi_source,
hi_width = hi_width,
hi_height = hi_height,
hi_bb = nil, -- will be loaded and built only if needed
}
-- If bb or hi_bb is nil, TextBoxWidget will call a method named "load_bb_func"
image.load_bb_func = function(highres)
return image_load_bb_func(image, highres)
end
table.insert(page.images, image)
end
end
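-- Lazy-loading sketch (an assumption of how TextBoxWidget consumes these,
-- per the comment above): blitbuffers are only built when actually shown.
--   local img = page.images[1]
--   if img and not img.bb then
--       img.load_bb_func(false) -- fetches and fills img.bb (low-res)
--   end
--   if img and not img.hi_bb then
--       img.load_bb_func(true) -- fetches and fills img.hi_bb (high-res)
--   end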
-- UTF8 of unicode geometrical shapes we can use to replace
-- the "=== title ===" of wkipedia plaintext pages
-- These chosen ones are available in most fonts (prettier symbols
@ -218,38 +607,6 @@ function Wikipedia:prettifyText(text)
end
local function getUrlContent(url, timeout)
local socket = require('socket')
local ltn12 = require('ltn12')
local requester
if url:sub(1,7) == "http://" then
requester = require('socket.http')
elseif url:sub(1,8) == "https://" then
requester = require('ssl.https')
else
return false, "Unsupported protocol"
end
requester.TIMEOUT = timeout or 10
local request = {}
local sink = {}
request['url'] = url
request['method'] = 'GET'
request['sink'] = ltn12.sink.table(sink)
-- first argument returned by skip is code
local _, headers, status = socket.skip(1, requester.request(request))
if headers == nil then
logger.warn("No HTTP headers")
return false, "Network unavailable"
end
if status ~= "HTTP/1.1 200 OK" then
logger.warn("HTTP status not okay:", status)
return false, "Network unavailable"
end
return true, table.concat(sink)
end
-- UTF8 of unicode geometrical shapes we'll prepend to wikipedia section headers,
-- to help identify the hierarchy (otherwise, only the small font size differences help).
-- Best if identical to the ones used above for prettifying full plain text page.
@ -292,12 +649,12 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
-- Trapper:info() and Trapper:confirm() will just use logger.
local UI = require("ui/trapper")
UI:info(_("Fetching Wikipedia page…"))
local ok, phtml = pcall(self.wikiphtml, self, page, lang)
UI:info(_("Retrieving Wikipedia article…"))
local ok, phtml = pcall(self.getFullPageHtml, self, page, lang)
if not ok then
UI:info(phtml) -- display error in InfoMessage
-- Sleep a bit to make that error seen
util.sleep(2)
ffiutil.sleep(2)
UI:reset()
return false
end
@ -403,13 +760,13 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
if with_images then
-- If no UI (Trapper:wrap() not called), UI:confirm() will answer true
if #images > 0 then
include_images = UI:confirm(T(_("The page contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
include_images = UI:confirm(T(_("This article contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
if include_images then
use_img_2x = UI:confirm(_("Would you like to use slightly higher quality images? This will result in a bigger file size."), _("Standard quality"), _("Higher quality"))
end
else
UI:info(_("The page does not contain any images."))
util.sleep(1) -- Let the user see that
UI:info(_("This article does not contain any images."))
ffiutil.sleep(1) -- Let the user see that
end
end
if not include_images then
@ -568,6 +925,10 @@ div.thumb {
ul, ol {
margin-left: 0em;
}
/* avoid a line with a standalone bullet */
li.gallerybox {
display: inline;
}
/* helps crengine to not display them as block elements */
time, abbr, sup {
display: inline;
@ -655,6 +1016,10 @@ time, abbr, sup {
-- external link for us, so let's remove this link.
html = html:gsub("<a[^>]*>%s*(<%s*img [^>]*>)%s*</a>", "%1")
-- TODO: do something for <li class="gallerybox"...> so they are no longer
-- a <li> (crengine displays them one above the other) and can be displayed
-- side by side
-- For some <div class="thumb tright">, which include nested divs, although
-- perfectly balanced, crengine seems to miss some closing </div> and we
-- end up having our image bordered box including the remaining main wiki text.
@ -771,7 +1136,7 @@ time, abbr, sup {
-- Process can be interrupted at this point between each image download
-- by tapping while the InfoMessage is displayed
-- We use the fast_refresh option from image #2 for a quicker download
local go_on = UI:info(T(_("Fetching image %1 / %2 …"), inum, nb_images), inum >= 2)
local go_on = UI:info(T(_("Retrieving image %1 / %2 …"), inum, nb_images), inum >= 2)
if not go_on then
cancelled = true
break
@ -813,7 +1178,7 @@ time, abbr, sup {
end
epub:close()
-- This was nearly a no-op, so sleep a bit to make that progress step seen
util.usleep(300000)
ffiutil.usleep(300000)
UI:reset() -- close last InfoMessage
if cancelled then
