HTML dictionary support (#3573)

* Adds a generic HTML widget modeled after the text widget, and HTML dictionary support. HTML dictionaries can have their own CSS (for X.ifo it must be X.css). The base CSS just resets the margin and sets the font.

Note that the widget doesn't handle links, since that wasn't needed for the dictionary.

Closes <https://github.com/koreader/koreader/issues/1776>.

* Show tag-stripped HTML if the dictionary entry isn't valid HTML

* Simulate the normal <br/> behavior

* Bump base
pull/3581/head
TnS-hun 6 years ago committed by Frans de Jonge
parent 5245f018a6
commit 06a8a33d39

@ -1 +1 @@
Subproject commit 81e789e724c7417c1691652532c3ea0a574cba2a
Subproject commit feca07cc6f32271c9a904f67372719b13fe292d7

@ -67,6 +67,17 @@ local ReaderDictionary = InputContainer:new{
lookup_msg = _("Searching dictionary for:\n%1"),
}
--- Read the entire content of a dictionary stylesheet.
-- @string path path of the CSS file to read
-- @treturn string|nil the file content, or nil when the file cannot be opened
local function readDictionaryCss(path)
    local handle = io.open(path, "r")
    if handle then
        local css = handle:read("*all")
        handle:close()
        return css
    end
    -- A missing stylesheet is not an error: the caller simply gets no CSS.
    return nil
end
function ReaderDictionary:init()
self.ui.menu:registerToMainMenu(self)
self.data_dir = os.getenv("STARDICT_DATA_DIR") or
@ -90,11 +101,14 @@ function ReaderDictionary:init()
local content = f:read("*all")
f:close()
local dictname = content:match("\nbookname=(.-)\n")
local is_html = content:find("sametypesequence=h", 1, true) ~= nil
-- sdcv won't use dict that don't have a bookname=
if dictname then
table.insert(available_ifos, {
file = ifo_file,
name = dictname,
is_html = is_html,
css = readDictionaryCss(ifo_file:gsub("%.ifo$", ".css"))
})
end
end
@ -331,26 +345,42 @@ local function dictDirsEmpty(dict_dirs)
return true
end
--- Find the first entry of available_ifos whose name matches.
-- @string dictionary_name name to compare against each entry's `name` field
-- @return the matching entry, or nil when no entry has that name
local function getAvailableIfoByName(dictionary_name)
    local match = nil
    for _, candidate in ipairs(available_ifos) do
        if candidate.name == dictionary_name then
            match = candidate
            break
        end
    end
    return match
end
local function tidyMarkup(results)
local cdata_tag = "<!%[CDATA%[(.-)%]%]>"
local format_escape = "&[29Ib%+]{(.-)}"
for _, result in ipairs(results) do
local def = result.definition
-- preserve the <br> tag for line break
def = def:gsub("<[bB][rR] ?/?>", "\n")
-- parse CDATA text in XML
if def:find(cdata_tag) then
def = def:gsub(cdata_tag, "%1")
-- ignore format strings
while def:find(format_escape) do
def = def:gsub(format_escape, "%1")
local ifo = getAvailableIfoByName(result.dict)
if ifo and ifo.is_html then
result.is_html = ifo.is_html
result.css = ifo.css
else
local def = result.definition
-- preserve the <br> tag for line break
def = def:gsub("<[bB][rR] ?/?>", "\n")
-- parse CDATA text in XML
if def:find(cdata_tag) then
def = def:gsub(cdata_tag, "%1")
-- ignore format strings
while def:find(format_escape) do
def = def:gsub(format_escape, "%1")
end
end
-- ignore all markup tags
def = def:gsub("%b<>", "")
-- strip all leading empty lines/spaces
def = def:gsub("^%s+", "")
result.definition = def
end
-- ignore all markup tags
def = def:gsub("%b<>", "")
-- strip all leading empty lines/spaces
def = def:gsub("^%s+", "")
result.definition = def
end
return results
end

@ -14,6 +14,7 @@ local InputDialog = require("ui/widget/inputdialog")
local LeftContainer = require("ui/widget/container/leftcontainer")
local LineWidget = require("ui/widget/linewidget")
local OverlapGroup = require("ui/widget/overlapgroup")
local ScrollHtmlWidget = require("ui/widget/scrollhtmlwidget")
local ScrollTextWidget = require("ui/widget/scrolltextwidget")
local Size = require("ui/size")
local TextWidget = require("ui/widget/textwidget")
@ -37,6 +38,7 @@ local DictQuickLookup = InputContainer:new{
displayword = nil,
is_wiki = false,
is_fullpage = false,
is_html = false,
dict_index = 1,
title_face = Font:getFace("x_smalltfont"),
content_face = Font:getFace("cfont", DDICT_FONT_SIZE),
@ -156,6 +158,25 @@ function DictQuickLookup:isDocless()
return self.ui == nil or self.ui.highlight == nil
end
--- Build the stylesheet used to render an HTML dictionary entry.
-- The base rules reset the margins and select the font; when self.css is set
-- it is appended after the base rules.
-- @treturn string the CSS text
function DictQuickLookup:getHtmlDictionaryCss()
    -- Using Noto Sans because Nimbus doesn't contain the IPA symbols.
    local base_css = [[
@page {
margin: 0;
font-family: 'Noto Sans';
}
body {
margin: 0;
}
]]
    if not self.css then
        return base_css
    end
    return base_css .. self.css
end
function DictQuickLookup:update()
local orig_dimen = self.dict_frame and self.dict_frame.dimen or Geom:new{}
-- calculate window dimension
@ -236,12 +257,20 @@ function DictQuickLookup:update()
text_font_size = lookup_word_font_size,
hold_callback = function() self:lookupInputWord(self.lookupword) end,
}
-- word definition
local definition = FrameContainer:new{
padding = self.definition_padding,
margin = self.definition_margin,
bordersize = 0,
ScrollTextWidget:new{
local text_widget
if self.is_html then
text_widget = ScrollHtmlWidget:new{
html_body = self.definition,
css = self:getHtmlDictionaryCss(),
default_font_size = DDICT_FONT_SIZE,
width = self.width,
height = self.is_fullpage and self.height*0.75 or self.height*0.7,
dialog = self,
}
else
text_widget = ScrollTextWidget:new{
text = self.definition,
face = self.content_face,
width = self.width,
@ -250,7 +279,15 @@ function DictQuickLookup:update()
dialog = self,
-- allow for disabling justification
justified = G_reader_settings:nilOrTrue("dict_justify"),
},
}
end
-- word definition
local definition = FrameContainer:new{
padding = self.definition_padding,
margin = self.definition_margin,
bordersize = 0,
text_widget,
}
-- Different sets of buttons if fullpage or not
local buttons
@ -538,6 +575,8 @@ function DictQuickLookup:changeDictionary(index)
self.lookupword = self.results[index].word
self.definition = self.results[index].definition
self.is_fullpage = self.results[index].is_fullpage
self.is_html = self.results[index].is_html
self.css = self.results[index].css
self.lang = self.results[index].lang
if self.is_fullpage then
self.displayword = self.lookupword

@ -0,0 +1,189 @@
--[[--
HTML widget (without scroll bars).
--]]
local DrawContext = require("ffi/drawcontext")
local Geom = require("ui/geometry")
local InputContainer = require("ui/widget/container/inputcontainer")
local logger = require("logger")
local Mupdf = require("ffi/mupdf")
local util = require("util")
local TimeVal = require("ui/timeval")
-- Prototype of the HTML box widget: shows one page of an HTML document at a time.
local HtmlBoxWidget = InputContainer:new{
-- cached rendering of the current page; filled by _render(), released by freeBb()
bb = nil,
-- widget geometry: w/h are used for layout and rendering, x/y are set in paintTo()
dimen = nil,
-- document object created by setContent()
document = nil,
-- number of pages of the laid out document, set by setContent()
page_count = 0,
-- page opened by _render() and by onHoldReleaseText()
page_number = 1,
-- widget-relative position recorded by onHoldStartText()
hold_start_pos = nil,
-- time recorded by onHoldStartText(), used to compute the hold duration
hold_start_tv = nil,
}
--- Load an HTML fragment into the widget and lay it out.
-- If the fragment cannot be parsed as HTML, it is converted to escaped plain
-- text and loaded again; if that also fails the error is re-raised.
-- self.document is only replaced once a document was opened successfully, so
-- it never holds an error value, and a previously opened document is closed
-- instead of being leaked.
-- @string body HTML placed inside the <body> element
-- @string[opt] css stylesheet embedded into the <head> element
-- @param default_font_size font size handed to layoutDocument()
function HtmlBoxWidget:setContent(body, css, default_font_size)
    -- fz_set_user_css is tied to the context instead of the document so to easily support multiple
    -- HTML dictionaries with different CSS, we embed the stylesheet into the HTML instead of using
    -- that function.
    local head = ""
    if css then
        head = string.format("<head><style>%s</style></head>", css)
    end
    local html = string.format("<html>%s<body>%s</body></html>", head, body)
    -- For some reason in MuPDF <br/> always creates both a line break and an empty line, so we have to
    -- simulate the normal <br/> behavior.
    -- https://bugs.ghostscript.com/show_bug.cgi?id=698351
    html = html:gsub("%<br ?/?%>", "&nbsp;<div></div>")

    -- Keep the result in a local: on failure pcall's second value is the
    -- error, which must not end up in self.document.
    local ok, document = pcall(Mupdf.openDocumentFromText, html, "html")
    if not ok then
        logger.warn("HTML loading error:", document)
        local plain = util.htmlToPlainText(body)
        plain = util.htmlEscape(plain)
        -- Normally \n would be replaced with <br/>. See the previous comment regarding the bug in MuPDF.
        plain = plain:gsub("\n", "&nbsp;<div></div>")
        html = string.format("<html>%s<body>%s</body></html>", head, plain)
        ok, document = pcall(Mupdf.openDocumentFromText, html, "html")
        if not ok then
            error(document)
        end
    end

    -- Release the document of an earlier setContent() call before replacing it.
    if self.document then
        self.document:close()
    end
    self.document = document

    self.document:layoutDocument(self.dimen.w, self.dimen.h, default_font_size)
    self.page_count = self.document:getPages()
end
--- Render the current page into self.bb unless a rendering is already cached.
function HtmlBoxWidget:_render()
    if not self.bb then
        local page = self.document:openPage(self.page_number)
        local draw_context = DrawContext.new()
        self.bb = page:draw_new(draw_context, self.dimen.w, self.dimen.h, 0, 0)
        -- the page object is only needed while drawing
        page:close()
    end
end
-- Returns the widget's dimen table (the table itself, not a copy).
function HtmlBoxWidget:getSize()
return self.dimen
end
--- Paint the current page onto a target buffer.
-- The position is stored in self.dimen so that later gesture positions can
-- be made relative to the widget.
-- @param bb target buffer
-- @param x horizontal position in the target buffer
-- @param y vertical position in the target buffer
function HtmlBoxWidget:paintTo(bb, x, y)
    local dimen = self.dimen
    dimen.x, dimen.y = x, y

    -- make sure self.bb holds the rendering of the current page
    self:_render()

    local size = self:getSize()
    bb:blitFrom(self.bb, x, y, 0, 0, size.w, size.h)
end
--- Drop the cached page rendering, freeing it when it provides a free() method.
function HtmlBoxWidget:freeBb()
    local buffer = self.bb
    if buffer and buffer.free then
        buffer:free()
    end
    self.bb = nil
end
-- This will normally be called by our WidgetContainer:free()
-- But it SHOULD explicitly be called if we are getting replaced
-- (ie: in some other widget's update()), to not leak memory with
-- BlitBuffer zombies
-- Safe to call more than once, and before setContent(): the document is only
-- closed when one is present.
function HtmlBoxWidget:free()
    self:freeBb()
    if self.document then
        self.document:close()
        self.document = nil
    end
end
-- Event handler: releases the widget's resources through free().
function HtmlBoxWidget:onCloseWidget()
-- free when UIManager:close() was called
self:free()
end
--- Remember where and when a hold gesture started.
-- The position is stored relative to the widget's top-left corner.
-- @param _ unused
-- @param ges gesture whose `pos` field holds the absolute position
-- @treturn bool always true
function HtmlBoxWidget:onHoldStartText(_, ges)
    local rel_x = ges.pos.x - self.dimen.x
    local rel_y = ges.pos.y - self.dimen.y
    self.hold_start_pos = Geom:new{ x = rel_x, y = rel_y }
    self.hold_start_tv = TimeVal.now()
    return true
end
--- Collect the words between two positions.
-- Collection starts at the first word box containing start_pos and stops
-- after the first collected word whose box contains end_pos; when no such
-- word follows, everything from the start word onwards is returned.
-- @param lines table of lines; table-valued entries of a line are treated as
-- word boxes with x0, y0, x1, y1 and word fields
-- @param start_pos position with x and y fields
-- @param end_pos position with x and y fields
-- @treturn table list of the collected `word` values
function HtmlBoxWidget:getSelectedText(lines, start_pos, end_pos)
    -- true when pos lies inside box (x1/y1 are exclusive)
    local function hits(pos, box)
        return pos.x >= box.x0 and pos.x < box.x1 and pos.y >= box.y0 and pos.y < box.y1
    end

    local selecting = false
    local selection = {}
    for _, line in pairs(lines) do
        for _, item in pairs(line) do
            -- non-table entries of a line are not words
            if type(item) == 'table' then
                if not selecting and hits(start_pos, item) then
                    selecting = true
                end
                if selecting then
                    table.insert(selection, item.word)
                    -- Found the end.
                    if hits(end_pos, item) then
                        return selection
                    end
                end
            end
        end
    end
    return selection
end
--- Finish a hold gesture and report the selected text.
-- @func callback called with the selected text (words joined by spaces) and
-- the hold duration in seconds
-- @param ges gesture whose `pos` field holds the absolute release position
-- @treturn bool true when the callback was invoked, false otherwise
function HtmlBoxWidget:onHoldReleaseText(callback, ges)
    -- nothing to do without a callback or without a preceding HoldStart event
    if not callback or not self.hold_start_pos then
        return false
    end

    local start_pos = self.hold_start_pos
    local end_pos = Geom:new{
        x = ges.pos.x - self.dimen.x,
        y = ges.pos.y - self.dimen.y,
    }
    self.hold_start_pos = nil

    -- both ends of the selection have to be inside our area
    local width, height = self.dimen.w, self.dimen.h
    local function isOutside(pos)
        return pos.x < 0 or pos.x >= width or pos.y < 0 or pos.y >= height
    end
    if isOutside(start_pos) or isOutside(end_pos) then
        return false
    end

    local elapsed = TimeVal.now() - self.hold_start_tv
    local hold_duration = elapsed.sec + (elapsed.usec/1000000)

    local page = self.document:openPage(self.page_number)
    local lines = page:getPageText()
    page:close()

    local selected_text = table.concat(self:getSelectedText(lines, start_pos, end_pos), " ")
    callback(selected_text, hold_duration)
    return true
end
return HtmlBoxWidget

@ -0,0 +1,150 @@
--[[--
HTML widget with vertical scroll bar.
--]]
local Device = require("device")
local HtmlBoxWidget = require("ui/widget/htmlboxwidget")
local Geom = require("ui/geometry")
local GestureRange = require("ui/gesturerange")
local HorizontalGroup = require("ui/widget/horizontalgroup")
local HorizontalSpan = require("ui/widget/horizontalspan")
local InputContainer = require("ui/widget/container/inputcontainer")
local UIManager = require("ui/uimanager")
local VerticalScrollBar = require("ui/widget/verticalscrollbar")
local Input = Device.input
local Screen = Device.screen
-- Prototype of the scrollable HTML widget: an HtmlBoxWidget next to a vertical scroll bar.
local ScrollHtmlWidget = InputContainer:new{
-- HTML body handed to HtmlBoxWidget:setContent() in init()
html_body = nil,
-- stylesheet handed to HtmlBoxWidget:setContent() in init()
css = nil,
-- font size handed to HtmlBoxWidget:setContent() in init()
default_font_size = 18,
-- HtmlBoxWidget instance created in init()
htmlbox_widget = nil,
-- VerticalScrollBar instance created in init()
v_scroll_bar = nil,
-- widget passed to UIManager:setDirty() after scrolling
dialog = nil,
-- set in init() from the size of the horizontal group
dimen = nil,
-- total width; the HTML box gets this minus scroll_bar_width and text_scroll_span
width = 0,
-- height of both the HTML box and the scroll bar
height = 0,
-- width of the scroll bar
scroll_bar_width = Screen:scaleBySize(6),
-- width of the span placed between the HTML box and the scroll bar
text_scroll_span = Screen:scaleBySize(12),
}
--- Build the HTML box, the scroll bar and the input bindings.
-- The HTML box takes the total width minus the scroll bar and the span
-- separating them; both children share the widget's height.
function ScrollHtmlWidget:init()
    self.htmlbox_widget = HtmlBoxWidget:new{
        dimen = Geom:new{
            w = self.width - self.scroll_bar_width - self.text_scroll_span,
            h = self.height,
        },
    }
    self.htmlbox_widget:setContent(self.html_body, self.css, self.default_font_size)

    self.v_scroll_bar = VerticalScrollBar:new{
        -- a scroll bar is only useful when there is more than one page
        enable = self.htmlbox_widget.page_count > 1,
        width = self.scroll_bar_width,
        height = self.height,
    }
    self.v_scroll_bar:set((self.htmlbox_widget.page_number-1) / self.htmlbox_widget.page_count, self.htmlbox_widget.page_number / self.htmlbox_widget.page_count)

    local horizontal_group = HorizontalGroup:new{}
    table.insert(horizontal_group, self.htmlbox_widget)
    table.insert(horizontal_group, HorizontalSpan:new{width=self.text_scroll_span})
    table.insert(horizontal_group, self.v_scroll_bar)
    self[1] = horizontal_group
    self.dimen = Geom:new(self[1]:getSize())

    if Device:isTouchDevice() then
        self.ges_events = {
            -- The event name has to match its handler: every binding of this
            -- widget pairs an event X with a method onX, and the swipe handler
            -- is onScrollText.
            ScrollText = {
                GestureRange:new{
                    ges = "swipe",
                    range = function() return self.dimen end,
                },
            },
            TapScrollText = { -- allow scrolling with tap
                GestureRange:new{
                    ges = "tap",
                    range = function() return self.dimen end,
                },
            },
        }
    end
    if Device:hasKeyboard() or Device:hasKeys() then
        self.key_events = {
            ScrollDown = {{Input.group.PgFwd}, doc = "scroll down"},
            ScrollUp = {{Input.group.PgBack}, doc = "scroll up"},
        }
    end
end
--- Move one page forward or backward and redraw.
-- Does nothing when direction is 0 or when the first/last page is already shown.
-- @number direction positive for the next page, negative for the previous one
function ScrollHtmlWidget:scrollText(direction)
    if direction == 0 then
        return
    end

    local box = self.htmlbox_widget
    local target = box.page_number
    if direction > 0 then
        if target >= box.page_count then
            return
        end
        target = target + 1
    elseif direction < 0 then
        if target <= 1 then
            return
        end
        target = target - 1
    end
    box.page_number = target

    local total = box.page_count
    self.v_scroll_bar:set((target-1) / total, target / total)

    -- throw away the cached rendering and draw the new page
    box:freeBb()
    box:_render()

    UIManager:setDirty(self.dialog, function()
        return "partial", self.dimen
    end)
end
--- Swipe handler: a swipe to the north shows the next page, a swipe to the
-- south the previous one.
-- @param arg unused
-- @param ges gesture whose `direction` field is inspected
-- @return true for north/south swipes, nothing otherwise
function ScrollHtmlWidget:onScrollText(arg, ges)
    local steps = { north = 1, south = -1 }
    local step = steps[ges.direction]
    if step then
        self:scrollText(step)
        return true
    end
    -- if swipe west/east, let it propagate up (e.g. for quickdictlookup to
    -- go to next/prev result)
end
--- Tap handler: a tap on the left half of the screen shows the previous page,
-- a tap on the right half the next one.
-- The tap is only consumed when a page change is possible, i.e. not on the
-- first page for a left tap and not on the last page for a right tap.
-- @param arg unused
-- @param ges gesture whose `pos.x` field is inspected
-- @return true when the widget scrolled, nothing otherwise
function ScrollHtmlWidget:onTapScrollText(arg, ges)
    local box = self.htmlbox_widget
    if ges.pos.x < Screen:getWidth()/2 then
        if box.page_number > 1 then
            self:scrollText(-1)
            return true
        end
    else
        -- strictly less: on the last page there is nothing left to scroll to
        if box.page_number < box.page_count then
            self:scrollText(1)
            return true
        end
    end
    -- if we couldn't scroll (because we're already at top or bottom),
    -- let it propagate up (e.g. for quickdictlookup to go to next/prev result)
end
-- Key handler for the ScrollDown event: asks for the next page and always consumes the event.
function ScrollHtmlWidget:onScrollDown()
self:scrollText(1)
return true
end
-- Key handler for the ScrollUp event: asks for the previous page and always consumes the event.
function ScrollHtmlWidget:onScrollUp()
self:scrollText(-1)
return true
end
return ScrollHtmlWidget

@ -564,4 +564,18 @@ function util.htmlToPlainTextIfHtml(text)
return text
end
--- Encode the HTML entities in a string
-- @string text the string to escape
-- @treturn string the escaped string (and nothing else: the substitution
-- count returned by gsub is dropped)
-- Taken from https://github.com/kernelsauce/turbo/blob/e4a35c2e3fb63f07464f8f8e17252bea3a029685/turbo/escape.lua#L58-L70
function util.htmlEscape(text)
    -- Braces were part of the character class but had no replacement, so they
    -- were always left untouched; they are not matched at all now.
    local escaped = text:gsub("[\">/<'&]", {
        ["&"] = "&amp;",
        ["<"] = "&lt;",
        [">"] = "&gt;",
        ['"'] = "&quot;",
        ["'"] = "&#39;",
        ["/"] = "&#47;",
    })
    -- Assigning to a single local keeps only gsub's first result, so callers
    -- that forward the result into an argument list get exactly one value.
    return escaped
end
return util

Loading…
Cancel
Save