You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
koreader/frontend/apps/reader/modules/readerdictionary.lua

251 lines
8.8 KiB
Lua

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

local DataStorage = require("datastorage")
local Device = require("device")
local DictQuickLookup = require("ui/widget/dictquicklookup")
local InputContainer = require("ui/widget/container/inputcontainer")
local InfoMessage = require("ui/widget/infomessage")
local JSON = require("json")
local UIManager = require("ui/uimanager")
local logger = require("logger")
local util = require("util")
local _ = require("gettext")
local Screen = Device.screen
local T = require("ffi/util").template
local ReaderDictionary = InputContainer:new{
data_dir = nil,
dict_window_list = {},
lookup_msg = _("Searching dictionary for:\n%1")
}
function ReaderDictionary:init()
self.ui.menu:registerToMainMenu(self)
self.data_dir = os.getenv("STARDICT_DATA_DIR") or
DataStorage:getDataDir() .. "/data/dict"
end
function ReaderDictionary:addToMainMenu(menu_items)
menu_items.dictionary_lookup = {
text = _("Dictionary lookup"),
tap_input = {
title = _("Enter a word to look up"),
ok_text = _("Search dictionary"),
type = "text",
callback = function(input)
self:onLookupWord(input)
end,
},
}
end
function ReaderDictionary:onLookupWord(word, box, highlight)
self.highlight = highlight
self:stardictLookup(word, box)
return true
end
local function dictDirsEmpty(dict_dirs)
for _, dict_dir in ipairs(dict_dirs) do
if not util.isEmptyDir(dict_dir) then
return false
end
end
return true
end
local function tidyMarkup(results)
local cdata_tag = "<!%[CDATA%[(.-)%]%]>"
local format_escape = "&[29Ib%+]{(.-)}"
for _, result in ipairs(results) do
local def = result.definition
-- preserve the <br> tag for line break
def = def:gsub("<[bB][rR] ?/?>", "\n")
-- parse CDATA text in XML
if def:find(cdata_tag) then
def = def:gsub(cdata_tag, "%1")
-- ignore format strings
while def:find(format_escape) do
def = def:gsub(format_escape, "%1")
end
end
-- ignore all markup tags
def = def:gsub("%b<>", "")
-- strip all leading empty lines/spaces
def = def:gsub("^%s+", "")
result.definition = def
end
return results
end
function ReaderDictionary:cleanSelection(text)
-- Will be used by ReaderWikipedia too
if not text then
return ""
end
-- crengine does now a much better job at finding word boundaries, but
-- some cleanup is still needed for selection we get from other engines
-- (example: pdf selection "quautrefois," will be cleaned to "autrefois")
--
-- Replace extended quote (included in the general puncturation range)
-- with plain ascii quote (for french words like "aujourdhui")
text = string.gsub(text, "\xE2\x80\x99", "'") -- U+2019 (right single quotation mark)
-- Strip punctuation characters around selection
text = util.stripePunctuations(text)
-- Strip some common english grammatical construct
text = string.gsub(text, "'s$", '') -- english possessive
-- Strip some common french grammatical constructs
text = string.gsub(text, "^[LSDMNTlsdmnt]'", '') -- french l' s' t'...
text = string.gsub(text, "^[Qq][Uu]'", '') -- french qu'
-- Replace no-break space with regular space
text = string.gsub(text, "\xC2\xA0", ' ') -- U+00A0 no-break space
-- There may be a need to remove some (all?) diacritical marks
-- https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges
-- see discussion at https://github.com/koreader/koreader/issues/1649
-- Commented for now, will have to be checked by people who read
-- languages and texts that use them.
-- text = string.gsub(text, "\204[\128-\191]", '') -- U+0300 to U+033F
-- text = string.gsub(text, "\205[\128-\175]", '') -- U+0340 to U+036F
return text
end
function ReaderDictionary:onLookupStarted(word)
local text = T(self.lookup_msg, word)
self.lookup_progress_msg = InfoMessage:new{text=text}
UIManager:show(self.lookup_progress_msg)
UIManager:forceRePaint()
end
function ReaderDictionary:onLookupDone()
if self.lookup_progress_msg then
UIManager:close(self.lookup_progress_msg)
UIManager:forceRePaint()
end
self.lookup_progress_msg = nil
end
function ReaderDictionary:stardictLookup(word, box)
logger.dbg("lookup word:", word, box)
-- escape quotes and other funny characters in word
word = self:cleanSelection(word)
logger.dbg("stripped word:", word)
if word == "" then
return
end
self:onLookupStarted(word)
local final_results = {}
local seen_results = {}
-- Allow for two sdcv calls : one in the classic data/dict, and
-- another one in data/dict_ext if it exists
-- We could put in data/dict_ext dictionaries with a great number of words
-- but poor definitions as a fall back. If these were in data/dict,
-- they would prevent fuzzy searches in other dictories with better
-- definitions, and masks such results. This way, we can get both.
local dict_dirs = {self.data_dir}
local dict_ext = self.data_dir.."_ext"
if lfs.attributes(dict_ext, "mode") == "directory" then
table.insert(dict_dirs, dict_ext)
end
-- early exit if no dictionaries
if dictDirsEmpty(dict_dirs) then
final_results = {
{
dict = "",
word = word,
definition = _([[No dictionaries installed. Please search for "Dictionary support" in the KOReader Wiki to get more information about installing new dictionaries.]]),
}
}
self:onLookupDone()
self:showDict(word, final_results, box)
return
end
for _, dict_dir in ipairs(dict_dirs) do
local results_str = nil
if Device:isAndroid() then
local A = require("android")
results_str = A.stdout("./sdcv", "--utf8-input", "--utf8-output",
"-nj", word, "--data-dir", dict_dir)
else
local std_out = io.popen("./sdcv --utf8-input --utf8-output -nj "
.. ("%q"):format(word) .. " --data-dir " .. dict_dir, "r")
if std_out then
results_str = std_out:read("*all")
std_out:close()
end
end
local ok, results = pcall(JSON.decode, results_str)
if ok and results then
-- we may get duplicates (sdcv may do multiple queries,
-- in fixed mode then in fuzzy mode), we have to remove them
local h
for _,r in ipairs(results) do
h = r.dict .. r.word .. r.definition
if seen_results[h] == nil then
table.insert(final_results, r)
seen_results[h] = true
end
end
else
logger.warn("JSON data cannot be decoded", results)
end
end
if #final_results == 0 then
-- dummy results
final_results = {
{
dict = "",
word = word,
definition = _("No definition found."),
}
}
end
self:onLookupDone()
self:showDict(word, tidyMarkup(final_results), box)
end
function ReaderDictionary:showDict(word, results, box)
if results and results[1] then
logger.dbg("showing quick lookup window", word, results)
self.dict_window = DictQuickLookup:new{
window_list = self.dict_window_list,
ui = self.ui,
highlight = self.highlight,
dialog = self.dialog,
-- original lookup word
word = word,
results = results,
dictionary = self.default_dictionary,
width = Screen:getWidth() - Screen:scaleBySize(80),
word_box = box,
-- differentiate between dict and wiki
is_wiki = self.is_wiki,
wiki_languages = self.wiki_languages,
refresh_callback = function()
-- update info in footer (time, battery, etc)
self.view.footer:updateFooter()
end,
}
table.insert(self.dict_window_list, self.dict_window)
UIManager:show(self.dict_window)
end
end
function ReaderDictionary:onUpdateDefaultDict(dict)
logger.dbg("make default dictionary:", dict)
self.default_dictionary = dict
UIManager:show(InfoMessage:new{
text = T(_("%1 is now the default dictionary for this document."), dict),
timeout = 2,
})
return true
end
function ReaderDictionary:onReadSettings(config)
self.default_dictionary = config:readSetting("default_dictionary")
end
function ReaderDictionary:onSaveSettings()
logger.dbg("save default dictionary", self.default_dictionary)
self.ui.doc_settings:saveSetting("default_dictionary", self.default_dictionary)
end
return ReaderDictionary