Dict/Wiki lookup: less text cleanup on manual input

Clean up input text less aggressively when it is entered
manually (or is otherwise known to be sane) than when it
comes from a book text selection.
This may allow looking up words like "-suffix", or
making more precise Wikipedia queries.
pull/7104/head
poire-z 3 years ago
parent 5756f1f43e
commit aedab2d695

@ -214,7 +214,8 @@ function ReaderDictionary:addToMainMenu(menu_items)
os.date("%Y-%m-%d %H:%M:%S", value.time), os.date("%Y-%m-%d %H:%M:%S", value.time),
value.word, value.word,
callback = function() callback = function()
self:onLookupWord(value.word) -- Word had been cleaned before being added to history
self:onLookupWord(value.word, true)
end end
}) })
end end
@ -385,10 +386,10 @@ function ReaderDictionary:addToMainMenu(menu_items)
end end
end end
function ReaderDictionary:onLookupWord(word, box, highlight, link) function ReaderDictionary:onLookupWord(word, is_sane, box, highlight, link)
logger.dbg("dict lookup word:", word, box) logger.dbg("dict lookup word:", word, box)
-- escape quotes and other funny characters in word -- escape quotes and other funny characters in word
word = self:cleanSelection(word) word = self:cleanSelection(word, is_sane)
logger.dbg("dict stripped word:", word) logger.dbg("dict stripped word:", word)
self.highlight = highlight self.highlight = highlight
@ -609,7 +610,7 @@ local function tidyMarkup(results)
return results return results
end end
function ReaderDictionary:cleanSelection(text) function ReaderDictionary:cleanSelection(text, is_sane)
-- Will be used by ReaderWikipedia too -- Will be used by ReaderWikipedia too
if not text then if not text then
return "" return ""
@ -618,31 +619,33 @@ function ReaderDictionary:cleanSelection(text)
-- some cleanup is still needed for selection we get from other engines -- some cleanup is still needed for selection we get from other engines
-- (example: pdf selection "quautrefois," will be cleaned to "autrefois") -- (example: pdf selection "quautrefois," will be cleaned to "autrefois")
-- --
-- Trim any space at start or end
text = text:gsub("^%s+", "")
text = text:gsub("%s+$", "")
-- Replace extended quote (included in the general puncturation range)
-- with plain ascii quote (for french words like "aujourdhui")
text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark)
-- Strip punctuation characters around selection
text = util.stripPunctuation(text)
-- Strip some common english grammatical construct
text = text:gsub("'s$", '') -- english possessive
-- Strip some common french grammatical constructs
text = text:gsub("^[LSDMNTlsdmnt]'", '') -- french l' s' t'...
text = text:gsub("^[Qq][Uu]'", '') -- french qu'
-- Replace no-break space with regular space -- Replace no-break space with regular space
text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space
-- There may be a need to remove some (all?) diacritical marks -- Trim any space at start or end
-- https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges
-- see discussion at https://github.com/koreader/koreader/issues/1649
-- Commented for now, will have to be checked by people who read
-- languages and texts that use them.
-- text = text:gsub("\204[\128-\191]", '') -- U+0300 to U+033F
-- text = text:gsub("\205[\128-\175]", '') -- U+0340 to U+036F
-- Trim any space now at start or end after above changes
text = text:gsub("^%s+", "") text = text:gsub("^%s+", "")
text = text:gsub("%s+$", "") text = text:gsub("%s+$", "")
if not is_sane then
-- Replace extended quote (included in the general puncturation range)
-- with plain ascii quote (for french words like "aujourdhui")
text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark)
-- Strip punctuation characters around selection
text = util.stripPunctuation(text)
-- Strip some common english grammatical construct
text = text:gsub("'s$", '') -- english possessive
-- Strip some common french grammatical constructs
text = text:gsub("^[LSDMNTlsdmnt]'", '') -- french l' s' t'...
text = text:gsub("^[Qq][Uu]'", '') -- french qu'
-- There may be a need to remove some (all?) diacritical marks
-- https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges
-- see discussion at https://github.com/koreader/koreader/issues/1649
-- Commented for now, will have to be checked by people who read
-- languages and texts that use them.
-- text = text:gsub("\204[\128-\191]", '') -- U+0300 to U+033F
-- text = text:gsub("\205[\128-\175]", '') -- U+0340 to U+036F
-- Trim any space now at start or end after above changes
text = text:gsub("^%s+", "")
text = text:gsub("%s+$", "")
end
return text return text
end end
@ -680,7 +683,8 @@ function ReaderDictionary:onShowDictionaryLookup()
is_enter_default = true, is_enter_default = true,
callback = function() callback = function()
UIManager:close(self.dictionary_lookup_dialog) UIManager:close(self.dictionary_lookup_dialog)
self:onLookupWord(self.dictionary_lookup_dialog:getInputText()) -- Trust that input text does not need any cleaning (allows querying for "-suffix")
self:onLookupWord(self.dictionary_lookup_dialog:getInputText(), true)
end, end,
}, },
} }

@ -937,14 +937,14 @@ function ReaderHighlight:lookup(selected_word, selected_link)
-- if we extracted text directly -- if we extracted text directly
if selected_word.word then if selected_word.word then
local word_box = self.view:pageToScreenTransform(self.hold_pos.page, selected_word.sbox) local word_box = self.view:pageToScreenTransform(self.hold_pos.page, selected_word.sbox)
self.ui:handleEvent(Event:new("LookupWord", selected_word.word, word_box, self, selected_link)) self.ui:handleEvent(Event:new("LookupWord", selected_word.word, false, word_box, self, selected_link))
-- or we will do OCR -- or we will do OCR
elseif selected_word.sbox and self.hold_pos then elseif selected_word.sbox and self.hold_pos then
local word = self.ui.document:getOCRWord(self.hold_pos.page, selected_word) local word = self.ui.document:getOCRWord(self.hold_pos.page, selected_word)
logger.dbg("OCRed word:", word) logger.dbg("OCRed word:", word)
if word and word ~= "" then if word and word ~= "" then
local word_box = self.view:pageToScreenTransform(self.hold_pos.page, selected_word.sbox) local word_box = self.view:pageToScreenTransform(self.hold_pos.page, selected_word.sbox)
self.ui:handleEvent(Event:new("LookupWord", word, word_box, self, selected_link)) self.ui:handleEvent(Event:new("LookupWord", word, false, word_box, self, selected_link))
else else
UIManager:show(InfoMessage:new{ UIManager:show(InfoMessage:new{
text = info_message_ocr_text, text = info_message_ocr_text,

@ -711,7 +711,7 @@ function ReaderLink:onGoToExternalLink(link_url)
callback = function() callback = function()
UIManager:nextTick(function() UIManager:nextTick(function()
UIManager:close(dialog) UIManager:close(dialog)
self.ui:handleEvent(Event:new("LookupWikipedia", wiki_page, false, true, wiki_lang)) self.ui:handleEvent(Event:new("LookupWikipedia", wiki_page, true, false, true, wiki_lang))
end) end)
end, end,
}) })

@ -53,7 +53,8 @@ function ReaderWikipedia:lookupInput()
is_enter_default = true, is_enter_default = true,
callback = function() callback = function()
UIManager:close(self.input_dialog) UIManager:close(self.input_dialog)
self:onLookupWikipedia(self.input_dialog:getInputText()) -- Trust that input text does not need any cleaning (allows querying for "-suffix")
self:onLookupWikipedia(self.input_dialog:getInputText(), true)
end, end,
}, },
} }
@ -98,7 +99,8 @@ function ReaderWikipedia:addToMainMenu(menu_items)
os.date("%Y-%m-%d %H:%M:%S", value.time), os.date("%Y-%m-%d %H:%M:%S", value.time),
text, text,
callback = function() callback = function()
self:onLookupWikipedia(value.word, nil, value.page, value.lang) -- Word had been cleaned before being added to history
self:onLookupWikipedia(value.word, true, nil, value.page, value.lang)
end end
}) })
end end
@ -375,16 +377,16 @@ function ReaderWikipedia:initLanguages(word)
end end
end end
function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang) function ReaderWikipedia:onLookupWikipedia(word, is_sane, box, get_fullpage, forced_lang)
-- Wrapped through Trapper, as we may be using Trapper:dismissableRunInSubprocess() in it -- Wrapped through Trapper, as we may be using Trapper:dismissableRunInSubprocess() in it
Trapper:wrap(function() Trapper:wrap(function()
self:lookupWikipedia(word, box, get_fullpage, forced_lang) self:lookupWikipedia(word, is_sane, box, get_fullpage, forced_lang)
end) end)
return true return true
end end
function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang) function ReaderWikipedia:lookupWikipedia(word, is_sane, box, get_fullpage, forced_lang)
if NetworkMgr:willRerunWhenOnline(function() self:lookupWikipedia(word, box, get_fullpage, forced_lang) end) then if NetworkMgr:willRerunWhenOnline(function() self:lookupWikipedia(word, is_sane, box, get_fullpage, forced_lang) end) then
-- Not online yet, nothing more to do here, NetworkMgr will forward the callback and run it once connected! -- Not online yet, nothing more to do here, NetworkMgr will forward the callback and run it once connected!
return return
end end
@ -404,7 +406,7 @@ function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang)
-- no need to clean word if get_fullpage, as it is the exact wikipetia page title -- no need to clean word if get_fullpage, as it is the exact wikipetia page title
if word and not get_fullpage then if word and not get_fullpage then
-- escape quotes and other funny characters in word -- escape quotes and other funny characters in word
word = self:cleanSelection(word) word = self:cleanSelection(word, is_sane)
-- no need to lower() word with wikipedia search -- no need to lower() word with wikipedia search
end end
logger.dbg("stripped word:", word) logger.dbg("stripped word:", word)

@ -1102,7 +1102,8 @@ function DictQuickLookup:inputLookup()
else else
event = "LookupWord" event = "LookupWord"
end end
self.ui:handleEvent(Event:new(event, word)) -- Trust that input text does not need any cleaning (allows querying for "-suffix")
self.ui:handleEvent(Event:new(event, word, true))
end end
end end
@ -1131,18 +1132,21 @@ end
function DictQuickLookup:lookupWikipedia(get_fullpage) function DictQuickLookup:lookupWikipedia(get_fullpage)
local word local word
local is_sane
if get_fullpage then if get_fullpage then
-- we use the word of the displayed result's definition, which -- we use the word of the displayed result's definition, which
-- is the exact title of the full wikipedia page -- is the exact title of the full wikipedia page
word = self.lookupword word = self.lookupword
is_sane = true
else else
-- we use the original word that was querried -- we use the original word that was querried
word = self.word word = self.word
is_sane = false
end end
self:resyncWikiLanguages() self:resyncWikiLanguages()
-- strange : we need to pass false instead of nil if word_box is nil, -- (With Event, we need to pass false instead of nil if word_box is nil,
-- otherwise get_fullpage is not passed -- otherwise next arguments are discarded)
self.ui:handleEvent(Event:new("LookupWikipedia", word, self.word_box and self.word_box or false, get_fullpage)) self.ui:handleEvent(Event:new("LookupWikipedia", word, is_sane, self.word_box and self.word_box or false, get_fullpage))
end end
return DictQuickLookup return DictQuickLookup

Loading…
Cancel
Save