Fulltext search: all entries in entire document (#11313)

reviewable/pr11349/r1
hius07 4 months ago committed by GitHub
parent 5f5162d95c
commit 0ceb88a9a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -4,7 +4,9 @@ local CheckButton = require("ui/widget/checkbutton")
local Device = require("device")
local InfoMessage = require("ui/widget/infomessage")
local InputDialog = require("ui/widget/inputdialog")
local Menu = require("ui/widget/menu")
local Notification = require("ui/widget/notification")
local SpinWidget = require("ui/widget/spinwidget")
local UIManager = require("ui/uimanager")
local Utf8Proc = require("ffi/utf8proc")
local WidgetContainer = require("ui/widget/container/widgetcontainer")
@ -27,7 +29,11 @@ local ReaderSearch = WidgetContainer:extend{
-- The speed of the search depends on the regexes. Complex ones might need some time, easy ones
-- go with the speed of light.
-- Setting max_hits higher does not mean that more memory is required. More hits means smaller single hits.
max_hits = 2048, -- maximum hits for search; timinges tested on a Tolino
max_hits = 2048, -- maximum hits for findText search; timinges tested on a Tolino
findall_max_hits = 5000, -- maximum hits for findAllText search
-- number of words before and after the search string in All search results
findall_nb_context_words = G_reader_settings:readSetting("fulltext_search_nb_context_words") or 3,
findall_results_per_page = G_reader_settings:readSetting("fulltext_search_results_per_page") or 10,
-- internal: whether we expect results on previous pages
-- (can be different from self.direction, if, from a page in the
@ -72,6 +78,54 @@ SRELL_ERROR_CODES[111] = _("Expression too complex, some hits will not be shown.
SRELL_ERROR_CODES[666] = _("Expression may lead to an extremely long search time.")
function ReaderSearch:addToMainMenu(menu_items)
-- Settings submenu for the "All" (find-all) fulltext search.
-- Each entry opens a SpinWidget, stores the chosen value on this instance and
-- persists it in G_reader_settings under the key the field is initialised from.
menu_items.fulltext_search_settings = {
text = _("Fulltext search settings"),
sub_item_table = {
{
-- Number of words shown before and after each hit in the result list.
text_func = function()
return T(_("Words in context: %1"), self.findall_nb_context_words)
end,
keep_menu_open = true,
callback = function(touchmenu_instance)
local widget = SpinWidget:new{
title_text = _("Words in context"),
value = self.findall_nb_context_words,
value_min = 1,
value_max = 20,
default_value = 3,
callback = function(spin)
-- Drop the cache key so the next find-all search is recomputed
-- with the new amount of context instead of reusing old results.
self.last_search_hash = nil
self.findall_nb_context_words = spin.value
G_reader_settings:saveSetting("fulltext_search_nb_context_words", spin.value)
-- Refresh the menu so text_func shows the new value.
touchmenu_instance:updateItems()
end,
}
UIManager:show(widget)
end,
},
{
-- Number of result rows per page of the result Menu (items_per_page).
text_func = function()
return T(_("Results per page: %1"), self.findall_results_per_page)
end,
keep_menu_open = true,
callback = function(touchmenu_instance)
local widget = SpinWidget:new{
title_text = _("Results per page"),
value = self.findall_results_per_page,
value_min = 6,
value_max = 24,
default_value = 10,
callback = function(spin)
-- Only affects how results are laid out, so the cache key is left untouched.
self.findall_results_per_page = spin.value
G_reader_settings:saveSetting("fulltext_search_results_per_page", spin.value)
-- Refresh the menu so text_func shows the new value.
touchmenu_instance:updateItems()
end,
}
UIManager:show(widget)
end,
},
},
}
menu_items.fulltext_search = {
text = _("Fulltext search"),
callback = function()
@ -103,7 +157,14 @@ function ReaderSearch:searchCallback(reverse)
UIManager:show(InfoMessage:new{ text = error_message })
else
UIManager:close(self.input_dialog)
self:onShowSearchDialog(search_text, reverse, self.use_regex, self.case_insensitive)
if reverse then
self:onShowSearchDialog(search_text, reverse, self.use_regex, self.case_insensitive)
else
local Trapper = require("ui/trapper")
Trapper:wrap(function()
self:findAllText(search_text)
end)
end
end
end
@ -126,6 +187,13 @@ function ReaderSearch:onShowFulltextSearchInput()
UIManager:close(self.input_dialog)
end,
},
{
-- @translators Search all entries in entire document
text = _("All"),
callback = function()
self:searchCallback()
end,
},
{
text = backward_text,
callback = function()
@ -183,7 +251,7 @@ function ReaderSearch:onShowSearchDialog(text, direction, regex, case_insensitiv
local no_results = true -- for notification
local res = search_func(self, search_term, param, regex, case_insensitive)
if res then
if self.ui.document.info.has_pages then
if self.ui.paging then
no_results = false
self.ui.link:onGotoLink({page = res.page - 1}, neglect_current_location)
self.view.highlight.temp[res.page] = res
@ -370,6 +438,13 @@ function ReaderSearch:search(pattern, origin, regex, case_insensitive)
Device:setIgnoreInput(true)
local retval, words_found = self.ui.document:findText(pattern, origin, direction, case_insensitive, page, regex, self.max_hits)
Device:setIgnoreInput(false)
self:showErrorNotification(words_found, regex, self.max_hits)
return retval
end
function ReaderSearch:showErrorNotification(words_found, regex, max_hits)
regex = regex or self.use_regex
max_hits = max_hits or self.findall_max_hits
local regex_retval = regex and self.ui.document:getAndClearRegexSearchError()
if regex and regex_retval ~= 0 then
local error_message
@ -382,13 +457,12 @@ function ReaderSearch:search(pattern, origin, regex, case_insensitive)
text = error_message,
timeout = false,
})
elseif words_found and words_found > self.max_hits then
elseif words_found and words_found >= max_hits then
UIManager:show(Notification:new{
text =_("Too many hits"),
timeout = 4,
})
end
return retval
end
function ReaderSearch:searchFromStart(pattern, _, regex, case_insensitive)
@ -416,4 +490,85 @@ function ReaderSearch:searchNext(pattern, direction, regex, case_insensitive)
return self:search(pattern, 1, regex, case_insensitive)
end
--- Find all occurrences of search_text in the whole document and list them.
-- The search runs in a dismissable subprocess; searchCallback() invokes this
-- method from inside Trapper:wrap(). Results are cached in self.findall_results
-- and reused as long as the cache key (query text, case sensitivity and regex
-- flags) is unchanged.
-- @param search_text string the text (or regex) to look for
-- @return nothing; shows either the result list or a "no results" message,
-- and returns silently when the user cancelled the search
function ReaderSearch:findAllText(search_text)
    -- Fall back to search_text so that an unset last_search_text cannot raise
    -- on concatenation when building the cache key.
    local last_search_hash = (self.last_search_text or search_text)
        .. tostring(self.case_insensitive) .. tostring(self.use_regex)
    local not_cached = self.last_search_hash ~= last_search_hash
    if not_cached then
        local Trapper = require("ui/trapper")
        local info = InfoMessage:new{ text = _("Searching… (tap to cancel)") }
        UIManager:show(info)
        UIManager:forceRePaint()
        local completed, res = Trapper:dismissableRunInSubprocess(function()
            return self.ui.document:findAllText(search_text,
                self.case_insensitive, self.findall_nb_context_words, self.findall_max_hits, self.use_regex)
        end, info)
        -- Search was dismissed: keep the previous cache untouched.
        if not completed then return end
        UIManager:close(info)
        self.last_search_hash = last_search_hash
        self.findall_results = res
    end
    -- An empty (or non-table) result is reported as "no results" rather than
    -- opening an empty result list.
    local results = self.findall_results
    if type(results) == "table" and #results > 0 then
        self:showFindAllResults(not_cached)
    else
        UIManager:show(InfoMessage:new{ text = _("No results in the document") })
    end
end
--- Display self.findall_results in a fullscreen Menu and jump to a hit when chosen.
-- @param not_cached boolean true when the results were just computed; for
-- rolling documents the display text and page string of each item are then built here
function ReaderSearch:showFindAllResults(not_cached)
    local results = self.findall_results

    -- for ui.paging: items are built in KoptInterface:findAllText()
    if self.ui.rolling and not_cached then
        for _, hit in ipairs(results) do
            -- PDF/Kopt shows full words when only some part matches; let's do the same with CRE
            local full_word = (hit.matched_word_prefix or "")
                .. (hit.matched_text or "")
                .. (hit.matched_word_suffix or "")
            -- Surround the word with its context, when available.
            -- NOTE(review): the "" wrappers are no-ops as written here; presumably
            -- marker characters were intended around the match - confirm.
            local snippet = "" .. full_word .. ""
            snippet = (hit.prev_text or "") .. snippet .. (hit.next_text or "")
            hit.text = snippet
            hit.mandatory = self.ui.bookmark:getBookmarkPageString(hit.start)
        end
    end

    -- Go to the chosen hit and highlight it.
    local function goto_hit(_, item)
        if not self.ui.rolling then
            -- Paging documents: item.mandatory holds the page number and
            -- item.boxes the native rectangles of the matched words.
            local page = item.mandatory
            local page_boxes = {}
            for idx, native_box in ipairs(item.boxes) do
                page_boxes[idx] = self.ui.document:nativeToPageRectTransform(page, native_box)
            end
            self.ui.link:onGotoLink({ page = page - 1 })
            self.view.highlight.temp[page] = page_boxes
            return
        end
        self.ui.link:addCurrentLocationToStack()
        self.ui.rolling:onGotoXPointer(item.start, item.start) -- show target line marker
        self.ui.document:getTextFromXPointers(item.start, item["end"], true) -- highlight
    end

    -- Declared before construction so that close_callback can refer to it.
    local menu
    menu = Menu:new{
        title = T(_("Search results (%1)"), #results),
        subtitle = T(_("Query: %1"), self.last_search_text),
        item_table = results,
        items_per_page = self.findall_results_per_page,
        covers_fullscreen = true,
        is_borderless = true,
        is_popout = false,
        title_bar_fm_style = true,
        onMenuChoice = goto_hit,
        close_callback = function()
            UIManager:close(menu)
        end,
    }
    UIManager:show(menu)
    -- Notify about regex errors or a reached hit limit.
    self:showErrorNotification(#results)
end
return ReaderSearch

@ -1383,10 +1383,14 @@ function CreDocument:getAndClearRegexSearchError()
return retval
end
function CreDocument:findText(pattern, origin, reverse, caseInsensitive, page, regex, max_hits)
logger.dbg("CreDocument: find text", pattern, origin, reverse, caseInsensitive, regex, max_hits)
return self._document:findText(
pattern, origin, reverse, caseInsensitive and 1 or 0, regex and 1 or 0, max_hits or 200)
function CreDocument:findText(pattern, origin, direction, case_insensitive, page, regex, max_hits)
logger.dbg("CreDocument: find text", pattern, origin, direction == 1, case_insensitive, regex, max_hits)
return self._document:findText(pattern, origin, direction == 1, case_insensitive, regex, max_hits)
end
--- Find all occurrences of pattern by delegating to the underlying document object.
-- Logs the request, then forwards the arguments in the order expected by
-- self._document:findAllText().
-- @param pattern search text or regex
-- @param case_insensitive case-insensitivity flag, forwarded unchanged
-- @param nb_context_words number of context words, forwarded as the last argument
-- @param max_hits maximum number of hits, forwarded unchanged
-- @param regex regex flag, forwarded unchanged
-- @return whatever self._document:findAllText() returns
function CreDocument:findAllText(pattern, case_insensitive, nb_context_words, max_hits, regex)
    -- NOTE(review): the meaning of this fixed fifth argument is defined by the
    -- underlying findAllText() and is not visible here - confirm.
    local fifth_arg = true
    logger.dbg("CreDocument: find all text", pattern, case_insensitive, regex, max_hits, fifth_arg, nb_context_words)
    local engine = self._document
    return engine:findAllText(pattern, case_insensitive, regex, max_hits, fifth_arg, nb_context_words)
end
function CreDocument:enableInternalHistory(toggle)

@ -123,8 +123,12 @@ function DjvuDocument:getCoverPageImage()
return self.koptinterface:getCoverPageImage(self)
end
function DjvuDocument:findText(pattern, origin, reverse, caseInsensitive, page)
return self.koptinterface:findText(self, pattern, origin, reverse, caseInsensitive, page)
function DjvuDocument:findText(pattern, origin, reverse, case_insensitive, page)
return self.koptinterface:findText(self, pattern, origin, reverse, case_insensitive, page)
end
--- Find all occurrences of pattern in this document via its koptinterface.
-- @param pattern search text
-- @param case_insensitive case-insensitivity flag, forwarded unchanged
-- @param nb_context_words number of context words around each hit
-- @param max_hits maximum number of hits
-- @return whatever koptinterface:findAllText() returns
function DjvuDocument:findAllText(pattern, case_insensitive, nb_context_words, max_hits)
    local kopt = self.koptinterface
    return kopt:findAllText(self, pattern, case_insensitive, nb_context_words, max_hits)
end
function DjvuDocument:renderPage(pageno, rect, zoom, rotation, gamma, render_mode, hinting)

@ -373,6 +373,10 @@ function Document:findText()
return nil
end
-- Base stub for the "find all" search: takes no arguments and always returns nil.
function Document:findAllText()
return nil
end
function Document:updateColorRendering()
if self.is_color_capable and CanvasContext.is_color_rendering_enabled then
self.render_color = true

@ -1097,49 +1097,60 @@ function KoptInterface:getWordFromNativePosition(doc, boxes, pos)
return word_box
end
function KoptInterface:getSelectedWordContext(word, nb_words, pos)
local boxes = self.last_text_boxes
if not pos or not boxes or #boxes == 0 then return end
local i, j = getWordBoxIndices(boxes, pos)
if boxes[i][j].word ~= word then return end
local li, wi = i, j
local prev_count, next_count = 0, 0
local prev_text, next_text = {}, {}
local function get_prev_text(boxes, i, j, nb_words)
local prev_count = 0
local prev_text = {}
while prev_count < nb_words do
if li == 1 and wi == 1 then
if i == 1 and j == 1 then
break
elseif wi == 1 then
li = li - 1
wi = #boxes[li]
elseif j == 1 then
i = i - 1
j = #boxes[i]
else
wi = wi - 1
j = j - 1
end
local current_word = boxes[li][wi].word
local current_word = boxes[i][j].word
if #current_word > 0 then
table.insert(prev_text, 1, current_word)
prev_count = prev_count + 1
end
end
if #prev_text > 0 then
return table.concat(prev_text, " ")
end
end
li, wi = i, j
local function get_next_text(boxes, i, j, nb_words)
local next_count = 0
local next_text = {}
while next_count < nb_words do
if li == #boxes and wi == #boxes[li] then
if i == #boxes and j == #boxes[i] then
break
elseif wi == #boxes[li] then
li = li + 1
wi = 1
elseif j == #boxes[i] then
i = i + 1
j = 1
else
wi = wi + 1
j = j + 1
end
local current_word = boxes[li][wi].word
local current_word = boxes[i][j].word
if #current_word > 0 then
table.insert(next_text, current_word)
next_count = next_count + 1
end
end
if #prev_text == 0 and #next_text == 0 then return end
return table.concat(prev_text, " "), table.concat(next_text, " ")
if #next_text > 0 then
return table.concat(next_text, " ")
end
end
--- Get the text surrounding a selected word, using self.last_text_boxes.
-- @param word expected text of the word box found at pos
-- @param nb_words maximum number of context words to collect on each side
-- @param pos position handed to getWordBoxIndices() to locate the word box
-- @return prev_text, next_text (each may be nil); returns nothing when pos or
-- the text boxes are missing, or when the box found at pos does not hold word
function KoptInterface:getSelectedWordContext(word, nb_words, pos)
    local text_boxes = self.last_text_boxes
    if not (pos and text_boxes and #text_boxes ~= 0) then
        return
    end
    local line_idx, word_idx = getWordBoxIndices(text_boxes, pos)
    -- Make sure the located box really is the word we were asked about.
    if text_boxes[line_idx][word_idx].word ~= word then
        return
    end
    local before = get_prev_text(text_boxes, line_idx, word_idx, nb_words)
    local after = get_next_text(text_boxes, line_idx, word_idx, nb_words)
    return before, after
end
--[[--
@ -1336,19 +1347,23 @@ function KoptInterface:nativeToPageRectTransform(doc, pageno, rect)
end
end
local function all_matches(boxes, pattern, caseInsensitive)
local function get_pattern_list(pattern, case_insensitive)
-- pattern list of single words
local plist = {}
-- (as in util.splitToWords(), but only splitting on spaces, keeping punctuations)
for word in util.gsplit(pattern, "%s+") do
if util.hasCJKChar(word) then
for char in util.gsplit(word, "[\192-\255][\128-\191]+", true) do
table.insert(plist, caseInsensitive and Utf8Proc.lowercase(util.fixUtf8(char, "?")) or char)
table.insert(plist, case_insensitive and Utf8Proc.lowercase(util.fixUtf8(char, "?")) or char)
end
else
table.insert(plist, caseInsensitive and Utf8Proc.lowercase(util.fixUtf8(word, "?")) or word)
table.insert(plist, case_insensitive and Utf8Proc.lowercase(util.fixUtf8(word, "?")) or word)
end
end
return plist
end
local function all_matches(boxes, plist, case_insensitive)
local pnb = #plist
-- return matched word indices from index i, j
local function match(i, j)
@ -1362,7 +1377,7 @@ local function all_matches(boxes, pattern, caseInsensitive)
end
if i > #boxes then break end
local box = boxes[i][j]
local word = caseInsensitive and Utf8Proc.lowercase(util.fixUtf8(box.word, "?")) or box.word
local word = case_insensitive and Utf8Proc.lowercase(util.fixUtf8(box.word, "?")) or box.word
local pword = plist[pindex]
local matched
if pnb == 1 then -- single word in plist
@ -1407,11 +1422,12 @@ local function all_matches(boxes, pattern, caseInsensitive)
end)
end
function KoptInterface:findAllMatches(doc, pattern, caseInsensitive, page)
function KoptInterface:findAllMatches(doc, pattern, case_insensitive, page)
local text_boxes = doc:getPageTextBoxes(page)
if not text_boxes then return end
local plist = get_pattern_list(pattern, case_insensitive)
local matches = {}
for indices in all_matches(text_boxes or {}, pattern, caseInsensitive) do
for indices in all_matches(text_boxes, plist, case_insensitive) do
for _, index in ipairs(indices) do
local i, j = unpack(index)
local word = text_boxes[i][j]
@ -1427,8 +1443,8 @@ function KoptInterface:findAllMatches(doc, pattern, caseInsensitive, page)
return matches
end
function KoptInterface:findText(doc, pattern, origin, reverse, caseInsensitive, pageno)
logger.dbg("Koptinterface: find text", pattern, origin, reverse, caseInsensitive, pageno)
function KoptInterface:findText(doc, pattern, origin, reverse, case_insensitive, pageno)
logger.dbg("Koptinterface: find text", pattern, origin, reverse, case_insensitive, pageno)
local last_pageno = doc:getPageCount()
local start_page, end_page
if reverse == 1 then
@ -1456,7 +1472,7 @@ function KoptInterface:findText(doc, pattern, origin, reverse, caseInsensitive,
end
end
for i = start_page, end_page, (reverse == 1) and -1 or 1 do
local matches = self:findAllMatches(doc, pattern, caseInsensitive, i)
local matches = self:findAllMatches(doc, pattern, case_insensitive, i)
if #matches > 0 then
matches.page = i
return matches
@ -1464,6 +1480,58 @@ function KoptInterface:findText(doc, pattern, origin, reverse, caseInsensitive,
end
end
--- Collect every match of pattern over all pages of doc.
-- Each hit is a table usable as a Menu item: text (matched words plus
-- surrounding context), mandatory (page number) and boxes (one native
-- rectangle per matched word, used by the caller to draw a temporary highlight).
-- @param doc document providing getPageCount() and getPageTextBoxes()
-- @param pattern search text, split into words by get_pattern_list()
-- @param case_insensitive case-insensitivity flag
-- @param nb_context_words number of context words to add before and after a hit
-- @param max_hits the search stops as soon as this many hits were collected
-- @return the list of hits, or nothing when there is no hit at all
function KoptInterface:findAllText(doc, pattern, case_insensitive, nb_context_words, max_hits)
    local plist = get_pattern_list(pattern, case_insensitive)
    local hits = {}
    local page_count = doc:getPageCount()
    for page = 1, page_count do
        local text_boxes = doc:getPageTextBoxes(page)
        if text_boxes then
            -- each found pattern in the page
            for indices in all_matches(text_boxes, plist, case_insensitive) do
                local rects, words = {}, {}
                local first_i, first_j, last_i, last_j
                local last_ind = #indices
                -- each word in the pattern
                for ind, index in ipairs(indices) do
                    local i, j = unpack(index)
                    local box = text_boxes[i][j]
                    rects[ind] = {
                        x = box.x0,
                        y = box.y0,
                        w = box.x1 - box.x0,
                        h = box.y1 - box.y0,
                    }
                    words[ind] = box.word
                    -- Remember where the match starts and ends, to fetch context from there.
                    if ind == 1 then
                        first_i, first_j = i, j
                    end
                    if ind == last_ind then
                        last_i, last_j = i, j
                    end
                end
                -- NOTE(review): the "" wrappers are no-ops as written here; presumably
                -- marker characters were intended around the match - confirm.
                local snippet = "" .. table.concat(words, " ") .. ""
                local before = get_prev_text(text_boxes, first_i, first_j, nb_context_words)
                if before then
                    snippet = before .. snippet
                end
                local after = get_next_text(text_boxes, last_i, last_j, nb_context_words)
                if after then
                    snippet = snippet .. after
                end
                hits[#hits + 1] = {
                    text = snippet,
                    mandatory = page,
                    boxes = rects,
                }
                if #hits == max_hits then
                    return hits
                end
            end
        end
    end
    if #hits > 0 then
        return hits
    end
end
--[[--
Log reflow duration.
--]]

@ -337,8 +337,12 @@ function PdfDocument:getCoverPageImage()
return self.koptinterface:getCoverPageImage(self)
end
function PdfDocument:findText(pattern, origin, reverse, caseInsensitive, page)
return self.koptinterface:findText(self, pattern, origin, reverse, caseInsensitive, page)
function PdfDocument:findText(pattern, origin, reverse, case_insensitive, page)
return self.koptinterface:findText(self, pattern, origin, reverse, case_insensitive, page)
end
--- Find all occurrences of pattern in this document via its koptinterface.
-- @param pattern search text
-- @param case_insensitive case-insensitivity flag, forwarded unchanged
-- @param nb_context_words number of context words around each hit
-- @param max_hits maximum number of hits
-- @return whatever koptinterface:findAllText() returns
function PdfDocument:findAllText(pattern, case_insensitive, nb_context_words, max_hits)
    local kopt = self.koptinterface
    return kopt:findAllText(self, pattern, case_insensitive, nb_context_words, max_hits)
end
function PdfDocument:renderPage(pageno, rect, zoom, rotation, gamma, render_mode, hinting)

@ -217,6 +217,8 @@ local order = {
"dictionary_settings",
"wikipedia_settings",
"translation_settings",
"----------------------------",
"fulltext_search_settings",
},
filemanager = {},
main = {

Loading…
Cancel
Save