Vocabulary builder: support extracting context from pdfs (#9622)

Move getSelectedWordContext(), now document specific,
from ReaderHighlight into each document module.
reviewable/pr9680/r1
weijiuqiao 2 years ago committed by GitHub
parent 58613d66e7
commit edf7cc9a61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1282,26 +1282,12 @@ dbg:guard(ReaderHighlight, "lookup",
end) end)
function ReaderHighlight:getSelectedWordContext(nb_words) function ReaderHighlight:getSelectedWordContext(nb_words)
if not self.ui.rolling or not self.selected_text then return nil end if not self.selected_text then return end
local pos_start = self.selected_text.pos0 local ok, prev_context, next_context = pcall(self.ui.document.getSelectedWordContext, self.ui.document,
local pos_end = self.selected_text.pos1 self.selected_text.text, nb_words, self.selected_text.pos0, self.selected_text.pos1)
if ok then
for i=0, nb_words do return prev_context, next_context
local ok, start = pcall(self.ui.document.getPrevVisibleWordStart, self.ui.document, pos_start)
if ok then pos_start = start
else break end
end
for i=0, nb_words do
local ok, ending = pcall(self.ui.document.getNextVisibleWordEnd, self.ui.document, pos_end)
if ok then pos_end = ending
else break end
end end
local ok_prev, prev = pcall(self.ui.document.getTextFromXPointers, self.ui.document, pos_start, self.selected_text.pos0)
local ok_next, next = pcall(self.ui.document.getTextFromXPointers, self.ui.document, self.selected_text.pos1, pos_end)
return ok_prev and prev, ok_next and next
end end
function ReaderHighlight:viewSelectionHTML(debug_view, no_css_files_buttons) function ReaderHighlight:viewSelectionHTML(debug_view, no_css_files_buttons)

@ -716,6 +716,28 @@ function CreDocument:getNextVisibleChar(xp)
return self._document:getNextVisibleChar(xp) return self._document:getNextVisibleChar(xp)
end end
--- Get the text located just before and just after a selection.
-- Widens the range [pos0, pos1] one visible word at a time in each direction,
-- then extracts the text of the two added spans.
-- @param word the selected text; not used by this implementation
-- @param nb_words loop bound for the widening; each loop counts from 0 to
-- nb_words inclusive, so up to nb_words + 1 steps are taken per side
-- @param pos0 start of the selection (passed on to getTextFromXPointers,
-- so presumably an xpointer -- confirm against the caller)
-- @param pos1 end of the selection (same remark as pos0)
-- @return text between the widened start and pos0
-- @return text between pos1 and the widened end
function CreDocument:getSelectedWordContext(word, nb_words, pos0, pos1)
    local context_start, context_end = pos0, pos1

    -- Walk backwards until the bound is reached or no earlier word start is reported.
    for _ = 0, nb_words do
        local word_start = self:getPrevVisibleWordStart(context_start)
        if not word_start then break end
        context_start = word_start
    end

    -- Walk forwards until the bound is reached or no later word end is reported.
    for _ = 0, nb_words do
        local word_end = self:getNextVisibleWordEnd(context_end)
        if not word_end then break end
        context_end = word_end
    end

    -- Kept as two separate locals so that each call contributes exactly one value.
    local before = self:getTextFromXPointers(context_start, pos0)
    local after = self:getTextFromXPointers(pos1, context_end)
    return before, after
end
function CreDocument:drawCurrentView(target, x, y, rect, pos) function CreDocument:drawCurrentView(target, x, y, rect, pos)
if self.buffer and (self.buffer.w ~= rect.w or self.buffer.h ~= rect.h) then if self.buffer and (self.buffer.w ~= rect.w or self.buffer.h ~= rect.h) then
self.buffer:free() self.buffer:free()

@ -96,6 +96,10 @@ function DjvuDocument:nativeToPageRectTransform(pageno, rect)
return self.koptinterface:nativeToPageRectTransform(self, pageno, rect) return self.koptinterface:nativeToPageRectTransform(self, pageno, rect)
end end
--- Get the words surrounding a selected word.
-- Pure delegation: forwards its arguments to this document's koptinterface
-- and returns whatever that call returns.
-- @param word text of the selected word
-- @param nb_words number of context words wanted on each side
-- @param pos position of the selected word
function DjvuDocument:getSelectedWordContext(word, nb_words, pos)
    local kopt = self.koptinterface
    return kopt:getSelectedWordContext(word, nb_words, pos)
end
function DjvuDocument:getOCRWord(pageno, wbox) function DjvuDocument:getOCRWord(pageno, wbox)
return self.koptinterface:getOCRWord(self, pageno, wbox) return self.koptinterface:getOCRWord(self, pageno, wbox)
end end

@ -1037,6 +1037,7 @@ Get word and word box from `doc` position.
function KoptInterface:getWordFromPosition(doc, pos) function KoptInterface:getWordFromPosition(doc, pos)
local text_boxes = self:getTextBoxes(doc, pos.page) local text_boxes = self:getTextBoxes(doc, pos.page)
if text_boxes then if text_boxes then
self.last_text_boxes = text_boxes
if doc.configurable.text_wrap == 1 then if doc.configurable.text_wrap == 1 then
return self:getWordFromReflowPosition(doc, text_boxes, pos) return self:getWordFromReflowPosition(doc, text_boxes, pos)
else else
@ -1094,6 +1095,51 @@ function KoptInterface:getWordFromNativePosition(doc, boxes, pos)
return word_box return word_box
end end
--[[--
Get the words surrounding a selected word.

Works on `self.last_text_boxes`, the text boxes cached by the last call to
`getWordFromPosition`. These are indexed as `boxes[line][word_index]`, each
entry carrying a `word` field. Lines without any word box and boxes without a
usable `word` are skipped instead of raising an error.

@param word text of the selected word; must equal the word of the box found at `pos`
@param nb_words maximum number of non-empty words collected on each side
@param pos position handed to `getWordBoxIndices` to locate the selected box
@return preceding words joined with single spaces (may be an empty string),
  or nothing when `pos` or the cached boxes are missing, when the box at `pos`
  does not hold `word`, or when no context word was found on either side
@return following words joined with single spaces (may be an empty string)
]]--
function KoptInterface:getSelectedWordContext(word, nb_words, pos)
    local boxes = self.last_text_boxes
    if not pos or not boxes or #boxes == 0 then return end
    -- getWordBoxIndices is a helper defined elsewhere in this file.
    local i, j = getWordBoxIndices(boxes, pos)
    local selected = boxes[i] and boxes[i][j]
    if not selected or selected.word ~= word then return end

    local prev_text, next_text = {}, {}

    -- Walk backwards from the selected box.
    local li, wi = i, j
    while #prev_text < nb_words do
        wi = wi - 1
        -- Crossing a line start: move to the end of the nearest earlier
        -- line that actually contains word boxes.
        while wi < 1 and li > 1 do
            li = li - 1
            wi = #boxes[li]
        end
        if wi < 1 then break end -- reached the very first box
        local current_word = boxes[li][wi].word
        if current_word and #current_word > 0 then
            table.insert(prev_text, 1, current_word)
        end
    end

    -- Walk forwards from the selected box.
    li, wi = i, j
    while #next_text < nb_words do
        wi = wi + 1
        -- Crossing a line end: move to the start of the nearest later
        -- line that actually contains word boxes.
        while wi > #boxes[li] and li < #boxes do
            li = li + 1
            wi = 1
        end
        if wi > #boxes[li] then break end -- reached the very last box
        local current_word = boxes[li][wi].word
        if current_word and #current_word > 0 then
            table.insert(next_text, current_word)
        end
    end

    if #prev_text == 0 and #next_text == 0 then return end
    return table.concat(prev_text, " "), table.concat(next_text, " ")
end
--[[-- --[[--
Get link from position in screen page. Get link from position in screen page.
]]-- ]]--

@ -129,6 +129,10 @@ function PdfDocument:nativeToPageRectTransform(pageno, rect)
return self.koptinterface:nativeToPageRectTransform(self, pageno, rect) return self.koptinterface:nativeToPageRectTransform(self, pageno, rect)
end end
--- Get the words surrounding a selected word.
-- Pure delegation: forwards its arguments to this document's koptinterface
-- and returns whatever that call returns.
-- @param word text of the selected word
-- @param nb_words number of context words wanted on each side
-- @param pos position of the selected word
function PdfDocument:getSelectedWordContext(word, nb_words, pos)
    local kopt = self.koptinterface
    return kopt:getSelectedWordContext(word, nb_words, pos)
end
function PdfDocument:getOCRWord(pageno, wbox) function PdfDocument:getOCRWord(pageno, wbox)
return self.koptinterface:getOCRWord(self, pageno, wbox) return self.koptinterface:getOCRWord(self, pageno, wbox)
end end

@ -517,7 +517,7 @@ function WordInfoDialog:init()
VerticalSpan:new{width= Size.padding.default}, VerticalSpan:new{width= Size.padding.default},
has_context and has_context and
TextBoxWidget:new{ TextBoxWidget:new{
text = "..." .. self.prev_context:gsub("\n", " ") .. "" ..self.title.."" .. self.next_context:gsub("\n", " ") .. "...", text = "..." .. (self.prev_context or ""):gsub("\n", " ") .. "" ..self.title.."" .. (self.next_context or ""):gsub("\n", " ") .. "...",
width = width, width = width,
face = Font:getFace("smallffont"), face = Font:getFace("smallffont"),
alignment = self.title_align or "left", alignment = self.title_align or "left",

Loading…
Cancel
Save