highlight word from scratch instead of reusing rectmaps in reflowing mode

Completely revert the OCR handling for reflowed pages to its behavior in build 545.
This should fix #309.
pull/340/head
chrox 11 years ago
parent 90d55cbde4
commit 184a6f5d21

@ -338,9 +338,8 @@ function KoptInterface:getTextBoxes(doc, pageno)
else
if doc.configurable.text_wrap == 1 then
return self:getNativeTextBoxes(doc, pageno)
--return self:getTextBoxesFromScratch(doc, pageno)
else
return self:getTextBoxesFromScratch(doc, pageno)
return self:getNativeTextBoxesFromScratch(doc, pageno)
end
end
end
@ -393,12 +392,39 @@ function KoptInterface:getNativeTextBoxes(doc, pageno)
end
end
--[[
Get the word boxes of a reflowed page via the optical method,
i.e. OCR pre-processing in Tesseract and Leptonica.

The boxes are cached under "scratchrfpgboxes|<context hash>". On a cache miss
they are computed from the reflow context cached under "kctx|<context hash>":
its destination bitmap is copied into a temporary context, which is then
asked for the word boxes covering the whole reflowed page.

@param doc     document object; must provide getPageBBox(pageno)
@param pageno  page number
@return the word boxes, or nothing when neither the boxes nor a reflow
        context for this page are present in the cache
--]]
function KoptInterface:getReflowedTextBoxesFromScratch(doc, pageno)
    local page_bbox = doc:getPageBBox(pageno)
    local ctx_key = self:getContextHash(doc, pageno, page_bbox)
    local boxes_key = "scratchrfpgboxes|" .. ctx_key

    -- Fast path: boxes were already computed for this context.
    local boxes_item = Cache:check(boxes_key)
    if boxes_item then
        return boxes_item.scratchrfpgboxes
    end

    -- Without a cached reflow context there is no bitmap to analyse.
    local kctx_item = Cache:check("kctx|" .. ctx_key)
    if not kctx_item then
        return
    end

    local reflowed_kc = self:waitForContext(kctx_item.kctx)
    local page_w, page_h = reflowed_kc:getPageDim()

    -- Work on a temporary context holding a copy of the reflowed bitmap;
    -- only this temporary context is freed below.
    local scratch_kc = self:createContext(doc, pageno)
    scratch_kc:copyDestBMP(reflowed_kc)
    local word_boxes = scratch_kc:getNativeWordBoxes(0, 0, page_w, page_h)
    Cache:insert(boxes_key, CacheItem:new{ scratchrfpgboxes = word_boxes })
    scratch_kc:free()

    return word_boxes
end
--[[
get text boxes in native page via optical method,
i.e. OCR pre-processing in Tesseract and Leptonica.
--]]
function KoptInterface:getTextBoxesFromScratch(doc, pageno)
local hash = "pgboxes|"..doc.file.."|"..pageno
function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
local hash = "scratchnativepgboxes|"..doc.file.."|"..pageno
local cached = Cache:check(hash)
if not cached then
local page_size = Document.getNativePageDimensions(doc, pageno)
@ -412,12 +438,12 @@ function KoptInterface:getTextBoxesFromScratch(doc, pageno)
local page = doc._document:openPage(pageno)
page:getPagePix(kc)
local boxes = kc:getNativeWordBoxes(0, 0, page_size.w, page_size.h)
Cache:insert(hash, CacheItem:new{ pgboxes = boxes })
Cache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes })
page:close()
kc:free()
return boxes
else
return cached.pgboxes
return cached.scratchnativepgboxes
end
end
@ -643,7 +669,7 @@ get word and word box from position in reflowed page
]]--
function KoptInterface:getWordFromReflowPosition(doc, boxes, pos)
local pageno = pos.page
local reflowed_page_boxes = self:getReflowedTextBoxes(doc, pageno)
local reflowed_page_boxes = self:getReflowedTextBoxesFromScratch(doc, pageno)
local reflowed_word_box = self:getWordFromBoxes(reflowed_page_boxes, pos)
local reflowed_pos = reflowed_word_box.box:center()
local native_pos = self:reflowToNativePosTransform(doc, pageno, reflowed_pos)

@ -1 +1 @@
Subproject commit 669fb3dad4738f8003ddb4d9bb8da16e95f7ef25
Subproject commit a2b327dfe1613734eb3600f130be941fb7a1e717
Loading…
Cancel
Save