add Document:fastDigest method to calculate document hash without performance overhead

pull/1441/head
chrox 9 years ago
parent 07ff30f89c
commit 5c9a9198fb

@ -99,6 +99,36 @@ function Document:discardChange()
self.is_edited = false
end
--- Calculate a partial MD5 digest of the document file.
-- Only a handful of 1024-byte samples are hashed instead of the whole file,
-- so the cost does not grow with the file size.
--
-- Only PDF documents can be modified by koreader, which appends data at the
-- end of the file when highlighting. The sampling is therefore non-even:
-- sample offsets are spaced by a factor of four (1024, 4096, ... up to
-- 1073741824), so the file head is sampled densely and the tail sparsely,
-- which reduces the probability that appended data changes the digest.
--
-- Note that if the PDF file size is around 1024, 4096, 16384, 65536, 262144,
-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, data
-- appended by highlighting in koreader may still change the digest value.
--
-- @return the value of md5:sum() over the samples, or nothing when the file
-- cannot be opened
function Document:fastDigest()
    local md5 = require("MD5")
    local lshift = bit.lshift
    local handle = io.open(self.file, 'rb')
    if not handle then
        return
    end
    local step, size = 1024, 1024
    md5:new()
    -- The shift count walks -2, 0, 2, ... 20. NOTE(review): the first count is
    -- negative; LuaJIT's bit library presumably masks it to 30, making the
    -- first offset wrap around to 0 -- confirm if another bit library is used.
    for shift = -2, 20, 2 do
        handle:seek("set", lshift(step, shift))
        local chunk = handle:read(size)
        if not chunk then
            -- reading at or past the end of the file: no more samples
            break
        end
        md5:update(chunk)
    end
    handle:close()
    return md5:sum()
end
-- this might be overridden by a document implementation
function Document:getNativePageDimensions(pageno)
local hash = "pgdim|"..self.file.."|"..pageno

@ -28,6 +28,9 @@ describe("PDF document module", function()
local clip1 = doc:clipPagePNGString(pos0, pos1, pboxes, "lighten")
assert.truthy(clip1)
end)
it("should calculate fast digest", function()
    -- luassert takes the expected value first and the value under test
    -- second; this order keeps the failure report labels accurate.
    assert.is_equal("41cce710f34e5ec21315e19c99821415", doc:fastDigest())
end)
-- No explicit assertion: the spec passes as long as close() does not raise.
local function close_document()
    doc:close()
end
it("should close document", close_document)
@ -45,6 +48,9 @@ describe("EPUB document module", function()
assert.are.same(image:getWidth(), 442)
assert.are.same(image:getHeight(), 616)
end)
it("should calculate fast digest", function()
    -- luassert takes the expected value first and the value under test
    -- second; this order keeps the failure report labels accurate.
    assert.is_equal("59d481d168cca6267322f150c5f6a2a3", doc:fastDigest())
end)
-- No explicit assertion: the spec passes as long as close() does not raise.
local function close_document()
    doc:close()
end
it("should close document", close_document)

Loading…
Cancel
Save