From 5c9a9198fbafb066f495ef2ef13570863df2be19 Mon Sep 17 00:00:00 2001 From: chrox Date: Thu, 19 Feb 2015 22:15:31 +0800 Subject: [PATCH] add Document:fastDigest method to calculate document hash without performance overhead --- frontend/document/document.lua | 30 ++++++++++++++++++++++++++++++ spec/unit/document_spec.lua | 6 ++++++ 2 files changed, 36 insertions(+) diff --git a/frontend/document/document.lua b/frontend/document/document.lua index a0c5cd2c7..32e049bbc 100644 --- a/frontend/document/document.lua +++ b/frontend/document/document.lua @@ -99,6 +99,36 @@ function Document:discardChange() self.is_edited = false end +-- calculate partial digest of the document +-- since only PDF documents can be modified by koreader by appending data +-- at the end of the files when highlighting, we use a non-even sampling +-- algorithm which samples with larger weight at file head and much smaller +-- weight at file tail, thus reducing the probability that appended data may change +-- the digest value. +-- Note that if the PDF file size is around 1024, 4096, 16384, 65536, 262144, +-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data +-- by highlighting in koreader may change the digest value. 
+function Document:fastDigest()
+    -- Hashes up to twelve 1024-byte samples of self.file. Returns the digest
+    -- string, or nil plus the io.open error message if the file cannot be opened.
+    local md5 = require("MD5")
+    local lshift = require("bit").lshift
+    local file, err = io.open(self.file, 'rb')
+    if not file then return nil, err end
+    local step, size = 1024, 1024
+    md5:new() -- called for its side effect on the MD5 module; result unused
+    for i = -1, 10 do
+        -- bit.lshift is 32-bit and masks the count to 5 bits, so i = -1 shifts
+        -- by 30 and wraps to offset 0; i = 0..10 give 1024, 4096, ... 2^30
+        file:seek("set", lshift(step, 2*i))
+        local sample = file:read(size)
+        if not sample then break end -- nothing left to read at this offset
+        md5:update(sample)
+    end
+    file:close()
+    return md5:sum()
+end
+
 -- this might be overridden by a document implementation
 function Document:getNativePageDimensions(pageno)
     local hash = "pgdim|"..self.file.."|"..pageno
diff --git a/spec/unit/document_spec.lua b/spec/unit/document_spec.lua
index b2b0383e2..4338b7a91 100644
--- a/spec/unit/document_spec.lua
+++ b/spec/unit/document_spec.lua
@@ -28,6 +28,9 @@ describe("PDF document module", function()
         local clip1 = doc:clipPagePNGString(pos0, pos1, pboxes, "lighten")
         assert.truthy(clip1)
     end)
+    it("should calculate fast digest", function()
+        assert.is_equal(doc:fastDigest(), "41cce710f34e5ec21315e19c99821415")
+    end)
     it("should close document", function()
         doc:close()
     end)
@@ -45,6 +48,9 @@ describe("EPUB document module", function()
         assert.are.same(image:getWidth(), 442)
         assert.are.same(image:getHeight(), 616)
     end)
+    it("should calculate fast digest", function()
+        assert.is_equal(doc:fastDigest(), "59d481d168cca6267322f150c5f6a2a3")
+    end)
     it("should close document", function()
         doc:close()
     end)