diff --git a/base b/base index e9f64794e..9aa76dc81 160000 --- a/base +++ b/base @@ -1 +1 @@ -Subproject commit e9f64794e04e22d6cea4bf4f84fa215fc396455c +Subproject commit 9aa76dc818c7d874e9dd09a903722915be63af09 diff --git a/frontend/MD5.lua b/frontend/MD5.lua index 26c94759a..c8d73ea1f 100644 --- a/frontend/MD5.lua +++ b/frontend/MD5.lua @@ -208,14 +208,25 @@ local function bin2str(output, input, len) end end -local function md5(luastr) +local md5 = {} + +function md5:new() + self.ctx = ffi.new("MD5_CTX") + MD5Init(self.ctx) +end + +function md5:update(luastr) + MD5Update(self.ctx, ffi.cast("const char*", luastr), #luastr) +end + +function md5:sum(luastr) local buf = ffi.new("char[33]") local hash = ffi.new("uint8_t[16]") - local ctx = ffi.new("MD5_CTX") - - MD5Init(ctx) - MD5Update(ctx, ffi.cast("const char*", luastr), #luastr) - MD5Final(hash, ctx) + if luastr then + md5:new() + md5:update(luastr) + end + MD5Final(hash, self.ctx) bin2str(buf, hash, ffi.sizeof(hash)) diff --git a/frontend/cache.lua b/frontend/cache.lua index fda092f8e..5f9b92431 100644 --- a/frontend/cache.lua +++ b/frontend/cache.lua @@ -122,7 +122,7 @@ function Cache:check(key, ItemClass) end return self.cache[key] elseif ItemClass then - local cached = self.cached[md5(key)] + local cached = self.cached[md5:sum(key)] if cached then local item = ItemClass:new{} local ok, msg = pcall(item.load, item, cached) @@ -159,7 +159,7 @@ function Cache:serialize() -- only dump cache item that requests serialization explicitly if cache_item.persistent and cache_item.dump then DEBUG("dump cache item", key) - cache_size = cache_item:dump(cache_path..md5(key)) or 0 + cache_size = cache_item:dump(cache_path..md5:sum(key)) or 0 if cache_size > 0 then break end end end diff --git a/frontend/document/document.lua b/frontend/document/document.lua index a0c5cd2c7..32e049bbc 100644 --- a/frontend/document/document.lua +++ b/frontend/document/document.lua @@ -99,6 +99,36 @@ function Document:discardChange() self.is_edited = false end +-- calculate partial digest of the document +-- since only PDF documents could be modified by koreader by appending data +-- at the end of the files when highlighting, we use a non-even sampling +-- algorithm which samples with larger weight at file head and much smaller +-- weight at file tail, thus reduces the probability that appended data may change +-- the digest value. +-- Note that if PDF file size is around 1024, 4096, 16384, 65536, 262144 +-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data +-- by highlighting in koreader may change the digest value. +function Document:fastDigest() + local md5 = require("MD5") + local lshift = bit.lshift + local file = io.open(self.file, 'rb') + if file then + local step, size = 1024, 1024 + md5:new() + for i = -1, 10 do + file:seek("set", lshift(step, 2*i)) + local sample = file:read(size) + if sample then + md5:update(sample) + else + break + end + end + file:close() + return md5:sum() + end +end + -- this might be overridden by a document implementation function Document:getNativePageDimensions(pageno) local hash = "pgdim|"..self.file.."|"..pageno diff --git a/reader.lua b/reader.lua index 0c703ed6e..7864b68fd 100755 --- a/reader.lua +++ b/reader.lua @@ -1,18 +1,28 @@ #!./luajit +-- load default settings require "defaults" pcall(dofile, "defaults.persistent.lua") -package.path = "?.lua;common/?.lua;frontend/?.lua" -package.cpath = "?.so;common/?.so;common/?.dll;/usr/lib/lua/?.so" +-- set search path for 'require()' +package.path = "common/?.lua;frontend/?.lua;" .. package.path +package.cpath = "common/?.so;common/?.dll;/usr/lib/lua/?.so;" .. package.cpath + +-- set search path for 'ffi.load()' local ffi = require("ffi") +local util = require("ffi/util") +ffi.cdef[[ + char *getenv(const char *name); + int putenv(const char *envvar); + int _putenv(const char *envvar); +]] if ffi.os == "Windows" then - ffi.cdef[[ - int _putenv(const char *envvar); - ]] ffi.C._putenv("PATH=libs;common;") - --ffi.C._putenv("EMULATE_READER_W=480") - --ffi.C._putenv("EMULATE_READER_H=600") +else + ffi.C.putenv("LD_LIBRARY_PATH=" + .. util.realpath("libs") .. ":" + .. util.realpath("common") ..":" + .. ffi.string(ffi.C.getenv("LD_LIBRARY_PATH"))) end local DocSettings = require("docsettings") diff --git a/spec/unit/document_spec.lua b/spec/unit/document_spec.lua index b2b0383e2..4338b7a91 100644 --- a/spec/unit/document_spec.lua +++ b/spec/unit/document_spec.lua @@ -28,6 +28,9 @@ describe("PDF document module", function() local clip1 = doc:clipPagePNGString(pos0, pos1, pboxes, "lighten") assert.truthy(clip1) end) + it("should calculate fast digest", function() + assert.is_equal(doc:fastDigest(), "41cce710f34e5ec21315e19c99821415") + end) it("should close document", function() doc:close() end) @@ -45,6 +48,9 @@ describe("EPUB document module", function() assert.are.same(image:getWidth(), 442) assert.are.same(image:getHeight(), 616) end) + it("should calculate fast digest", function() + assert.is_equal(doc:fastDigest(), "59d481d168cca6267322f150c5f6a2a3") + end) it("should close document", function() doc:close() end) diff --git a/spec/unit/md5_spec.lua b/spec/unit/md5_spec.lua index fb59b864a..4efbe212a 100644 --- a/spec/unit/md5_spec.lua +++ b/spec/unit/md5_spec.lua @@ -4,9 +4,16 @@ local md5 = require("MD5") describe("MD5 module", function() it("should calculate correct MD5 hashes", function() - assert.is_equal(md5(""), "d41d8cd98f00b204e9800998ecf8427e") - assert.is_equal(md5("\0"), "93b885adfe0da089cdf634904fd59f71") - assert.is_equal(md5("0123456789abcdefX"), "1b05aba914a8b12315c7ee52b42f3d35") + assert.is_equal(md5:sum(""), "d41d8cd98f00b204e9800998ecf8427e") + assert.is_equal(md5:sum("\0"), "93b885adfe0da089cdf634904fd59f71") + assert.is_equal(md5:sum("0123456789abcdefX"), "1b05aba914a8b12315c7ee52b42f3d35") + end) + it("should calculate MD5 sum by updating", function() + md5:new() + md5:update("0123456789") + md5:update("abcdefghij") + local md5sum = md5:sum() + assert.is_equal(md5sum, md5:sum("0123456789abcdefghij")) end) end)