diff --git a/datastorage.lua b/datastorage.lua index 42c73c5bd..eda3b160f 100644 --- a/datastorage.lua +++ b/datastorage.lua @@ -53,6 +53,10 @@ function DataStorage:getDocSettingsDir() return self:getDataDir() .. "/docsettings" end +function DataStorage:getDocSettingsHashDir() + return self:getDataDir() .. "/hashdocsettings" +end + function DataStorage:getFullDataDir() if full_data_dir then return full_data_dir end diff --git a/frontend/docsettings.lua b/frontend/docsettings.lua index 023d9924d..4c18d6995 100644 --- a/frontend/docsettings.lua +++ b/frontend/docsettings.lua @@ -16,8 +16,24 @@ local DocSettings = LuaSettings:extend{} local HISTORY_DIR = DataStorage:getHistoryDir() local DOCSETTINGS_DIR = DataStorage:getDocSettingsDir() +local DOCSETTINGS_HASH_DIR = DataStorage:getDocSettingsHashDir() local custom_metadata_filename = "custom_metadata.lua" +local is_hash_location_enabled +local hash_path_cache = {} + +function DocSettings.isHashLocationEnabled() + if is_hash_location_enabled == nil then + is_hash_location_enabled = lfs.attributes(DOCSETTINGS_HASH_DIR, "mode") == "directory" + end + return is_hash_location_enabled +end + +function DocSettings.setIsHashLocationEnabled(value) + is_hash_location_enabled = value +end + + local function buildCandidates(list) local candidates = {} local previous_entry_exists = false @@ -68,15 +84,31 @@ end --- Returns path to sidecar directory (`filename.sdr`). -- Sidecar directory is the file without _last_ suffix. 
-- @string doc_path path to the document (e.g., `/foo/bar.pdf`) +-- @string force_location prefer e.g., "hash" or "dir" location over standard "doc", if available -- @treturn string path to the sidecar directory (e.g., `/foo/bar.sdr`) function DocSettings:getSidecarDir(doc_path, force_location) if doc_path == nil or doc_path == "" then return "" end local path = doc_path:match("(.*)%.") or doc_path -- file path without the last suffix local location = force_location or G_reader_settings:readSetting("document_metadata_folder", "doc") if location == "dir" then - path = DOCSETTINGS_DIR..path + path = DOCSETTINGS_DIR .. path + elseif location == "hash" then + local hsh = hash_path_cache[doc_path] + if not hsh then + local file = io.open(doc_path, 'rb') + if not file then return path .. ".sdr" end + hsh = util.partialMD5(file) + file:close() + hash_path_cache[doc_path] = hsh + logger.dbg("DocSettings: Caching new partial MD5 hash for", doc_path, "as", hsh) + else + logger.dbg("DocSettings: Using cached partial MD5 hash for", doc_path, "as", hsh) + end + -- converts b3fb8f4f8448160365087d6ca05c7fa2 to b3/ to avoid too many files in one dir + local subpath = string.format("/%s/", hsh:sub(1, 2)) + path = DOCSETTINGS_HASH_DIR .. subpath .. hsh end - return path..".sdr" + return path .. ".sdr" end --- Returns path to `metadata.lua` file. 
@@ -110,6 +142,13 @@ function DocSettings:getDocSidecarFile(doc_path, no_legacy) if lfs.attributes(sidecar_file, "mode") == "file" then return sidecar_file end + -- Calculate partial hash and check for hash-based files only if there are files to check + if DocSettings.isHashLocationEnabled() then + sidecar_file = self:getSidecarFile(doc_path, "hash") + if lfs.attributes(sidecar_file, "mode") == "file" then + return sidecar_file + end + end if not no_legacy then sidecar_file = self:getHistoryPath(doc_path) if lfs.attributes(sidecar_file, "mode") == "file" then @@ -154,6 +193,20 @@ function DocSettings:getFileFromHistory(hist_name) end end +--- Returns the directory and full filepath of a hash-ID-based sidecar metadata store +-- @string doc_path path to the document (e.g., `/foo/bar.pdf`) +function DocSettings:getSidecarHashDirAndFilepath(doc_path) + -- Getting PDF ID from trailer via mupdf has not been implemented - everything uses partial MD5 + local path = self:getSidecarDir(doc_path, "hash") + local filetype = doc_path:match(".+%.(%w+)$") + if not filetype or filetype == "" then + return "", "" + end + local hash_file = "metadata." .. filetype .. ".lua" + local hash_filepath = path .. "/" .. hash_file + return path, hash_filepath +end + --- Opens a document's individual settings (font, margin, dictionary, etc.) -- @string doc_path path to the document (e.g., `/foo/bar.pdf`) -- @treturn DocSettings object @@ -166,7 +219,7 @@ function DocSettings:open(doc_path) local doc_sidecar_file, legacy_sidecar_file if lfs.attributes(new.doc_sidecar_dir, "mode") == "directory" then doc_sidecar_file = new.doc_sidecar_file - legacy_sidecar_file = new.doc_sidecar_dir.."/"..ffiutil.basename(doc_path)..".lua" + legacy_sidecar_file = new.doc_sidecar_dir .. "/" .. ffiutil.basename(doc_path) .. 
".lua" end new.dir_sidecar_dir = new:getSidecarDir(doc_path, "dir") new.dir_sidecar_file = new:getSidecarFile(doc_path, "dir") @@ -176,24 +229,36 @@ function DocSettings:open(doc_path) end local history_file = new:getHistoryPath(doc_path) + local hash_sidecar_dir, hash_sidecar_file + if DocSettings.isHashLocationEnabled() then + hash_sidecar_dir, hash_sidecar_file = + new:getSidecarHashDirAndFilepath(doc_path) + new.hash_sidecar_dir = hash_sidecar_dir + new.hash_sidecar_file = hash_sidecar_file + end + -- Candidates list, in order of priority: local candidates_list = { -- New sidecar file in doc folder doc_sidecar_file or "", -- Backup file of new sidecar file in doc folder - doc_sidecar_file and (doc_sidecar_file..".old") or "", + doc_sidecar_file and (doc_sidecar_file .. ".old") or "", -- Legacy sidecar file legacy_sidecar_file or "", -- New sidecar file in docsettings folder dir_sidecar_file or "", -- Backup file of new sidecar file in docsettings folder - dir_sidecar_file and (dir_sidecar_file..".old") or "", + dir_sidecar_file and (dir_sidecar_file .. ".old") or "", + -- Hash or PDF fingerprint-based sidecar file lookup + hash_sidecar_file or "", + -- Backup file of hash or PDF fingerprint-based sidecar file lookup + hash_sidecar_file and (new.hash_sidecar_file .. ".old") or "", -- Legacy history folder history_file, -- Backup file in legacy history folder - history_file..".old", + history_file .. ".old", -- Legacy kpdfview setting - doc_path..".kpdfview.lua", + doc_path .. ".kpdfview.lua", } -- We get back an array of tables for *existing* candidates, sorted MRU first (insertion order breaks ties). local candidates = buildCandidates(candidates_list) @@ -237,10 +302,20 @@ end function DocSettings:flush(data, no_custom_metadata) -- Depending on the settings, doc_settings are saved to the book folder or -- to koreader/docsettings folder. The latter is also a fallback for read-only book storage. 
- local serials = G_reader_settings:readSetting("document_metadata_folder", "doc") == "doc" - and { {self.doc_sidecar_dir, self.doc_sidecar_file}, + local serials + local preferred_metadata_storage = G_reader_settings:readSetting("document_metadata_folder", "doc") + if preferred_metadata_storage == "doc" then + serials = { {self.doc_sidecar_dir, self.doc_sidecar_file}, {self.dir_sidecar_dir, self.dir_sidecar_file}, } - or { {self.dir_sidecar_dir, self.dir_sidecar_file}, } + elseif preferred_metadata_storage == "hash" then + if self.hash_sidecar_dir == nil or self.hash_sidecar_file == nil then + self.hash_sidecar_dir, self.hash_sidecar_file = + self:getSidecarHashDirAndFilepath(self.data.doc_path) + end + serials = { {self.hash_sidecar_dir, self.hash_sidecar_file } } + else -- "dir", and any unrecognized value: same fallback as before this change, so serials is never nil + serials = { {self.dir_sidecar_dir, self.dir_sidecar_file}, } + end local s_out = dump(data or self.data, nil, true) for _, s in ipairs(serials) do @@ -321,7 +396,7 @@ function DocSettings:purge(sidecar_to_keep, data_to_purge) local candidate_path = t.path if lfs.attributes(candidate_path, "mode") == "file" then if (not sidecar_to_keep) - or (candidate_path ~= sidecar_to_keep and candidate_path ~= sidecar_to_keep..".old") then + or (candidate_path ~= sidecar_to_keep and candidate_path ~= sidecar_to_keep .. ".old") then os.remove(candidate_path) logger.dbg("DocSettings: purged:", candidate_path) end @@ -345,7 +420,11 @@ function DocSettings:purge(sidecar_to_keep, data_to_purge) if lfs.attributes(self.dir_sidecar_dir, "mode") == "directory" then util.removePath(self.dir_sidecar_dir) -- remove empty parent folders end + if self.hash_sidecar_dir and lfs.attributes(self.hash_sidecar_dir, "mode") == "directory" then + util.removePath(self.hash_sidecar_dir) -- remove empty parent folders + end end + DocSettings.setIsHashLocationEnabled(nil) -- reset this in case last hash book is purged end --- Removes empty sidecar dir. 
@@ -359,51 +438,61 @@ end --- Updates sdr location for file rename/copy/move/delete operations. function DocSettings:updateLocation(doc_path, new_doc_path, copy) - local doc_settings, new_sidecar_dir - - -- update metadata - if DocSettings:hasSidecarFile(doc_path) then - doc_settings = DocSettings:open(doc_path) - if new_doc_path then - local new_doc_settings = DocSettings:open(new_doc_path) - -- save doc settings to the new location, no custom metadata yet - new_sidecar_dir = new_doc_settings:flush(doc_settings.data, true) - else + local doc_settings, new_sidecar_dir, cover_file + if G_reader_settings:readSetting("document_metadata_folder") == "hash" then + -- none of these operations (except delete) changes the hash -> no location change + if not new_doc_path then + doc_settings = DocSettings:open(doc_path) local cache_file_path = doc_settings:readSetting("cache_file_path") - if cache_file_path then - os.remove(cache_file_path) + if cache_file_path then os.remove(cache_file_path) end + cover_file = doc_settings:getCoverFile() + doc_settings:purge() + end + else + -- update metadata + if DocSettings:hasSidecarFile(doc_path) then + doc_settings = DocSettings:open(doc_path) + if new_doc_path then + local new_doc_settings = DocSettings:open(new_doc_path) + -- save doc settings to the new location, no custom metadata yet + new_sidecar_dir = new_doc_settings:flush(doc_settings.data, true) + else + local cache_file_path = doc_settings:readSetting("cache_file_path") + if cache_file_path then + os.remove(cache_file_path) + end end end - end - -- update custom metadata - if not doc_settings then - doc_settings = DocSettings:open(doc_path) - end - local cover_file = doc_settings:getCoverFile() - if new_doc_path then - -- custom cover - if cover_file then - if not new_sidecar_dir then - new_sidecar_dir = DocSettings:getSidecarDir(new_doc_path) - util.makePath(new_sidecar_dir) - end - local _, filename = util.splitFilePathName(cover_file) - ffiutil.copyFile(cover_file, 
new_sidecar_dir .. "/" .. filename) + -- update custom metadata + if not doc_settings then + doc_settings = DocSettings:open(doc_path) end - -- custom metadata - local metadata_file = self:getCustomMetadataFile(doc_path) - if metadata_file then - if not new_sidecar_dir then - new_sidecar_dir = DocSettings:getSidecarDir(new_doc_path) - util.makePath(new_sidecar_dir) + cover_file = doc_settings:getCoverFile() + if new_doc_path then + -- custom cover + if cover_file then + if not new_sidecar_dir then + new_sidecar_dir = DocSettings:getSidecarDir(new_doc_path) + util.makePath(new_sidecar_dir) + end + local _, filename = util.splitFilePathName(cover_file) + ffiutil.copyFile(cover_file, new_sidecar_dir .. "/" .. filename) + end + -- custom metadata + local metadata_file = self:getCustomMetadataFile(doc_path) + if metadata_file then + if not new_sidecar_dir then + new_sidecar_dir = DocSettings:getSidecarDir(new_doc_path) + util.makePath(new_sidecar_dir) + end + ffiutil.copyFile(metadata_file, new_sidecar_dir .. "/" .. custom_metadata_filename) end - ffiutil.copyFile(metadata_file, new_sidecar_dir .. "/" .. 
custom_metadata_filename) end - end - if not copy then - doc_settings:purge() + if not copy then + doc_settings:purge() + end end if cover_file then -- after purge because purge uses cover file cache @@ -430,12 +519,18 @@ function DocSettings:findCoverFile(doc_path) local location = G_reader_settings:readSetting("document_metadata_folder", "doc") local sidecar_dir = self:getSidecarDir(doc_path, location) local cover_file = findCoverFileInDir(sidecar_dir) - if not cover_file then - location = location == "doc" and "dir" or "doc" - sidecar_dir = self:getSidecarDir(doc_path, location) - cover_file = findCoverFileInDir(sidecar_dir) + if cover_file then return cover_file end + local candidates = {"doc", "dir"} + if DocSettings.isHashLocationEnabled() then + table.insert(candidates, "hash") + end + for _, mode in ipairs(candidates) do + if mode ~= location then + sidecar_dir = self:getSidecarDir(doc_path, mode) + cover_file = findCoverFileInDir(sidecar_dir) + if cover_file then return cover_file end + end end - return cover_file end function DocSettings:getCoverFile(reset_cache) @@ -457,9 +552,13 @@ function DocSettings:getCustomCandidateSidecarDirs(doc_path) end -- new book, create sidecar dir in accordance with sdr location setting local dir_sidecar_dir = self:getSidecarDir(doc_path, "dir") - if G_reader_settings:readSetting("document_metadata_folder", "doc") == "doc" then + local preferred_metadata_storage = G_reader_settings:readSetting("document_metadata_folder", "doc") + if preferred_metadata_storage == "doc" then local doc_sidecar_dir = self:getSidecarDir(doc_path, "doc") return { doc_sidecar_dir, dir_sidecar_dir } -- fallback in case of readonly book storage + elseif preferred_metadata_storage == "hash" then + local hash_sidecar_dir = self:getSidecarDir(doc_path, "hash") + return { hash_sidecar_dir } end return { dir_sidecar_dir } end @@ -481,7 +580,12 @@ end --- Returns path to book custom metadata file if it exists, or nil. 
function DocSettings:getCustomMetadataFile(doc_path) doc_path = doc_path or self.data.doc_path - for _, mode in ipairs({"doc", "dir"}) do + + local candidates = {"doc", "dir"} + if DocSettings.isHashLocationEnabled() then + table.insert(candidates, "hash") + end + for _, mode in ipairs(candidates) do local file = self:getSidecarDir(doc_path, mode) .. "/" .. custom_metadata_filename if lfs.attributes(file, "mode") == "file" then return file @@ -527,4 +631,79 @@ function DocSettings:flushCustomMetadata(doc_path) end end +-- hash-based SDR storage +local function getSdrsInDir(path) + -- Get all the metadata.filetype.lua files under directory path. + -- Derived from readerdictionary.getIfosInDir() + local sdrs = {} + local ok, iter, dir_obj = pcall(lfs.dir, path) + if ok then + for name in iter, dir_obj do + if name ~= "." and name ~= ".." then + local fullpath = path .. "/" .. name + local attributes = lfs.attributes(fullpath) + if attributes ~= nil then + if attributes.mode == "directory" then + local dirifos = getSdrsInDir(fullpath) -- recurse + for _, ifo in pairs(dirifos) do + table.insert(sdrs, ifo) + end + elseif name:match("metadata%..+%.lua$") then + table.insert(sdrs, fullpath) + end + end + end + end + end + return sdrs +end + +function DocSettings.getHashDirSdrInfos() + local sdrs = getSdrsInDir(DOCSETTINGS_HASH_DIR) + local title_author_strs = {} + for _, sdr in ipairs(sdrs) do + -- Report empty files (or files whose size cannot be read) as zero-size instead of loading them + if (lfs.attributes(sdr, "size") or 0) > 0 then + local ok, stored + ok, stored = pcall(dofile, sdr) + -- Only use files that load without error and evaluate to a non-empty table + if ok and type(stored) == "table" and next(stored) ~= nil then + local info_str, custom_authors + local sdr_path = sdr:sub(1, sdr:match(".*/()") - 1) -- SDR path + local custom_metadata_file = sdr_path .. 
custom_metadata_filename + if custom_metadata_file then + local custom = DocSettings:openCustomMetadata(custom_metadata_file) + local custom_props = custom:readSetting("custom_props") + if custom_props then + if custom_props.title then info_str = custom_props.title end + if custom_props.authors then custom_authors = custom_props.authors end + end + end + if not info_str then info_str = stored.doc_props and stored.doc_props.title end + if not info_str then info_str = "untitled document" end + if custom_authors then + info_str = info_str .. ", author: " .. custom_authors + elseif stored.doc_props and stored.doc_props.authors then + info_str = info_str .. ", author: " .. stored.doc_props.authors + end + if stored.stats then + if (stored.stats.highlights or 0) > 0 then + info_str = info_str .. ", highlights: " .. stored.stats.highlights + end + if (stored.stats.notes or 0) > 0 then + info_str = info_str .. ", notes: " .. stored.stats.notes + end + end + info_str = info_str .. ", path: " .. sdr:sub(sdr:find("/", 3) + 1) + table.insert(title_author_strs, info_str) + else + table.insert(title_author_strs, "error " .. sdr) + end + else + table.insert(title_author_strs, "zero-size file " .. sdr) + end + end + return title_author_strs +end + return DocSettings diff --git a/frontend/document/document.lua b/frontend/document/document.lua index 0858720ae..ea94ebb2f 100644 --- a/frontend/document/document.lua +++ b/frontend/document/document.lua @@ -9,6 +9,7 @@ local Math = require("optmath") local TileCacheItem = require("document/tilecacheitem") local lfs = require("libs/libkoreader-lfs") local logger = require("logger") +local util = require("util") --[[ This is an abstract interface to a document @@ -145,16 +146,6 @@ end -- calculate partial digest of the document and store in its docsettings to avoid document saving -- feature to change its checksum. --- --- To the calculating mechanism itself. 
--- since only PDF documents could be modified by KOReader by appending data --- at the end of the files when highlighting, we use a non-even sampling --- algorithm which samples with larger weight at file head and much smaller --- weight at file tail, thus reduces the probability that appended data may change --- the digest value. --- Note that if PDF file size is around 1024, 4096, 16384, 65536, 262144 --- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data --- by highlighting in KOReader may change the digest value. function Document:fastDigest(docsettings) if not self.file then return end local file = io.open(self.file, 'rb') @@ -167,21 +158,7 @@ function Document:fastDigest(docsettings) local result = docsettings:readSetting("partial_md5_checksum") if not result then logger.dbg("computing and storing partial_md5_checksum") - local bit = require("bit") - local md5 = require("ffi/sha2").md5 - local lshift = bit.lshift - local step, size = 1024, 1024 - local update = md5() - for i = -1, 10 do - file:seek("set", lshift(step, 2*i)) - local sample = file:read(size) - if sample then - update(sample) - else - break - end - end - result = update() + result = util.partialMD5(file) docsettings:saveSetting("partial_md5_checksum", result) end if tmp_docsettings then diff --git a/frontend/ui/elements/common_settings_menu_table.lua b/frontend/ui/elements/common_settings_menu_table.lua index 0a3e21661..eb18abcd8 100644 --- a/frontend/ui/elements/common_settings_menu_table.lua +++ b/frontend/ui/elements/common_settings_menu_table.lua @@ -1,3 +1,4 @@ +local DataStorage = require("datastorage") local DateTimeWidget = require("ui/widget/datetimewidget") local Device = require("device") local Event = require("ui/event") @@ -6,6 +7,7 @@ local Language = require("ui/language") local NetworkMgr = require("ui/network/manager") local PowerD = Device:getPowerDevice() local UIManager = require("ui/uimanager") +local DocSettings = require("docsettings") local _ = 
require("gettext") local N_ = _.ngettext local C_ = _.pgettext @@ -540,15 +542,23 @@ common_settings.document = { local metadata_folder_str = { ["doc"] = _("book folder"), - ["dir"] = "koreader/docsettings/", + ["dir"] = DataStorage:getDocSettingsDir(), + ["hash"] = DataStorage:getDocSettingsHashDir() } -local metadata_folder_help_text = _([[ -Book view settings, reading progress, highlights, bookmarks and notes (collectively known as metadata) are stored in a separate folder named .sdr (".sdr" meaning "sidecar"). +local metadata_folder_help_header = T(_([[Book view settings, reading progress, highlights, bookmarks and notes (collectively known as metadata) are stored in a separate folder named .sdr (".sdr" meaning "sidecar"). -You can decide between two locations where these will be saved: -- alongside the book file itself (the long time default): these sdr folders will be visible when you browse your library directories with another file browser or from your computer, which may clutter your vision of your library. But this allows you to move them along when you reorganize your library, and also survives any renaming of parent directories. Also, if you perform directory synchronization or backups, your settings will be part of them. -- all inside koreader/docsettings/: these sdr folders will only be visible and used by KOReader, and won't clutter your vision of your library directories with another file browser or from your computer. But any reorganisation of your library (directories or filename moves and renamings) may result in KOReader not finding your previous settings for these books. 
These settings won't be part of any synchronization or backups of your library.]]) +You can decide between three locations/methods where these will be saved:]])) +local metadata_folder_help_doc = T(_(" - alongside the book file itself (the long time default): sdr folders will be visible when you browse your library directories with another file browser or from your computer, which may clutter your vision of your library. But this allows you to move them along when you reorganize your library, and also survives any renaming of parent directories. Also, if you perform directory synchronization or backups, your settings will be part of them.")) +local metadata_folder_help_dir = T(_(" - all in %1: sdr folders will only be visible and used by KOReader, and won't clutter your vision of your library directories with another file browser or from your computer. But any reorganisation of your library (directories or filename moves and renamings) may result in KOReader not finding your previous settings for these books. These settings won't be part of any synchronization or backups of your library."), DataStorage:getDocSettingsDir()) +local metadata_folder_help_hash = T(_(" - all inside %1 as hashes: sdr folders are identified not by filepath/filename but by partial MD5 hash, allowing you to rename, move, and copy documents outside of KOReader without sdr folder clutter while keeping them linked to their metadata. However, any file modifications such as writing highlights into PDFs or downloading from Calibre may change the hash, and thus lose their linked metadata. Calculating file hashes may also slow down file browser navigation. This option may suit users with multiple copies of documents across different devices and directories."), DataStorage:getDocSettingsHashDir()) +local metadata_folder_help_text = metadata_folder_help_header .. "\n" .. metadata_folder_help_doc .. "\n" .. metadata_folder_help_dir .. "\n" .. 
metadata_folder_help_hash + +local hash_filemod_warn = T(_([[%1 requires calculating partial file hashes of documents which may slow down file browser navigation. Any file modifications (such as embedding annotations into PDF files or downloading from Calibre) may change the partial hash, thereby losing track of any highlights, bookmarks, and progress data. Embedding PDF annotations is currently set to "%s" and can be disabled at (⚙ → Document → Save Document (write highlights into PDF)).]]), DataStorage:getDocSettingsHashDir()) +local leaving_hash_sdr_warn = T(_("Warning: You currently have documents with hash-based metadata. Until this metadata is moved by opening those documents, or deleted, file browser navigation may remain slower.")) +local hash_metadata_file_list_header = T(_([[ +Hash-based metadata has been saved in %1 for the following documents. Hash-based storage may slow down file browser navigation in large directories. Thus, if not using hash-based metadata storage, it is recommended to open the associated documents in KOReader to automatically migrate their metadata to the preferred storage location, or to delete %1, which will speed up file browser navigation. 
+]]), DataStorage:getDocSettingsHashDir()) local function genMetadataFolderMenuItem(value) return { @@ -557,8 +567,23 @@ local function genMetadataFolderMenuItem(value) return G_reader_settings:readSetting("document_metadata_folder") == value end, callback = function() - G_reader_settings:saveSetting("document_metadata_folder", value) + local old_value = G_reader_settings:readSetting("document_metadata_folder") + if value ~= old_value then + G_reader_settings:saveSetting("document_metadata_folder", value) + if value == "hash" then + DocSettings.setIsHashLocationEnabled(true) + local save_document_setting = G_reader_settings:readSetting("save_document") + UIManager:show(InfoMessage:new{ text = string.format(hash_filemod_warn, save_document_setting), icon = "notice-warning" }) + else + DocSettings.setIsHashLocationEnabled(nil) -- setting to nil will let it reset itself appropriately + if DocSettings.isHashLocationEnabled() then + UIManager:show(InfoMessage:new{ text = leaving_hash_sdr_warn, icon = "notice-warning" }) + end + end + end end, + radio = true, + separator = value == "hash", } end @@ -579,6 +604,38 @@ common_settings.document_metadata_location = { }, genMetadataFolderMenuItem("doc"), genMetadataFolderMenuItem("dir"), + genMetadataFolderMenuItem("hash"), + { -- hash-based metadata count / TextViewer + text_func = function() + local hash_text = _("Show documents with hash-based metadata") + local no_hash_text = _("No documents with hash-based metadata") + if DocSettings.isHashLocationEnabled() then + if G_reader_settings:readSetting("document_metadata_folder") ~= "hash" then + return "⚠ " .. 
hash_text + end + return hash_text + end + return no_hash_text + end, + keep_menu_open = true, + enabled_func = function() + return DocSettings.isHashLocationEnabled() + end, + callback = function() + local hash_file_infos = DocSettings.getHashDirSdrInfos() + local book_info_items = {} + for i, file_info in ipairs(hash_file_infos) do + table.insert(book_info_items, table.concat({"\n", i, ". ", file_info})) + end + local book_info_str = table.concat(book_info_items) + UIManager:show(require("ui/widget/textviewer"):new{ + title = T(N_("1 document with hash-based metadata", "%1 documents with hash-based metadata", #hash_file_infos), #hash_file_infos), + title_multilines = true, + justified = false, + text = hash_metadata_file_list_header .. book_info_str, + }) + end, + }, }, } @@ -615,7 +672,18 @@ common_settings.document_save = { text = _("Save document (write highlights into PDF)"), sub_item_table = { genGenericMenuEntry(_("Prompt"), "save_document", "prompt", "prompt"), -- set "save_document" to "prompt" - genGenericMenuEntry(_("Always"), "save_document", "always"), + { + text = _("Always"), + checked_func = function() + return G_reader_settings:readSetting("save_document") == "always" + end, + callback = function() + if G_reader_settings:readSetting("document_metadata_folder") == "hash" then + UIManager:show(InfoMessage:new{ text = _("Warning: Book metadata location is set to hash-based storage. 
Writing highlights into a PDF modifies the file which may change the partial hash, resulting in its metadata (e.g., highlights and progress) being unlinked and lost."), icon = "notice-warning" }) + end + G_reader_settings:saveSetting("save_document", "always") + end, + }, genGenericMenuEntry(_("Disable"), "save_document", "disable"), }, } diff --git a/frontend/util.lua b/frontend/util.lua index 656620888..20f6defdb 100644 --- a/frontend/util.lua +++ b/frontend/util.lua @@ -1005,6 +1005,34 @@ function util.getFormattedSize(size) return s end +--- Calculate partial digest of an open file. To the calculating mechanism itself, +-- since only PDF documents could be modified by KOReader by appending data +-- at the end of the files when highlighting, we use a non-even sampling +-- algorithm which samples with larger weight at file head and much smaller +-- weight at file tail, thus reduces the probability that appended data may change +-- the digest value. +-- Note that if PDF file size is around 1024, 4096, 16384, 65536, 262144 +-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data +-- by highlighting in KOReader may change the digest value. +function util.partialMD5(file) + local bit = require("bit") + local md5 = require("ffi/sha2").md5 + local leftshift = bit.lshift + local step, size = 1024, 1024 + local update = md5() + for i = -1, 10 do + file:seek("set", leftshift(step, 2*i)) + local sample = file:read(size) + if sample then + update(sample) + else + break + end + end + return update() +end + + --[[-- Replaces invalid UTF-8 characters with a replacement string.