diff --git a/plugins/calibre.koplugin/metadata.lua b/plugins/calibre.koplugin/metadata.lua index e441e33f3..41eb66153 100644 --- a/plugins/calibre.koplugin/metadata.lua +++ b/plugins/calibre.koplugin/metadata.lua @@ -8,34 +8,42 @@ of storing it. @module koplugin.calibre.metadata --]]-- +local lfs = require("libs/libkoreader-lfs") local rapidjson = require("rapidjson") local logger = require("logger") +local parser = require("parser") local util = require("util") -local unused_metadata = { - "application_id", - "author_link_map", - "author_sort", - "author_sort_map", - "book_producer", - "comments", - "cover", - "db_id", - "identifiers", - "languages", - "pubdate", - "publication_type", - "publisher", - "rating", - "rights", - "thumbnail", - "timestamp", - "title_sort", - "user_categories", - "user_metadata", - "_series_sort_", +local used_metadata = { + "uuid", + "lpath", + "last_modified", + "size", + "title", + "authors", + "tags", + "series", + "series_index" } +local function slim(book) + local slim_book = {} + for _, k in ipairs(used_metadata) do + if k == "series" or k == "series_index" then + slim_book[k] = book[k] or rapidjson.null + elseif k == "tags" then + slim_book[k] = book[k] or {} + else + slim_book[k] = book[k] + end + end + return slim_book +end + +-- this is the max file size we attempt to decode using json. For larger +-- files we want to attempt to manually parse the file to avoid OOM errors +local MAX_JSON_FILESIZE = 30 * 1000 * 1000 + --- find calibre files for a given dir local function findCalibreFiles(dir) local function existOrLast(file) @@ -90,12 +98,28 @@ end -- loads books' metadata from JSON file function CalibreMetadata:loadBookList() - local json, err = rapidjson.load(self.metadata) - if not json then - logger.warn("Unable to load book list from JSON file:", self.metadata, err) + local attr = lfs.attributes(self.metadata) + if not attr then + logger.warn("Unable to get file attributes from JSON file:", self.metadata) return {} end - return json + local valid = attr.mode == "file" and attr.size > 0 + if not valid then + logger.warn("File is invalid", self.metadata) + return {} + end + local books, err + if attr.size > MAX_JSON_FILESIZE then + books, err = parser.parseFile(self.metadata) + else + books, err = rapidjson.load(self.metadata) + end + if not books then + logger.warn(string.format("Unable to load library from json file %s: \n%s", + self.metadata, err)) + return {} + end + return books end -- saves books' metadata to JSON file @@ -114,11 +138,8 @@ function CalibreMetadata:saveBookList() end -- add a book to our books table -function CalibreMetadata:addBook(metadata) - for _, key in pairs(unused_metadata) do - metadata[key] = nil - end - table.insert(self.books, #self.books + 1, metadata) +function CalibreMetadata:addBook(book) + table.insert(self.books, #self.books + 1, slim(book)) end -- remove a book from our books table @@ -180,13 +201,9 @@ end -- removes unused metadata from books function CalibreMetadata:cleanUnused() - local slim_books = self.books - for index, _ in ipairs(slim_books) do - for _, key in pairs(unused_metadata) do - slim_books[index][key] = nil - end + for index, book in ipairs(self.books) do + self.books[index] = slim(book) end - self.books = slim_books self:saveBookList() end @@ -238,14 +255,17 @@ function CalibreMetadata:init(dir, is_search) return false end - local deleted_count = self:prune() - local elapsed = socket.gettime() - start - logger.info(string.format( - "calibre info loaded from disk in %f milliseconds: %d books. %d pruned", - elapsed * 1000, #self.books, deleted_count)) - if not is_search then + local msg + if is_search then + msg = string.format("(search) in %f milliseconds: %d books", + (socket.gettime() - start) * 1000, #self.books) + else + local deleted_count = self:prune() self:cleanUnused() + msg = string.format("in %f milliseconds: %d books. %d pruned", + (socket.gettime() - start) * 1000, #self.books, deleted_count) end + logger.info(string.format("calibre info loaded from disk %s", msg)) return true end diff --git a/plugins/calibre.koplugin/parser.lua b/plugins/calibre.koplugin/parser.lua new file mode 100644 index 000000000..405c5c65b --- /dev/null +++ b/plugins/calibre.koplugin/parser.lua @@ -0,0 +1,90 @@ +-- A parser for metadata.calibre +local util = require("util") + +-- removes leading and closing characters and converts hex-unicodes +local function replaceHexChars(s, n, j) + local l = string.len(s) + if string.sub(s, l, l) == "\"" then + s = string.sub(s, n, string.len(s)-1) + else + s = string.sub(s, n, string.len(s)-j) + end + s = string.gsub(s, "\\u([a-f0-9][a-f0-9][a-f0-9][a-f0-9])", function(w) + return util.unicodeCodepointToUtf8(tonumber(w, 16)) + end) + return s +end + +-- a couple of string helper functions for dealing with raw json strings +local function isEqual(str, key) + if str:sub(1, key:len() + 6) == string.format(" \"%s\"", key) then + return true + end + return false +end + +local function getValue(str, key) + if str == string.format(" \"%s\": null, ", key) then + return nil + else + return replaceHexChars(str, key:len() + 10, key == "series_index" and 2 or 3) + end +end + +local jsonStr = getmetatable("") +jsonStr.__index["equals"] = isEqual +jsonStr.__index["value"] = getValue + + +local parser = {} + +-- read metadata from file, line by line, and keep just the data we need +function parser.parseFile(file) + assert(type(file) == "string", "wrong type (expected a string") + local f, err = io.open(file, "rb") + if not f then + return nil, string.format("error parsing %s: %s", file, err) + end + f:close() + local add = function(t, line) + if type(t) ~= "table" or type(line) ~= "string" then + return {} + end + line = replaceHexChars(line, 8, 3) + table.insert(t, #t + 1, line) + return t + end + local books, book = {}, {} + local is_author, is_tag = false, false + for line in io.lines(file) do + if line == " }, " or line == " }" then + if type(book) == "table" then + table.insert(books, #books + 1, book) + end + book = {} + elseif line == " \"authors\": [" then + is_author = true + elseif line == " \"tags\": [" then + is_tag = true + elseif line == " ], " or line == " ]" then + is_author, is_tag = false, false + else + for _, key in ipairs({"title", "uuid", "lpath", "size", + "last_modified", "series", "series_index"}) + do + if line:equals(key) then + book[key] = line:value(key) + break + end + end + end + if is_author then + book.authors = add(book.authors, line) + elseif is_tag then + book.tags = add(book.tags, line) + end + end + return books +end + +return parser diff --git a/plugins/calibre.koplugin/search.lua b/plugins/calibre.koplugin/search.lua index 00314f588..156f6961f 100644 --- a/plugins/calibre.koplugin/search.lua +++ b/plugins/calibre.koplugin/search.lua @@ -18,6 +18,7 @@ local Screen = require("device").screen local UIManager = require("ui/uimanager") local logger = require("logger") local socket = require("socket") +local util = require("util") local _ = require("gettext") local T = require("ffi/util").template @@ -45,16 +46,8 @@ local function getAllMetadata(t) end end for _, book in ipairs(CalibreMetadata.books) do - local slim_book = {} - slim_book.title = book.title - slim_book.lpath = book.lpath - slim_book.authors = book.authors - slim_book.series = book.series - slim_book.series_index = book.series_index - slim_book.tags = book.tags - slim_book.size = book.size - slim_book.rootpath = path - table.insert(books, #books + 1, slim_book) + book.rootpath = path + table.insert(books, #books + 1, book) end CalibreMetadata:clean() end @@ -103,9 +96,11 @@ end local function searchByTag(t, query, case_insensitive) local freq = {} for _, book in ipairs(t) do - for __, tag in ipairs(book.tags) do - if match(tag, query, case_insensitive) then - freq[tag] = (freq[tag] or 0) + 1 + if type(book.tags) == "table" then + for __, tag in ipairs(book.tags) do + if match(tag, query, case_insensitive) then + freq[tag] = (freq[tag] or 0) + 1 + end end end end @@ -145,7 +140,7 @@ local function getBookInfo(book) -- all entries can be empty, except size, which is always filled by calibre. local title = _("Title:") .. " " .. book.title or "-" local authors = _("Author(s):") .. " " .. getEntries(book.authors) or "-" - local size = _("Size:") .. " " .. string.format("%4.1fM", book.size/1024/1024) + local size = _("Size:") .. " " .. util.getFriendlySize(book.size) or _("Unknown") local tags = getEntries(book.tags) if tags then tags = _("Tags:") .. " " .. tags @@ -329,7 +324,7 @@ function CalibreSearch:find(option) -- measure time elapsed searching local start = socket.gettime() if option == "find" then - local books = self:findBooks(self.books, self.search_value) + local books = self:findBooks(self.search_value) local result = self:bookCatalog(books) self:showresults(result) else @@ -346,7 +341,7 @@ function CalibreSearch:find(option) end -- find books with current search options -function CalibreSearch:findBooks(t, query) +function CalibreSearch:findBooks(query) -- handle case sensitivity local function bookMatch(s, p) if not s or not p then return false end @@ -375,7 +370,7 @@ function CalibreSearch:findBooks(t, query) end -- performs a book search local results = {} - for i, book in ipairs(t) do + for i, book in ipairs(self.books) do if bookSearch(book, query) then table.insert(results, #results + 1, book) end @@ -597,7 +592,7 @@ function CalibreSearch:getMetadata() -- try to load metadata from calibre files and dump it to cache file, if enabled. local books = getAllMetadata(self.libraries) if self.cache_metadata then - local dump = {} + local serialized_table = {} local function removeNull(t) for _, key in ipairs({"series", "series_index"}) do if type(t[key]) == "function" then @@ -607,9 +602,9 @@ function CalibreSearch:getMetadata() return t end for index, book in ipairs(books) do - table.insert(dump, index, removeNull(book)) + table.insert(serialized_table, index, removeNull(book)) end - self.cache_books:save(dump) + self.cache_books:save(serialized_table) end local elapsed = socket.gettime() - start logger.info(string.format(template, #books, "calibre", elapsed * 1000))