You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
koreader/plugins/exporter.koplugin/clip.lua

373 lines
12 KiB
Lua

local DataStorage = require("datastorage")
local DocumentRegistry = require("document/documentregistry")
local DocSettings = require("docsettings")
local ReadHistory = require("readhistory")
local logger = require("logger")
local md5 = require("ffi/sha2").md5
local util = require("util")
local _ = require("gettext")
local T = require("ffi/util").template
-- Prototype table of the clippings parser. Objects created with
-- MyClipping:new() fall back to the defaults declared here.
local MyClipping = {
-- path of the "My Clippings.txt" file read by parseMyClippings()
my_clippings = "/mnt/us/documents/My Clippings.txt",
-- directory that parseHistory() scans for history files
history_dir = DataStorage:getDataDir() .. "/history",
}
--- Create an object that inherits from the receiver.
-- @param o optional table to turn into an instance; a fresh table is
--          used when it is nil
-- @return the instance, whose metatable is the receiver
function MyClipping:new(o)
    local instance = o
    if instance == nil then
        instance = {}
    end
    setmetatable(instance, self)
    -- fields missing on the instance are looked up on the prototype
    self.__index = self
    return instance
end
--[[
-- clippings: main table to store parsed highlights and notes entries
-- {
-- ["Title(Author Name)"] = {
-- {
-- {
-- ["page"] = 123,
-- ["time"] = 1398127554,
-- ["text"] = "Games of all sorts were played in homes and fields."
-- },
-- {
-- ["page"] = 156,
-- ["time"] = 1398128287,
-- ["text"] = "There Spenser settled down to gentleman farming.",
-- ["note"] = "This is a sample note.",
-- },
-- ["title"] = "Chapter I"
-- },
-- }
-- }
-- ]]
--- Parse the "My Clippings.txt" file referenced by self.my_clippings.
--
-- Layout of one entry in that file:
--     Title(Author Name)
--     Your Highlight on Page 123 | Added on Monday, April 21, 2014 10:08:07 PM
--     <blank line>
--     This is a sample highlight.
--     ==========
--
-- The text part may span several lines; they are joined with "\n".
--
-- @return table of books keyed by title. Each book carries `title` and
--         `author` plus an array part with one single-clipping "chapter"
--         per entry. The table is empty when the file cannot be opened.
function MyClipping:parseMyClippings()
    local clippings = {}
    local file = io.open(self.my_clippings, "r")
    if not file then
        return clippings
    end

    local separator = "=========="
    -- position inside the current entry: 1 = title, 2 = info,
    -- 3 = blank line, 4 and up = text lines
    local index = 1
    local title, author, info
    local text_lines = {}
    for line in file:lines() do
        -- drop a UTF-8 byte order mark (it is not whitespace, so the trim
        -- below would leave it glued to the title), then trim the line
        line = line:gsub("^\239\187\191", ""):match("^%s*(.-)%s*$") or ""
        if line == separator then
            -- index >= 5: title, info, blank line and at least one text
            -- line were read, i.e. the entry is complete
            if index >= 5 then
                local clipping = {
                    page = info.page or info.location,
                    sort = info.sort,
                    time = info.time,
                    text = self:getText(table.concat(text_lines, "\n")),
                }
                -- we cannot extract chapter info so just insert clipping
                -- to a place holder chapter
                table.insert(clippings[title], { clipping })
            end
            index = 1
            text_lines = {}
        elseif index == 1 then
            -- ignore stray blank lines instead of registering a book
            -- with an empty title
            if line ~= "" then
                title, author = self:getTitle(line)
                clippings[title] = clippings[title] or {
                    title = title,
                    author = author,
                }
                index = 2
            end
        elseif index == 2 then
            info = self:getInfo(line)
            index = 3
        elseif index == 3 then
            -- should be a blank line, we skip this line
            index = 4
        else
            text_lines[#text_lines + 1] = line
            index = index + 1
        end
    end
    file:close()
    return clippings
end
-- file name suffixes that are stripped from a title line
local extensions = {
    [".pdf"] = true,
    [".djvu"] = true,
    [".epub"] = true,
    [".fb2"] = true,
    [".mobi"] = true,
    [".txt"] = true,
    [".html"] = true,
    [".doc"] = true,
}

--- Work out the title and author of a book.
--
-- first attempt to parse from document metadata
-- remove file extensions added by former KOReader
-- extract author name in "Title(Author)" format
-- extract author name in "Title - Author" format
--
-- @param line title line or file name to fall back on (may be nil)
-- @param path optional document path; when given, self:getProps(path) is
--             consulted first
-- @return title (always a string) and author (nil when none was found)
function MyClipping:getTitle(line, path)
    if path then
        local props = self:getProps(path)
        -- only trust metadata that really carries a title: a nil title
        -- would be handed back to callers that use it as a table key
        if props and props.title and props.title ~= "" then
            return props.title, props.authors or props.author
        end
    end
    line = (line or ""):match("^%s*(.-)%s*$") or ""
    if extensions[line:sub(-4):lower()] then
        line = line:sub(1, -5)
    elseif extensions[line:sub(-5):lower()] then
        line = line:sub(1, -6)
    end
    -- "Title(Author)"
    local title, author = line:match("(.-)%s*%((.*)%)")
    if not author then
        -- "Title - Author"; the dash is escaped so it is unmistakably literal
        title, author = line:match("(.-)%s*%-%s*(.*)")
    end
    if not title then title = line end
    return title:match("^%s*(.-)%s*$"), author
end
-- Words that identify the kind of an entry on its info line, listed per
-- entry kind (English and Chinese spellings).
local keywords = {
    highlight = { "Highlight", "标注" },
    note = { "Note", "笔记" },
    bookmark = { "Bookmark", "书签" },
}
-- English month abbreviations mapped to their month number
local months = {
    ["Jan"] = 1,
    ["Feb"] = 2,
    ["Mar"] = 3,
    ["Apr"] = 4,
    ["May"] = 5,
    ["Jun"] = 6,
    ["Jul"] = 7,
    ["Aug"] = 8,
    ["Sep"] = 9,
    ["Oct"] = 10,
    ["Nov"] = 11,
    ["Dec"] = 12
}
-- afternoon markers mapped to the hours to add to a 12-hour clock reading
local pms = {
    ["PM"] = 12,
    ["下午"] = 12,
}
-- morning markers; only needed to turn "12:xx AM" into hour 0
local ams = {
    "AM",
    "上午",
}

--- Extract a timestamp from a line of text.
--
-- Recognised dates: "2014年4月21日", "2014-04-21" and an English month
-- name followed by "21, 2014". The time must look like "10:08:07".
-- A 12-hour reading is converted using the PM/AM markers: 12 PM stays at
-- hour 12 and 12 AM becomes hour 0.
--
-- @param line string to search (nil is accepted)
-- @return the os.time() value for the parsed date and time, or nil when
--         the date or the time is incomplete
function MyClipping:getTime(line)
    if not line then return end
    local year, month, day = line:match("(%d+)年(%d+)月(%d+)日")
    if not (year and month and day) then
        year, month, day = line:match("(%d%d%d%d)%-(%d%d)%-(%d%d)")
    end
    if not (year and month and day) then
        for name, number in pairs(months) do
            if line:find(name, 1, true) then
                month = number
                day = line:match(" (%d?%d),")
                year = line:match(" (%d%d%d%d)")
                break
            end
        end
    end
    local hour, minute, second = line:match("(%d+):(%d+):(%d+)")
    if not (year and month and day and hour and minute and second) then
        return
    end

    hour = tonumber(hour)
    local afternoon_offset
    for marker, offset in pairs(pms) do
        if line:find(marker, 1, true) then
            afternoon_offset = offset
            break
        end
    end
    if afternoon_offset then
        -- 12 PM is already noon; adding 12 would roll over to the next day
        if hour < 12 then
            hour = hour + afternoon_offset
        end
    elseif hour == 12 then
        for _, marker in ipairs(ams) do
            if line:find(marker, 1, true) then
                -- 12 AM is midnight
                hour = 0
                break
            end
        end
    end

    return os.time({
        year = tonumber(year), month = tonumber(month), day = tonumber(day),
        hour = hour, min = tonumber(minute), sec = tonumber(second),
    })
end
--- Parse the info line of an entry, e.g.
-- "Your Highlight on Page 123 | Added on Monday, April 21, 2014 10:08:07 PM".
--
-- @param line the info line (nil is treated as an empty string)
-- @return table with
--   `sort`     key of the `keywords` table whose word occurs before the last "|"
--   `location` first number in that part, as a string such as "123" or
--              "1234-1235" (single-digit values are accepted too)
--   `time`     result of self:getTime() on the part after the last "|"
--   Fields that cannot be determined are left nil.
function MyClipping:getInfo(line)
    local info = {}
    line = line or ""
    local part1, part2 = line:match("(.+)%s*|%s*(.+)")
    -- find entry type and location
    if part1 then
        for sort, words in pairs(keywords) do
            for _, word in ipairs(words) do
                if part1:find(word) then
                    info.sort = sort
                    -- first run of digits, optionally extended by "-digits"
                    local first, last = part1:find("%d+")
                    if first then
                        -- "^" anchors the match at the given start position
                        local _, range_last = part1:find("^%-%d+", last + 1)
                        info.location = part1:sub(first, range_last or last)
                    else
                        info.location = nil
                    end
                    break
                end
            end
        end
    end
    -- find entry created time
    info.time = self:getTime(part2 or "")
    return info
end
--- Strip leading and trailing whitespace from a piece of text.
-- @param line string to trim; nil is treated as an empty string
-- @return the trimmed string
function MyClipping:getText(line)
    local raw = line
    if not raw then
        raw = ""
    end
    local trimmed = raw:match("^%s*(.-)%s*$")
    if trimmed then
        return trimmed
    end
    return ""
end
--- Clip a region of a document page and return it as PNG data.
-- @param image table with `file` (document path) and the `pos0`, `pos1`,
--              `pboxes` and `drawer` values that are passed on to
--              clipPagePNGString
-- @return table { png = <PNG string>, hash = <md5 of that string> };
--         nothing when the document cannot be opened or no PNG is produced
function MyClipping:getImage(image)
    local doc = DocumentRegistry:openDocument(image.file)
    if not doc then
        return
    end
    local png_data = doc:clipPagePNGString(image.pos0, image.pos1,
                                           image.pboxes, image.drawer)
    -- the document is closed whether or not the clip succeeded
    doc:close()
    if not png_data then
        return
    end
    return { png = png_data, hash = md5(png_data) }
end
--- Convert stored highlights (and their matching bookmarks) into clippings
-- and append them to `book`.
--
-- @param highlights table mapping a page to an array of highlight items
--                   (nil is treated as empty)
-- @param bookmarks  table of bookmarks; a bookmark belongs to a highlight
--                   when both carry the same `datetime` (nil is treated as
--                   empty)
-- @param book       book entry that receives the clippings; its `file`
--                   field is used to render image highlights. The array
--                   part is sorted by page afterwards.
function MyClipping:parseHighlight(highlights, bookmarks, book)
    -- create a translated pattern that matches bookmark auto-text
    -- see ReaderBookmark:getBookmarkAutoText and ReaderBookmark:getBookmarkPageString
    --- @todo Remove this once we get rid of auto-text or improve the data model.
    local pattern = "^" .. T(_("Page %1 %2 @ %3"),
                             "%[?%d*%]?%d+",
                             "(.*)",
                             "%d%d%d%d%-%d%d%-%d%d %d%d:%d%d:%d%d") .. "$"
    -- stored settings may lack either table; iterate over nothing then
    -- instead of raising inside pairs()
    highlights = highlights or {}
    bookmarks = bookmarks or {}
    for page, items in pairs(highlights) do
        for _, item in ipairs(items) do
            local clipping = {}
            clipping.page = page
            clipping.sort = "highlight"
            clipping.time = self:getTime(item.datetime or "")
            clipping.text = self:getText(item.text)
            clipping.chapter = item.chapter
            clipping.drawer = item.drawer
            for _, bookmark in pairs(bookmarks) do
                if bookmark.datetime == item.datetime and bookmark.text then
                    local bookmark_quote = bookmark.text:match(pattern)
                    if bookmark_quote ~= clipping.text and bookmark.text ~= clipping.text then
                        -- use modified quoted text or entire bookmark text if it's not a match
                        clipping.note = bookmark_quote or bookmark.text
                    end
                end
            end
            -- a highlight with empty text but full coordinates is exported
            -- as an image
            if item.text == "" and item.pos0 and item.pos1 and
                    item.pos0.x and item.pos0.y and
                    item.pos1.x and item.pos1.y then
                -- highlights in reflowing mode don't have page in pos
                if item.pos0.page == nil then item.pos0.page = page end
                if item.pos1.page == nil then item.pos1.page = page end
                local image = {}
                image.file = book.file
                image.pos0, image.pos1 = item.pos0, item.pos1
                image.pboxes = item.pboxes
                image.drawer = item.drawer
                clipping.image = self:getImage(image)
            end
            --- @todo Store chapter info when exporting highlights.
            -- keep the clipping only when it has text or an image
            if clipping.text and clipping.text ~= "" or clipping.image then
                table.insert(book, { clipping })
            end
        end
    end
    table.sort(book, function(v1, v2) return v1[1].page < v2[1].page end)
end
--- Load one history/sidecar file and add the highlights of its book to
-- `clippings`.
--
-- Nothing is added when `history_file` is not an existing ".lua" file,
-- when `doc_file` does not exist, when the file cannot be loaded, or when
-- it holds no `highlight` table.
--
-- NOTE(review): `lfs` is not required in the visible part of this file;
-- presumably it is available as a global — confirm.
--
-- @param clippings    table of books to fill, keyed by title
-- @param history_file path of the Lua settings file to load
-- @param doc_file     path of the document the settings belong to
function MyClipping:parseHistoryFile(clippings, history_file, doc_file)
    if lfs.attributes(history_file, "mode") ~= "file"
            or not history_file:find(".+%.lua$") then
        return
    end
    if lfs.attributes(doc_file, "mode") ~= "file" then return end
    local ok, stored = pcall(dofile, history_file)
    if not ok then
        -- on failure pcall returns the error as its second value; report
        -- it instead of dropping the book silently
        logger.warn("Failed to load history file", history_file, stored)
        return
    end
    if not stored then
        logger.warn("An empty history file ",
                    history_file,
                    "has been found. The book associated is ",
                    doc_file)
        return
    elseif not stored.highlight then
        return
    end
    local _, docname = util.splitFilePathName(doc_file)
    -- the call is not the last argument, so only its first result is
    -- passed on as the fallback title line
    local title, author = self:getTitle(util.splitFileNameSuffix(docname), doc_file)
    clippings[title] = {
        file = doc_file,
        title = title,
        author = author,
    }
    self:parseHighlight(stored.highlight, stored.bookmarks, clippings[title])
end
--- Collect the highlights of every book found in the history directory
-- and in ReadHistory.hist.
-- @return table of books keyed by title
function MyClipping:parseHistory()
    local clippings = {}
    -- lfs.dir raises an error for a missing directory, so only scan the
    -- history directory when it exists
    if lfs.attributes(self.history_dir, "mode") == "directory" then
        for f in lfs.dir(self.history_dir) do
            local doc_dir = DocSettings:getPathFromHistory(f)
            local doc_name = DocSettings:getNameFromHistory(f)
            -- skip entries the helpers cannot map to a document rather
            -- than concatenating nil (their behavior for names such as
            -- "." and ".." is not visible here)
            if doc_dir and doc_name then
                self:parseHistoryFile(clippings,
                                      self.history_dir .. "/" .. f,
                                      doc_dir .. "/" .. doc_name)
            end
        end
    end
    for _, item in ipairs(ReadHistory.hist) do
        self:parseHistoryFile(clippings,
                              DocSettings:getSidecarFile(item.file),
                              item.file)
    end
    return clippings
end
--- Read the metadata properties of a document.
-- @param file path of the document
-- @return the value of document:getProps(), or nil when the document
--         cannot be opened or its metadata fails to load
function MyClipping:getProps(file)
    local doc = DocumentRegistry:openDocument(file)
    if not doc then
        return nil
    end

    local props = nil
    local usable = true
    -- CreDocument: load only metadata; when that fails, calling other
    -- methods would segfault
    if doc.loadDocument and not doc:loadDocument(false) then
        usable = false
    end
    if usable then
        props = doc:getProps()
    end
    doc:close()
    return props
end
--- Build the clippings table for the document shown in `view`.
-- @param view object providing `document.file`, `highlight.saved` and
--             `ui.bookmark.bookmarks`
-- @return table holding a single book entry keyed by its title
function MyClipping:parseCurrentDoc(view)
    local doc_path = view.document.file
    -- everything after the last "/" serves as the fallback title line
    local file_name = doc_path:match(".*/(.*)")
    local title, author = self:getTitle(file_name, doc_path)
    local book = {
        file = doc_path,
        title = title,
        author = author,
    }
    local clippings = { [title] = book }
    self:parseHighlight(view.highlight.saved, view.ui.bookmark.bookmarks, book)
    return clippings
end
return MyClipping