From 8815cbe07aa30024f74e9b744f67684661ef0ce8 Mon Sep 17 00:00:00 2001 From: Frans de Jonge Date: Tue, 14 May 2019 19:10:41 +0200 Subject: [PATCH] [fix, chore] Abstract filename logic in util.getSafeFilename() (#5026) Fixes https://github.com/koreader/koreader/issues/5025 The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved): https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491 --- frontend/apps/reader/modules/readerlink.lua | 2 +- frontend/ui/widget/dictquicklookup.lua | 2 +- frontend/ui/widget/opdsbrowser.lua | 11 +----- frontend/util.lua | 44 ++++++++++++++++++++- plugins/newsdownloader.koplugin/main.lua | 6 +-- plugins/wallabag.koplugin/main.lua | 16 ++++---- 6 files changed, 57 insertions(+), 24 deletions(-) diff --git a/frontend/apps/reader/modules/readerlink.lua b/frontend/apps/reader/modules/readerlink.lua index 6cabdf916..97113d673 100644 --- a/frontend/apps/reader/modules/readerlink.lua +++ b/frontend/apps/reader/modules/readerlink.lua @@ -616,7 +616,7 @@ function ReaderLink:onGoToExternalLink(link_url) -- wikipedia page saved as epub, full of wikipedia links, it's -- too easy to click on links when wanting to change page...) -- But first check if this wikipedia article has been saved as EPUB - local epub_filename = util.replaceInvalidChars(wiki_page:gsub("_", " ")) .. "."..string.upper(wiki_lang)..".epub" + local epub_filename = util.getSafeFilename(wiki_page:gsub("_", " ")) .. "."..string.upper(wiki_lang)..".epub" local epub_fullpath -- either in current book directory local last_file = G_reader_settings:readSetting("lastfile") diff --git a/frontend/ui/widget/dictquicklookup.lua b/frontend/ui/widget/dictquicklookup.lua index 5b09a1950..376508a19 100644 --- a/frontend/ui/widget/dictquicklookup.lua +++ b/frontend/ui/widget/dictquicklookup.lua @@ -329,7 +329,7 @@ function DictQuickLookup:update() local lang = self.lang or self.wiki_languages_copy[1] -- Just to be safe (none of the invalid chars, except ':' for uninteresting -- Portal: or File: wikipedia pages, should be in lookup_word) - local cleaned_lookupword = util.replaceInvalidChars(self.lookupword:gsub("_", " ")) + local cleaned_lookupword = util.getSafeFilename(self.lookupword:gsub("_", " ")) local filename = cleaned_lookupword .. "."..string.upper(lang)..".epub" -- Find a directory to save file into local dir diff --git a/frontend/ui/widget/opdsbrowser.lua b/frontend/ui/widget/opdsbrowser.lua index 5de19ac09..c8cd3f75c 100644 --- a/frontend/ui/widget/opdsbrowser.lua +++ b/frontend/ui/widget/opdsbrowser.lua @@ -479,15 +479,8 @@ end function OPDSBrowser:downloadFile(item, format, remote_url) -- download to user selected directory or last opened dir local download_dir = self.getCurrentDownloadDir() - local file_system = util.getFilesystemType(download_dir) - if file_system == "vfat" or file_system == "fuse.fsp" then - item.author = util.replaceInvalidChars(item.author) - item.title = util.replaceInvalidChars(item.title) - else - item.author = util.replaceSlashChar(item.author) - item.title = util.replaceSlashChar(item.title) - end - local local_path = download_dir .. "/" .. item.author .. ' - ' .. item.title .. "." .. string.lower(format) + local filename = util.getSafeFilename(item.author .. " - " .. item.title .. "." .. string.lower(format), download_dir) + local local_path = download_dir .. "/" .. filename local_path = util.fixUtf8(local_path, "_") local function download() diff --git a/frontend/util.lua b/frontend/util.lua index d97698cfe..3fd9543c6 100644 --- a/frontend/util.lua +++ b/frontend/util.lua @@ -453,7 +453,7 @@ end -- / poses a problem. ---- @string str filename ---- @treturn string sanitized filename -function util.replaceInvalidChars(str) +local function replaceAllInvalidChars(str) if str then return str:gsub('[\\,%/,:,%*,%?,%",%<,%>,%|]','_') end @@ -462,12 +462,52 @@ end --- Replaces slash with an underscore. ---- @string str ---- @treturn string -function util.replaceSlashChar(str) +local function replaceSlashChar(str) if str then return str:gsub('%/','_') end end +--- Replaces characters that are invalid filenames. +-- +-- Replaces the characters \/:*?"<>| with an _ +-- unless an optional path is provided. +-- These characters are problematic on Windows filesystems. On Linux only +-- / poses a problem. +-- If an optional path is provided, util.getFilesystemType() will be used +-- to determine whether stricter VFAT restrictions should be applied. +---- @string str +---- @string path +---- @int limit +---- @treturn string +function util.getSafeFilename(str, path, limit) + local filename, suffix = util.splitFileNameSuffix(str) + local replaceFunc = replaceSlashChar + local safe_filename + -- VFAT supports a maximum of 255 UCS-2 characters, although it's probably treated as UTF-16 by Windows + -- default to a slightly lower limit just in case + limit = limit or 240 + + if path then + local file_system = util.getFilesystemType(path) + if file_system == "vfat" or file_system == "fuse.fsp" then + replaceFunc = replaceAllInvalidChars + end + end + + filename = filename:sub(1, limit) + -- the limit might result in broken UTF-8, which we don't want in the result + filename = util.fixUtf8(filename, "") + + if suffix and suffix ~= "" then + safe_filename = replaceFunc(filename) .. "." .. replaceFunc(suffix) + else + safe_filename = replaceFunc(filename) + end + + return safe_filename +end + --- Splits a file into its directory path and file name. --- If the given path has a trailing /, returns the entire path as the directory --- path and "" as the file name. diff --git a/plugins/newsdownloader.koplugin/main.lua b/plugins/newsdownloader.koplugin/main.lua index 347596074..ce7d1d67e 100644 --- a/plugins/newsdownloader.koplugin/main.lua +++ b/plugins/newsdownloader.koplugin/main.lua @@ -262,7 +262,7 @@ end function NewsDownloader:processAtom(feeds, limit, download_full_article, include_images) local feed_output_dir = string.format("%s%s/", news_download_dir_path, - util.replaceInvalidChars(getFeedTitle(feeds.feed.title))) + util.getSafeFilename(getFeedTitle(feeds.feed.title))) if not lfs.attributes(feed_output_dir, "mode") then lfs.mkdir(feed_output_dir) end @@ -281,7 +281,7 @@ end function NewsDownloader:processRSS(feeds, limit, download_full_article, include_images) local feed_output_dir = ("%s%s/"):format( - news_download_dir_path, util.replaceInvalidChars(util.htmlEntitiesToUtf8(feeds.rss.channel.title))) + news_download_dir_path, util.getSafeFilename(util.htmlEntitiesToUtf8(feeds.rss.channel.title))) if not lfs.attributes(feed_output_dir, "mode") then lfs.mkdir(feed_output_dir) end @@ -307,7 +307,7 @@ local function parseDate(dateTime) end local function getTitleWithDate(feed) - local title = util.replaceInvalidChars(getFeedTitle(feed.title)) + local title = util.getSafeFilename(getFeedTitle(feed.title)) if feed.updated then title = parseDate(feed.updated) .. title elseif feed.pubDate then diff --git a/plugins/wallabag.koplugin/main.lua b/plugins/wallabag.koplugin/main.lua index a6aeee68b..ffe3bea0d 100644 --- a/plugins/wallabag.koplugin/main.lua +++ b/plugins/wallabag.koplugin/main.lua @@ -24,7 +24,7 @@ local T = FFIUtil.template local Screen = require("device").screen -- constants -local article_id_preffix = "[w-id_" +local article_id_prefix = "[w-id_" local article_id_postfix = "] " local failed, skipped, downloaded = 1, 2, 3 @@ -297,8 +297,8 @@ end function Wallabag:download(article) local skip_article = false local item_url = "/api/entries/" .. article.id .. "/export.epub" - local title = util.replaceInvalidChars(article.title) - local local_path = self.directory .. article_id_preffix .. article.id .. article_id_postfix .. title:sub(1,30) .. ".epub" + local title = util.getSafeFilename(article.title) + local local_path = self.directory .. article_id_prefix .. article.id .. article_id_postfix .. title .. ".epub" logger.dbg("Wallabag: DOWNLOAD: id: ", article.id) logger.dbg("Wallabag: DOWNLOAD: title: ", article.title) logger.dbg("Wallabag: DOWNLOAD: filename: ", local_path) @@ -571,17 +571,17 @@ end function Wallabag:getArticleID( path ) -- extract the Wallabag ID from the file name local offset = self.directory:len() + 2 -- skip / and advance to the next char - local preffix_len = article_id_preffix:len() - if path:sub( offset , offset + preffix_len - 1 ) ~= article_id_preffix then - logger.warn("Wallabag: getArticleID: no match! ", path:sub( offset , offset + preffix_len - 1 ) ) + local prefix_len = article_id_prefix:len() + if path:sub( offset , offset + prefix_len - 1 ) ~= article_id_prefix then + logger.warn("Wallabag: getArticleID: no match! ", path:sub( offset , offset + prefix_len - 1 ) ) return end - local endpos = path:find( article_id_postfix, offset + preffix_len ) + local endpos = path:find( article_id_postfix, offset + prefix_len ) if endpos == nil then logger.warn("Wallabag: getArticleID: no match! " ) return end - local id = path:sub( offset + preffix_len, endpos - 1 ) + local id = path:sub( offset + prefix_len, endpos - 1 ) return id end