[chore] replace utf8 bytes with Unicode escape sequence

reviewable/pr10749/r5^2
poire-z 10 months ago
parent 7bff61150a
commit 626864f856

@ -677,14 +677,14 @@ function ReaderDictionary:cleanSelection(text, is_sane)
-- (example: pdf selection "qu’autrefois," will be cleaned to "autrefois") -- (example: pdf selection "qu’autrefois," will be cleaned to "autrefois")
-- --
-- Replace no-break space with regular space -- Replace no-break space with regular space
text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space text = text:gsub("\u{00A0}", ' ')
-- Trim any space at start or end -- Trim any space at start or end
text = text:gsub("^%s+", "") text = text:gsub("^%s+", "")
text = text:gsub("%s+$", "") text = text:gsub("%s+$", "")
if not is_sane then if not is_sane then
-- Replace extended quote (included in the general puncturation range) -- Replace extended quote (included in the general puncturation range)
-- with plain ascii quote (for french words like "aujourd’hui") -- with plain ascii quote (for french words like "aujourd’hui")
text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark) text = text:gsub("\u{2019}", "'") -- Right single quotation mark
-- Strip punctuation characters around selection -- Strip punctuation characters around selection
text = util.stripPunctuation(text) text = util.stripPunctuation(text)
-- Strip some common english grammatical construct -- Strip some common english grammatical construct

@ -386,7 +386,7 @@ local footerTextGeneratorMap = {
book_title = function(footer) book_title = function(footer)
local doc_info = footer.ui.document:getProps() local doc_info = footer.ui.document:getProps()
if doc_info and doc_info.title then if doc_info and doc_info.title then
local title = doc_info.title:gsub(" ", "\xC2\xA0") -- replace space with no-break-space local title = doc_info.title:gsub(" ", "\u{00A0}") -- replace space with no-break-space
local title_widget = TextWidget:new{ local title_widget = TextWidget:new{
text = title, text = title,
max_width = footer._saved_screen_width * footer.settings.book_title_max_width_pct * (1/100), max_width = footer._saved_screen_width * footer.settings.book_title_max_width_pct * (1/100),
@ -406,7 +406,7 @@ local footerTextGeneratorMap = {
book_chapter = function(footer) book_chapter = function(footer)
local chapter_title = footer.ui.toc:getTocTitleByPage(footer.pageno) local chapter_title = footer.ui.toc:getTocTitleByPage(footer.pageno)
if chapter_title and chapter_title ~= "" then if chapter_title and chapter_title ~= "" then
chapter_title = chapter_title:gsub(" ", "\xC2\xA0") -- replace space with no-break-space chapter_title = chapter_title:gsub(" ", "\u{00A0}") -- replace space with no-break-space
local chapter_widget = TextWidget:new{ local chapter_widget = TextWidget:new{
text = chapter_title, text = chapter_title,
max_width = footer._saved_screen_width * footer.settings.book_chapter_max_width_pct * (1/100), max_width = footer._saved_screen_width * footer.settings.book_chapter_max_width_pct * (1/100),
@ -2037,7 +2037,7 @@ function ReaderFooter:genAllFooterText()
if self.settings.item_prefix == "compact_items" then if self.settings.item_prefix == "compact_items" then
-- remove whitespace from footer items if symbol_type is compact_items -- remove whitespace from footer items if symbol_type is compact_items
-- use a hair-space to avoid issues with RTL display -- use a hair-space to avoid issues with RTL display
text = text:gsub("%s", "\xE2\x80\x8A") text = text:gsub("%s", "\u{200A}")
end end
-- if generator request a merge of this item, add it directly, -- if generator request a merge of this item, add it directly,
-- i.e. no separator before and after the text then. -- i.e. no separator before and after the text then.

@ -1649,7 +1649,7 @@ function ReaderHighlight:onUnhighlight(bookmark_item)
if self.ui.paging then -- We can safely use page if self.ui.paging then -- We can safely use page
-- As we may have changed spaces and hyphens handling in the extracted -- As we may have changed spaces and hyphens handling in the extracted
-- text over the years, check text identities with them removed -- text over the years, check text identities with them removed
local sel_text_cleaned = sel_text:gsub("[ -]", ""):gsub("\xC2\xAD", "") local sel_text_cleaned = sel_text:gsub("[ -]", ""):gsub("\u{00AD}", "")
for index = 1, #self.view.highlight.saved[page] do for index = 1, #self.view.highlight.saved[page] do
local highlight = self.view.highlight.saved[page][index] local highlight = self.view.highlight.saved[page][index]
-- pos0 are tables and can't be compared directly, except when from -- pos0 are tables and can't be compared directly, except when from
@ -1657,7 +1657,7 @@ function ReaderHighlight:onUnhighlight(bookmark_item)
-- If bookmark_item provided, just check datetime -- If bookmark_item provided, just check datetime
if ( (datetime == nil and highlight.pos0 == sel_pos0) or if ( (datetime == nil and highlight.pos0 == sel_pos0) or
(datetime ~= nil and highlight.datetime == datetime) ) then (datetime ~= nil and highlight.datetime == datetime) ) then
if highlight.text:gsub("[ -]", ""):gsub("\xC2\xAD", "") == sel_text_cleaned then if highlight.text:gsub("[ -]", ""):gsub("\u{00AD}", "") == sel_text_cleaned then
idx = index idx = index
break break
end end

@ -83,7 +83,7 @@ end
function ReaderToc:cleanUpTocTitle(title, replace_empty) function ReaderToc:cleanUpTocTitle(title, replace_empty)
title = title:gsub("\13", "") title = title:gsub("\13", "")
if replace_empty and title:match("^%s*$") then if replace_empty and title:match("^%s*$") then
title = "\xE2\x80\x93" -- U+2013 En-Dash title = "\u{2013}" -- En-Dash
end end
return title return title
end end

@ -150,7 +150,7 @@ function datetime.secondsToHClock(seconds, withoutSeconds, hmsFormat, withDays,
if compact then if compact then
return T(C_("Time", "%1s"), string.format("%d", seconds)) return T(C_("Time", "%1s"), string.format("%d", seconds))
else else
return T(C_("Time", "%1m\xE2\x80\x89%2s"), "0", string.format("%d", seconds)) return T(C_("Time", "%1m\u{2009}%2s"), "0", string.format("%d", seconds)) -- use a thin space
end end
else else
if compact then if compact then
@ -178,13 +178,13 @@ function datetime.secondsToHClock(seconds, withoutSeconds, hmsFormat, withDays,
if hmsFormat then if hmsFormat then
time_string = time_string:gsub("0(%d)", "%1") -- delete all leading "0"s time_string = time_string:gsub("0(%d)", "%1") -- delete all leading "0"s
time_string = time_string:gsub(C_("Time", "d"), C_("Time", "d") .. "\xE2\x80\x89") -- add thin space after "d" time_string = time_string:gsub(C_("Time", "d"), C_("Time", "d") .. "\u{2009}") -- add thin space after "d"
time_string = time_string:gsub(C_("Time", "h"), C_("Time", "h") .. "\xE2\x80\x89") -- add thin space after "h" time_string = time_string:gsub(C_("Time", "h"), C_("Time", "h") .. "\u{2009}") -- add thin space after "h"
if not withoutSeconds then if not withoutSeconds then
time_string = time_string:gsub(C_("Time", "m"), C_("Time", "m") .. "\xE2\x80\x89") .. C_("Time", "s") -- add thin space after "m" time_string = time_string:gsub(C_("Time", "m"), C_("Time", "m") .. "\u{2009}") .. C_("Time", "s") -- add thin space after "m"
end end
if compact then if compact then
time_string = time_string:gsub("\xE2\x80\x89", "\xE2\x80\x8A") -- replace thin space with hair space time_string = time_string:gsub("\u{2009}", "\u{200A}") -- replace thin space with hair space
end end
return time_string return time_string
else else

@ -946,7 +946,7 @@ function KoptInterface:getTextFromBoxes(boxes, pos0, pos1)
-- Previous line ended with a minus. -- Previous line ended with a minus.
-- Assume it's some hyphenation and discard it. -- Assume it's some hyphenation and discard it.
line_text = line_text:sub(1, -2) line_text = line_text:sub(1, -2)
elseif line_text:sub(-2, -1) == "\xC2\xAD" then elseif line_text:sub(-2, -1) == "\u{00AD}" then
-- Previous line ended with a hyphen. -- Previous line ended with a hyphen.
-- Assume it's some hyphenation and discard it. -- Assume it's some hyphenation and discard it.
line_text = line_text:sub(1, -3) line_text = line_text:sub(1, -3)

@ -187,14 +187,14 @@ end
-- which would be an issue and would need stripping. But as these -- which would be an issue and would need stripping. But as these
-- Free fonts are only used as fallback fonts, and the invisible glyphs -- Free fonts are only used as fallback fonts, and the invisible glyphs
-- will have been found in the previous fonts, we don't need to. -- will have been found in the previous fonts, we don't need to.
local LRI = "\xE2\x81\xA6" -- U+2066 LRI / LEFT-TO-RIGHT ISOLATE local LRI = "\u{2066}" -- LRI / LEFT-TO-RIGHT ISOLATE
local RLI = "\xE2\x81\xA7" -- U+2067 RLI / RIGHT-TO-LEFT ISOLATE local RLI = "\u{2067}" -- RLI / RIGHT-TO-LEFT ISOLATE
local FSI = "\xE2\x81\xA8" -- U+2068 FSI / FIRST STRONG ISOLATE local FSI = "\u{2068}" -- FSI / FIRST STRONG ISOLATE
local PDI = "\xE2\x81\xA9" -- U+2069 PDI / POP DIRECTIONAL ISOLATE local PDI = "\u{2069}" -- PDI / POP DIRECTIONAL ISOLATE
-- Not currently needed: -- Not currently needed:
-- local LRM = "\xE2\x80\x8E" -- U+200E LRM / LEFT-TO-RIGHT MARK -- local LRM = "\u{200E}" -- LRM / LEFT-TO-RIGHT MARK
-- local RLM = "\xE2\x80\x8F" -- U+200F RLM / RIGHT-TO-LEFT MARK -- local RLM = "\u{200F}" -- RLM / RIGHT-TO-LEFT MARK
function Bidi.ltr(text) function Bidi.ltr(text)
return string.format("%s%s%s", LRI, text, PDI) return string.format("%s%s%s", LRI, text, PDI)

@ -410,7 +410,7 @@ Note that your selected font size is not affected by this setting.]]),
}, },
name_text_hold_callback = optionsutil.showValues, name_text_hold_callback = optionsutil.showValues,
show_true_value_func = function(val) -- add "%" show_true_value_func = function(val) -- add "%"
return string.format("%d\xE2\x80\xAF%%", val) -- use Narrow No-Break space here return string.format("%d\u{202F}%%", val) -- use Narrow No-Break space here
end, end,
}, },
} }
@ -498,7 +498,7 @@ Note that your selected font size is not affected by this setting.]]),
name_text_hold_callback = optionsutil.showValues, name_text_hold_callback = optionsutil.showValues,
name_text_true_values = true, name_text_true_values = true,
show_true_value_func = function(val) show_true_value_func = function(val)
return string.format("%d\xE2\x80\xAF%%, %d\xE2\x80\xAF%%", val[1], val[2]) -- use Narrow Now-Break space here return string.format("%d\u{202F}%%, %d\u{202F}%%", val[1], val[2]) -- use Narrow Now-Break space here
end, end,
}, },
{ {
@ -537,7 +537,7 @@ Note that your selected font size is not affected by this setting.]]),
name_text_hold_callback = optionsutil.showValues, name_text_hold_callback = optionsutil.showValues,
name_text_true_values = true, name_text_true_values = true,
show_true_value_func = function(val) show_true_value_func = function(val)
return string.format("%d\xE2\x80\xAF%%", val) -- use Narrow No-Break space here return string.format("%d\u{202F}%%", val) -- use Narrow No-Break space here
end, end,
}, },
{ {

@ -82,8 +82,8 @@ function ViewHtml:_viewSelectionHTML(document, selected_text, view, with_css_fil
end end
if massage_html then if massage_html then
-- Make some invisible chars visible -- Make some invisible chars visible
replace_in_html("\xC2\xA0", "␣") -- no break space: open box replace_in_html("\u{00A0}", "\u{2423}") -- no break space: open box
replace_in_html("\xC2\xAD", "⋅") -- soft hyphen: dot operator (smaller than middle dot ·) replace_in_html("\u{00AD}", "\u{22C5}") -- soft hyphen: dot operator (smaller than middle dot ·)
-- Prettify inlined CSS (from <HEAD>, put in an internal -- Prettify inlined CSS (from <HEAD>, put in an internal
-- <body><stylesheet> element by crengine (the opening tag may -- <body><stylesheet> element by crengine (the opening tag may
-- include some href=, or end with " ~X>" with some html_flags) -- include some href=, or end with " ~X>" with some html_flags)

@ -201,7 +201,7 @@ function DoubleSpinWidget:update(numberpicker_left_value, numberpicker_right_val
if self.unit == "°" then if self.unit == "°" then
unit = self.unit unit = self.unit
elseif self.unit ~= "" then elseif self.unit ~= "" then
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
end end
end end
table.insert(buttons, { table.insert(buttons, {

@ -226,7 +226,7 @@ function NumberPickerWidget:init()
if self.unit == "°" then if self.unit == "°" then
unit = self.unit unit = self.unit
elseif self.unit ~= "" then elseif self.unit ~= "" then
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
end end
end end
self.text_value = Button:new{ self.text_value = Button:new{

@ -129,7 +129,7 @@ function SpinWidget:update(numberpicker_value, numberpicker_value_index)
if self.unit == "°" then if self.unit == "°" then
unit = self.unit unit = self.unit
elseif self.unit ~= "" then elseif self.unit ~= "" then
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
end end
end end
local value local value

@ -546,17 +546,17 @@ end
-- These chosen ones are available in most fonts (prettier symbols -- These chosen ones are available in most fonts (prettier symbols
-- exist in unicode, but are available in a few fonts only) and -- exist in unicode, but are available in a few fonts only) and
-- have a quite consistent size/weight in all fonts. -- have a quite consistent size/weight in all fonts.
local th1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?) local th1_sym = "\u{2588}" -- full block (big black rectangle) (never met, only for web page title?)
local th2_sym = "\xE2\x96\x89" -- big black square local th2_sym = "\u{2589}" -- big black square
local th3_sym = "\xC2\xA0\xE2\x97\xA4" -- black upper left triangle (indented, nicer) local th3_sym = "\u{00A0}\u{25E4}" -- black upper left triangle (indented, nicer)
local th4_sym = "\xE2\x97\x86" -- black diamond local th4_sym = "\u{25C6}" -- black diamond
local th5_sym = "\xE2\x9C\xBF" -- black florette local th5_sym = "\u{273F}" -- black florette
local th6_sym = "\xE2\x9D\x96" -- black diamond minus white x local th6_sym = "\u{2756}" -- black diamond minus white x
-- Others available in most fonts -- Others available in most fonts
-- local thX_sym = "\xE2\x9C\x9A" -- heavy greek cross -- local thX_sym = "\u{271A}" -- heavy greek cross
-- local thX_sym = "\xE2\x97\xA2" -- black lower right triangle -- local thX_sym = "\u{25E2}" -- black lower right triangle
-- local thX_sym = "\xE2\x97\x89" -- fish eye -- local thX_sym = "\u{25C9}" -- fish eye
-- local thX_sym = "\xE2\x96\x97" -- quadrant lower right -- local thX_sym = "\u{2597}" -- quadrant lower right
-- For optional prettification of the plain text full page -- For optional prettification of the plain text full page
function Wikipedia:prettifyText(text) function Wikipedia:prettifyText(text)
@ -571,7 +571,7 @@ function Wikipedia:prettifyText(text)
text = text:gsub("==$", "==\n") -- for a </hN> at end of text to be matched by next gsub text = text:gsub("==$", "==\n") -- for a </hN> at end of text to be matched by next gsub
text = text:gsub(" ===?\n+", "\n\n") -- </h2> to </h3> : empty line after text = text:gsub(" ===?\n+", "\n\n") -- </h2> to </h3> : empty line after
text = text:gsub(" ====+\n+", "\n") -- </h4> to </hN> : single \n, no empty line text = text:gsub(" ====+\n+", "\n") -- </h4> to </hN> : single \n, no empty line
text = text:gsub("\n\n+\xE2\x80\x94", "\n\xE2\x80\x94") -- em dash, used for quote author, make it stick to prev text text = text:gsub("\n\n+\u{2014}", "\n\u{2014}") -- em dash, used for quote author, make it stick to prev text
text = text:gsub("\n +\n", "\n") -- trim lines full of only spaces (often seen in math formulas) text = text:gsub("\n +\n", "\n") -- trim lines full of only spaces (often seen in math formulas)
text = text:gsub("^\n*", "") -- trim new lines at start text = text:gsub("^\n*", "") -- trim new lines at start
text = text:gsub("\n*$", "") -- trim new lines at end text = text:gsub("\n*$", "") -- trim new lines at end
@ -587,17 +587,17 @@ end
-- These chosen ones are available in most fonts (prettier symbols -- These chosen ones are available in most fonts (prettier symbols
-- exist in unicode, but are available in a few fonts only) and -- exist in unicode, but are available in a few fonts only) and
-- have a quite consistent size/weight in all fonts. -- have a quite consistent size/weight in all fonts.
local h1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?) local h1_sym = "\u{2588}" -- full block (big black rectangle) (never met, only for web page title?)
local h2_sym = "\xE2\x96\x89" -- big black square local h2_sym = "\u{2589}" -- big black square
local h3_sym = "\xE2\x97\xA4" -- black upper left triangle local h3_sym = "\u{25E4}" -- black upper left triangle
local h4_sym = "\xE2\x97\x86" -- black diamond local h4_sym = "\u{25C6}" -- black diamond
local h5_sym = "\xE2\x9C\xBF" -- black florette local h5_sym = "\u{273F}" -- black florette
local h6_sym = "\xE2\x9D\x96" -- black diamond minus white x local h6_sym = "\u{2756}" -- black diamond minus white x
-- Other available ones in most fonts -- Other available ones in most fonts
-- local hXsym = "\xE2\x9C\x9A" -- heavy greek cross -- local hXsym = "\u{271A}" -- heavy greek cross
-- local hXsym = "\xE2\x97\xA2" -- black lower right triangle -- local hXsym = "\u{25E2}" -- black lower right triangle
-- local hXsym = "\xE2\x97\x89" -- fish eye -- local hXsym = "\u{25C9}" -- fish eye
-- local hXsym = "\xE2\x96\x97" -- quadrant lower right -- local hXsym = "\u{2597}" -- quadrant lower right
local ext_to_mimetype = { local ext_to_mimetype = {
png = "image/png", png = "image/png",

@ -1093,7 +1093,7 @@ local HTML_ENTITIES_TO_UTF8 = {
{"&gt;", ">"}, {"&gt;", ">"},
{"&quot;", '"'}, {"&quot;", '"'},
{"&apos;", "'"}, {"&apos;", "'"},
{"&nbsp;", "\xC2\xA0"}, {"&nbsp;", "\u{00A0}"},
{"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end}, {"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end},
{"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x, 16)) end}, {"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x, 16)) end},
{"&amp;", "&"}, -- must be last {"&amp;", "&"}, -- must be last

@ -168,12 +168,12 @@ function FakeCover:init()
-- But at least, make dots breakable (they wouldn't be if not -- But at least, make dots breakable (they wouldn't be if not
-- followed by a space), by adding to them a zero-width-space, -- followed by a space), by adding to them a zero-width-space,
-- so the dots stay on the right of their preceeding word. -- so the dots stay on the right of their preceeding word.
title = title:gsub("%.", ".\xE2\x80\x8B") title = title:gsub("%.", ".\u{200B}")
-- Except for a last dot near end of title that might preceed -- Except for a last dot near end of title that might preceed
-- a file extension: we'd rather want the dot and its suffix -- a file extension: we'd rather want the dot and its suffix
-- together on a last line: so, move the zero-width-space -- together on a last line: so, move the zero-width-space
-- before it. -- before it.
title = title:gsub("%.\xE2\x80\x8B(%w%w?%w?%w?%w?)$", "\xE2\x80\x8B.%1") title = title:gsub("%.\u{200B}(%w%w?%w?%w?%w?)$", "\u{200B}.%1")
-- These substitutions will hopefully have no impact with the following BD wrapping -- These substitutions will hopefully have no impact with the following BD wrapping
end end
if title then if title then
@ -295,10 +295,10 @@ function FakeCover:init()
-- but not around underscores and dots without any space around. -- but not around underscores and dots without any space around.
-- So, append a zero-width-space to allow text wrap after them. -- So, append a zero-width-space to allow text wrap after them.
if title then if title then
title = title:gsub("_", "_\xE2\x80\x8B"):gsub("%.", ".\xE2\x80\x8B") title = title:gsub("_", "_\u{200B}"):gsub("%.", ".\u{200B}")
end end
if authors then if authors then
authors = authors:gsub("_", "_\xE2\x80\x8B"):gsub("%.", ".\xE2\x80\x8B") authors = authors:gsub("_", "_\u{200B}"):gsub("%.", ".\u{200B}")
end end
else else
-- Replace underscores and hyphens with spaces, to allow text wrap there. -- Replace underscores and hyphens with spaces, to allow text wrap there.

Loading…
Cancel
Save