[chore] replace utf8 bytes with Unicode escape sequence

reviewable/pr10749/r5^2
poire-z 9 months ago
parent 7bff61150a
commit 626864f856

@ -677,14 +677,14 @@ function ReaderDictionary:cleanSelection(text, is_sane)
-- (example: pdf selection "quautrefois," will be cleaned to "autrefois")
--
-- Replace no-break space with regular space
text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space
text = text:gsub("\u{00A0}", ' ')
-- Trim any space at start or end
text = text:gsub("^%s+", "")
text = text:gsub("%s+$", "")
if not is_sane then
-- Replace extended quote (included in the general punctuation range)
-- with plain ascii quote (for french words like "aujourdhui")
text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark)
text = text:gsub("\u{2019}", "'") -- Right single quotation mark
-- Strip punctuation characters around selection
text = util.stripPunctuation(text)
-- Strip some common english grammatical construct

@ -386,7 +386,7 @@ local footerTextGeneratorMap = {
book_title = function(footer)
local doc_info = footer.ui.document:getProps()
if doc_info and doc_info.title then
local title = doc_info.title:gsub(" ", "\xC2\xA0") -- replace space with no-break-space
local title = doc_info.title:gsub(" ", "\u{00A0}") -- replace space with no-break-space
local title_widget = TextWidget:new{
text = title,
max_width = footer._saved_screen_width * footer.settings.book_title_max_width_pct * (1/100),
@ -406,7 +406,7 @@ local footerTextGeneratorMap = {
book_chapter = function(footer)
local chapter_title = footer.ui.toc:getTocTitleByPage(footer.pageno)
if chapter_title and chapter_title ~= "" then
chapter_title = chapter_title:gsub(" ", "\xC2\xA0") -- replace space with no-break-space
chapter_title = chapter_title:gsub(" ", "\u{00A0}") -- replace space with no-break-space
local chapter_widget = TextWidget:new{
text = chapter_title,
max_width = footer._saved_screen_width * footer.settings.book_chapter_max_width_pct * (1/100),
@ -2037,7 +2037,7 @@ function ReaderFooter:genAllFooterText()
if self.settings.item_prefix == "compact_items" then
-- remove whitespace from footer items if symbol_type is compact_items
-- use a hair-space to avoid issues with RTL display
text = text:gsub("%s", "\xE2\x80\x8A")
text = text:gsub("%s", "\u{200A}")
end
-- if generator request a merge of this item, add it directly,
-- i.e. no separator before and after the text then.

@ -1649,7 +1649,7 @@ function ReaderHighlight:onUnhighlight(bookmark_item)
if self.ui.paging then -- We can safely use page
-- As we may have changed spaces and hyphens handling in the extracted
-- text over the years, check text identities with them removed
local sel_text_cleaned = sel_text:gsub("[ -]", ""):gsub("\xC2\xAD", "")
local sel_text_cleaned = sel_text:gsub("[ -]", ""):gsub("\u{00AD}", "")
for index = 1, #self.view.highlight.saved[page] do
local highlight = self.view.highlight.saved[page][index]
-- pos0 are tables and can't be compared directly, except when from
@ -1657,7 +1657,7 @@ function ReaderHighlight:onUnhighlight(bookmark_item)
-- If bookmark_item provided, just check datetime
if ( (datetime == nil and highlight.pos0 == sel_pos0) or
(datetime ~= nil and highlight.datetime == datetime) ) then
if highlight.text:gsub("[ -]", ""):gsub("\xC2\xAD", "") == sel_text_cleaned then
if highlight.text:gsub("[ -]", ""):gsub("\u{00AD}", "") == sel_text_cleaned then
idx = index
break
end

@ -83,7 +83,7 @@ end
function ReaderToc:cleanUpTocTitle(title, replace_empty)
title = title:gsub("\13", "")
if replace_empty and title:match("^%s*$") then
title = "\xE2\x80\x93" -- U+2013 En-Dash
title = "\u{2013}" -- En-Dash
end
return title
end

@ -150,7 +150,7 @@ function datetime.secondsToHClock(seconds, withoutSeconds, hmsFormat, withDays,
if compact then
return T(C_("Time", "%1s"), string.format("%d", seconds))
else
return T(C_("Time", "%1m\xE2\x80\x89%2s"), "0", string.format("%d", seconds))
return T(C_("Time", "%1m\u{2009}%2s"), "0", string.format("%d", seconds)) -- use a thin space
end
else
if compact then
@ -178,13 +178,13 @@ function datetime.secondsToHClock(seconds, withoutSeconds, hmsFormat, withDays,
if hmsFormat then
time_string = time_string:gsub("0(%d)", "%1") -- delete all leading "0"s
time_string = time_string:gsub(C_("Time", "d"), C_("Time", "d") .. "\xE2\x80\x89") -- add thin space after "d"
time_string = time_string:gsub(C_("Time", "h"), C_("Time", "h") .. "\xE2\x80\x89") -- add thin space after "h"
time_string = time_string:gsub(C_("Time", "d"), C_("Time", "d") .. "\u{2009}") -- add thin space after "d"
time_string = time_string:gsub(C_("Time", "h"), C_("Time", "h") .. "\u{2009}") -- add thin space after "h"
if not withoutSeconds then
time_string = time_string:gsub(C_("Time", "m"), C_("Time", "m") .. "\xE2\x80\x89") .. C_("Time", "s") -- add thin space after "m"
time_string = time_string:gsub(C_("Time", "m"), C_("Time", "m") .. "\u{2009}") .. C_("Time", "s") -- add thin space after "m"
end
if compact then
time_string = time_string:gsub("\xE2\x80\x89", "\xE2\x80\x8A") -- replace thin space with hair space
time_string = time_string:gsub("\u{2009}", "\u{200A}") -- replace thin space with hair space
end
return time_string
else

@ -946,7 +946,7 @@ function KoptInterface:getTextFromBoxes(boxes, pos0, pos1)
-- Previous line ended with a minus.
-- Assume it's some hyphenation and discard it.
line_text = line_text:sub(1, -2)
elseif line_text:sub(-2, -1) == "\xC2\xAD" then
elseif line_text:sub(-2, -1) == "\u{00AD}" then
-- Previous line ended with a hyphen.
-- Assume it's some hyphenation and discard it.
line_text = line_text:sub(1, -3)

@ -187,14 +187,14 @@ end
-- which would be an issue and would need stripping. But as these
-- Free fonts are only used as fallback fonts, and the invisible glyphs
-- will have been found in the previous fonts, we don't need to.
local LRI = "\xE2\x81\xA6" -- U+2066 LRI / LEFT-TO-RIGHT ISOLATE
local RLI = "\xE2\x81\xA7" -- U+2067 RLI / RIGHT-TO-LEFT ISOLATE
local FSI = "\xE2\x81\xA8" -- U+2068 FSI / FIRST STRONG ISOLATE
local PDI = "\xE2\x81\xA9" -- U+2069 PDI / POP DIRECTIONAL ISOLATE
local LRI = "\u{2066}" -- LRI / LEFT-TO-RIGHT ISOLATE
local RLI = "\u{2067}" -- RLI / RIGHT-TO-LEFT ISOLATE
local FSI = "\u{2068}" -- FSI / FIRST STRONG ISOLATE
local PDI = "\u{2069}" -- PDI / POP DIRECTIONAL ISOLATE
-- Not currently needed:
-- local LRM = "\xE2\x80\x8E" -- U+200E LRM / LEFT-TO-RIGHT MARK
-- local RLM = "\xE2\x80\x8F" -- U+200F RLM / RIGHT-TO-LEFT MARK
-- local LRM = "\u{200E}" -- LRM / LEFT-TO-RIGHT MARK
-- local RLM = "\u{200F}" -- RLM / RIGHT-TO-LEFT MARK
function Bidi.ltr(text)
return string.format("%s%s%s", LRI, text, PDI)

@ -410,7 +410,7 @@ Note that your selected font size is not affected by this setting.]]),
},
name_text_hold_callback = optionsutil.showValues,
show_true_value_func = function(val) -- add "%"
return string.format("%d\xE2\x80\xAF%%", val) -- use Narrow No-Break space here
return string.format("%d\u{202F}%%", val) -- use Narrow No-Break space here
end,
},
}
@ -498,7 +498,7 @@ Note that your selected font size is not affected by this setting.]]),
name_text_hold_callback = optionsutil.showValues,
name_text_true_values = true,
show_true_value_func = function(val)
return string.format("%d\xE2\x80\xAF%%, %d\xE2\x80\xAF%%", val[1], val[2]) -- use Narrow Now-Break space here
return string.format("%d\u{202F}%%, %d\u{202F}%%", val[1], val[2]) -- use Narrow Now-Break space here
end,
},
{
@ -537,7 +537,7 @@ Note that your selected font size is not affected by this setting.]]),
name_text_hold_callback = optionsutil.showValues,
name_text_true_values = true,
show_true_value_func = function(val)
return string.format("%d\xE2\x80\xAF%%", val) -- use Narrow No-Break space here
return string.format("%d\u{202F}%%", val) -- use Narrow No-Break space here
end,
},
{

@ -82,8 +82,8 @@ function ViewHtml:_viewSelectionHTML(document, selected_text, view, with_css_fil
end
if massage_html then
-- Make some invisible chars visible
replace_in_html("\xC2\xA0", "␣") -- no break space: open box
replace_in_html("\xC2\xAD", "⋅") -- soft hyphen: dot operator (smaller than middle dot ·)
replace_in_html("\u{00A0}", "\u{2423}") -- no break space: open box
replace_in_html("\u{00AD}", "\u{22C5}") -- soft hyphen: dot operator (smaller than middle dot ·)
-- Prettify inlined CSS (from <HEAD>, put in an internal
-- <body><stylesheet> element by crengine (the opening tag may
-- include some href=, or end with " ~X>" with some html_flags)

@ -201,7 +201,7 @@ function DoubleSpinWidget:update(numberpicker_left_value, numberpicker_right_val
if self.unit == "°" then
unit = self.unit
elseif self.unit ~= "" then
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here
unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
end
end
table.insert(buttons, {

@ -226,7 +226,7 @@ function NumberPickerWidget:init()
if self.unit == "°" then
unit = self.unit
elseif self.unit ~= "" then
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here
unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
end
end
self.text_value = Button:new{

@ -129,7 +129,7 @@ function SpinWidget:update(numberpicker_value, numberpicker_value_index)
if self.unit == "°" then
unit = self.unit
elseif self.unit ~= "" then
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here
unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
end
end
local value

@ -546,17 +546,17 @@ end
-- These chosen ones are available in most fonts (prettier symbols
-- exist in unicode, but are available in a few fonts only) and
-- have a quite consistent size/weight in all fonts.
local th1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?)
local th2_sym = "\xE2\x96\x89" -- big black square
local th3_sym = "\xC2\xA0\xE2\x97\xA4" -- black upper left triangle (indented, nicer)
local th4_sym = "\xE2\x97\x86" -- black diamond
local th5_sym = "\xE2\x9C\xBF" -- black florette
local th6_sym = "\xE2\x9D\x96" -- black diamond minus white x
local th1_sym = "\u{2588}" -- full block (big black rectangle) (never met, only for web page title?)
local th2_sym = "\u{2589}" -- big black square
local th3_sym = "\u{00A0}\u{25E4}" -- black upper left triangle (indented, nicer)
local th4_sym = "\u{25C6}" -- black diamond
local th5_sym = "\u{273F}" -- black florette
local th6_sym = "\u{2756}" -- black diamond minus white x
-- Others available in most fonts
-- local thX_sym = "\xE2\x9C\x9A" -- heavy greek cross
-- local thX_sym = "\xE2\x97\xA2" -- black lower right triangle
-- local thX_sym = "\xE2\x97\x89" -- fish eye
-- local thX_sym = "\xE2\x96\x97" -- quadrant lower right
-- local thX_sym = "\u{271A}" -- heavy greek cross
-- local thX_sym = "\u{25E2}" -- black lower right triangle
-- local thX_sym = "\u{25C9}" -- fish eye
-- local thX_sym = "\u{2597}" -- quadrant lower right
-- For optional prettification of the plain text full page
function Wikipedia:prettifyText(text)
@ -571,7 +571,7 @@ function Wikipedia:prettifyText(text)
text = text:gsub("==$", "==\n") -- for a </hN> at end of text to be matched by next gsub
text = text:gsub(" ===?\n+", "\n\n") -- </h2> to </h3> : empty line after
text = text:gsub(" ====+\n+", "\n") -- </h4> to </hN> : single \n, no empty line
text = text:gsub("\n\n+\xE2\x80\x94", "\n\xE2\x80\x94") -- em dash, used for quote author, make it stick to prev text
text = text:gsub("\n\n+\u{2014}", "\n\u{2014}") -- em dash, used for quote author, make it stick to prev text
text = text:gsub("\n +\n", "\n") -- trim lines full of only spaces (often seen in math formulas)
text = text:gsub("^\n*", "") -- trim new lines at start
text = text:gsub("\n*$", "") -- trim new lines at end
@ -587,17 +587,17 @@ end
-- These chosen ones are available in most fonts (prettier symbols
-- exist in unicode, but are available in a few fonts only) and
-- have a quite consistent size/weight in all fonts.
local h1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?)
local h2_sym = "\xE2\x96\x89" -- big black square
local h3_sym = "\xE2\x97\xA4" -- black upper left triangle
local h4_sym = "\xE2\x97\x86" -- black diamond
local h5_sym = "\xE2\x9C\xBF" -- black florette
local h6_sym = "\xE2\x9D\x96" -- black diamond minus white x
local h1_sym = "\u{2588}" -- full block (big black rectangle) (never met, only for web page title?)
local h2_sym = "\u{2589}" -- big black square
local h3_sym = "\u{25E4}" -- black upper left triangle
local h4_sym = "\u{25C6}" -- black diamond
local h5_sym = "\u{273F}" -- black florette
local h6_sym = "\u{2756}" -- black diamond minus white x
-- Other available ones in most fonts
-- local hXsym = "\xE2\x9C\x9A" -- heavy greek cross
-- local hXsym = "\xE2\x97\xA2" -- black lower right triangle
-- local hXsym = "\xE2\x97\x89" -- fish eye
-- local hXsym = "\xE2\x96\x97" -- quadrant lower right
-- local hXsym = "\u{271A}" -- heavy greek cross
-- local hXsym = "\u{25E2}" -- black lower right triangle
-- local hXsym = "\u{25C9}" -- fish eye
-- local hXsym = "\u{2597}" -- quadrant lower right
local ext_to_mimetype = {
png = "image/png",

@ -1093,7 +1093,7 @@ local HTML_ENTITIES_TO_UTF8 = {
{"&gt;", ">"},
{"&quot;", '"'},
{"&apos;", "'"},
{"&nbsp;", "\xC2\xA0"},
{"&nbsp;", "\u{00A0}"},
{"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end},
{"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x, 16)) end},
{"&amp;", "&"}, -- must be last

@ -168,12 +168,12 @@ function FakeCover:init()
-- But at least, make dots breakable (they wouldn't be if not
-- followed by a space), by adding to them a zero-width-space,
-- so the dots stay on the right of their preceding word.
title = title:gsub("%.", ".\xE2\x80\x8B")
title = title:gsub("%.", ".\u{200B}")
-- Except for a last dot near end of title that might precede
-- a file extension: we'd rather want the dot and its suffix
-- together on a last line: so, move the zero-width-space
-- before it.
title = title:gsub("%.\xE2\x80\x8B(%w%w?%w?%w?%w?)$", "\xE2\x80\x8B.%1")
title = title:gsub("%.\u{200B}(%w%w?%w?%w?%w?)$", "\u{200B}.%1")
-- These substitutions will hopefully have no impact with the following BD wrapping
end
if title then
@ -295,10 +295,10 @@ function FakeCover:init()
-- but not around underscores and dots without any space around.
-- So, append a zero-width-space to allow text wrap after them.
if title then
title = title:gsub("_", "_\xE2\x80\x8B"):gsub("%.", ".\xE2\x80\x8B")
title = title:gsub("_", "_\u{200B}"):gsub("%.", ".\u{200B}")
end
if authors then
authors = authors:gsub("_", "_\xE2\x80\x8B"):gsub("%.", ".\xE2\x80\x8B")
authors = authors:gsub("_", "_\u{200B}"):gsub("%.", ".\u{200B}")
end
else
-- Replace underscores and hyphens with spaces, to allow text wrap there.

Loading…
Cancel
Save