diff --git a/frontend/ui/widget/textboxwidget.lua b/frontend/ui/widget/textboxwidget.lua
index b7bfd251d..8adcc40d3 100644
--- a/frontend/ui/widget/textboxwidget.lua
+++ b/frontend/ui/widget/textboxwidget.lua
@@ -3,8 +3,7 @@ local Widget = require("ui/widget/widget")
 local RenderText = require("ui/rendertext")
 local Screen = require("device").screen
 local Geom = require("ui/geometry")
-
--- TODO: rename string:gsplit definition
+local util = require("util")
 
 --[[
 A TextWidget that handles long text wrapping
@@ -64,45 +63,6 @@ function TextBoxWidget:_wrapGreedyAlg(h_list)
     return v_list
 end
 
---[[
-Lua doesn't have a string.split() function and most of the time
-you don't really need it because string.gmatch() is enough.
-However string.gmatch() has one significant disadvantage for me:
-You can't split a string while matching both the delimited
-strings and the delimiters themselves without tracking positions
-and substrings. The string.gsplit() function below takes care of
-this problem.
-Author: Peter Odding
-License: MIT/X11
-Source: http://snippets.luacode.org/snippets/String_splitting_130
---]]
-function string:gsplit(pattern, capture)
-    pattern = pattern and tostring(pattern) or '%s+'
-    if (''):find(pattern) then
-        error('pattern matches empty string!', 2)
-    end
-    return coroutine.wrap(function()
-        local index = 1
-        repeat
-            local first, last = self:find(pattern, index)
-            if first and last then
-                if index < first then
-                    coroutine.yield(self:sub(index, first - 1))
-                end
-                if capture then
-                    coroutine.yield(self:sub(first, last))
-                end
-                index = last + 1
-            else
-                if index <= #self then
-                    coroutine.yield(self:sub(index))
-                end
-                break
-            end
-        until index > #self
-    end)
-end
-
 function TextBoxWidget:_getVerticalList(alg)
     if self.vertical_list then
         return self.vertical_list
@@ -110,10 +70,10 @@ function TextBoxWidget:_getVerticalList(alg)
     -- build horizontal list
     local h_list = {}
     local line_count = 0
-    for line in self.text:gsplit("\n", true) do
+    for line in util.gsplit(self.text, "\n", true) do
         for words in line:gmatch("[\32-\127\192-\255]+[\128-\191]*") do
-            for word in words:gsplit("%s+", true) do
-                for w in word:gsplit("%p+", true) do
+            for word in util.gsplit(words, "%s+", true) do
+                for w in util.gsplit(word, "%p+", true) do
                     local word_box = {}
                     word_box.word = w
                     word_box.width = RenderText:sizeUtf8Text(0, Screen:getWidth(), self.face, w, true, self.bold).x
diff --git a/frontend/util.lua b/frontend/util.lua
index ff1fbe4e1..5eff42fef 100644
--- a/frontend/util.lua
+++ b/frontend/util.lua
@@ -8,4 +8,54 @@ function util.stripePunctuations(word)
     return word:gsub("\226[\128-\131][\128-\191]",''):gsub("^%p+",''):gsub("%p+$",'')
 end
 
+--[[
+Lua doesn't have a string.split() function and most of the time
+you don't really need it because string.gmatch() is enough.
+However string.gmatch() has one significant disadvantage for me:
+You can't split a string while matching both the delimited
+strings and the delimiters themselves without tracking positions
+and substrings. The gsplit function below takes care of
+this problem.
+Author: Peter Odding
+License: MIT/X11
+Source: http://snippets.luacode.org/snippets/String_splitting_130
+--]]
+--- Lazily splits a string around every match of a Lua pattern.
+-- @param str text to split
+-- @param pattern delimiter pattern (defaults to '%s+'); must never match an empty string
+-- @param capture when truthy, the matched delimiters are yielded as well
+-- @return iterator function producing the pieces in order of appearance
+function util.gsplit(str, pattern, capture)
+    pattern = pattern and tostring(pattern) or '%s+'
+    if (''):find(pattern) then
+        error('pattern matches empty string!', 2)
+    end
+    return coroutine.wrap(function()
+        local index = 1
+        repeat
+            local first, last = str:find(pattern, index)
+            if first and last then
+                -- The check above only probes the empty string. A pattern such
+                -- as "%f[%w]" passes it yet still produces zero-width matches,
+                -- which would leave index unchanged and spin forever.
+                if last < first then
+                    error('pattern matches empty string!')
+                end
+                if index < first then
+                    coroutine.yield(str:sub(index, first - 1))
+                end
+                if capture then
+                    coroutine.yield(str:sub(first, last))
+                end
+                index = last + 1
+            else
+                if index <= #str then
+                    coroutine.yield(str:sub(index))
+                end
+                break
+            end
+        until index > #str
+    end)
+end
+
 return util
diff --git a/spec/unit/util_spec.lua b/spec/unit/util_spec.lua
new file mode 100644
index 000000000..681d251f5
--- /dev/null
+++ b/spec/unit/util_spec.lua
@@ -0,0 +1,41 @@
+require("commonrequire")
+
+local util = require("util")
+
+describe("util module", function()
+    it("should strip punctuations around word", function()
+        assert.is_equal(util.stripePunctuations("\"hello world\""), "hello world")
+        assert.is_equal(util.stripePunctuations("\"hello world?\""), "hello world")
+        assert.is_equal(util.stripePunctuations("\"hello, world?\""), "hello, world")
+        assert.is_equal(util.stripePunctuations("“你好“"), "你好")
+        assert.is_equal(util.stripePunctuations("“你好?“"), "你好")
+    end)
+    it("should split string with patterns", function()
+        local sentence = "Hello world, welcome to KoReader!"
+        local words = {}
+        for word in util.gsplit(sentence, "%s+", false) do
+            table.insert(words, word)
+        end
+        assert.are_same(words, {"Hello", "world,", "welcome", "to", "KoReader!"})
+    end)
+    it("should split command line arguments with quotation", function()
+        local command = "./sdcv -nj \"words\" \"a lot\" 'more or less' --data-dir=dict"
+        local argv = {}
+        for arg1 in util.gsplit(command, "[\"'].-[\"']", true) do
+            for arg2 in util.gsplit(arg1, "^[^\"'].-%s+", true) do
+                for arg3 in util.gsplit(arg2, "[\"']", false) do
+                    local trimed = arg3:gsub("^%s*(.-)%s*$", "%1")
+                    if trimed ~= "" then
+                        table.insert(argv, trimed)
+                    end
+                end
+            end
+        end
+        assert.are_same(argv, {"./sdcv", "-nj", "words", "a lot", "more or less", "--data-dir=dict"})
+    end)
+    it("should reject patterns that produce zero-width matches", function()
+        assert.has_error(function()
+            for _ in util.gsplit("hello world", "%f[%w]", true) do end
+        end)
+    end)
+end)