Refactor out string.gsplit to util.gsplit

pull/1497/head
chrox 9 years ago
parent 98515a2210
commit 932df2a2f9

@ -3,8 +3,7 @@ local Widget = require("ui/widget/widget")
local RenderText = require("ui/rendertext")
local Screen = require("device").screen
local Geom = require("ui/geometry")
-- TODO: rename string:gsplit definition
local util = require("util")
--[[
A TextWidget that handles long text wrapping
@ -64,45 +63,6 @@ function TextBoxWidget:_wrapGreedyAlg(h_list)
return v_list
end
--[[
Lua doesn't have a string.split() function and most of the time
you don't really need it because string.gmatch() is enough.
However string.gmatch() has one significant disadvantage for me:
You can't split a string while matching both the delimited
strings and the delimiters themselves without tracking positions
and substrings. The string.gsplit() function below takes care of
this problem.
Author: Peter Odding
License: MIT/X11
Source: http://snippets.luacode.org/snippets/String_splitting_130
--]]
-- Returns an iterator over the pieces of `self` separated by `pattern`.
--
-- pattern: Lua pattern used as the delimiter; converted with tostring() and
--          defaulting to '%s+' when nil/false.
-- capture: when truthy, each matched delimiter is yielded as well, in order,
--          between the pieces it separates.
--
-- Empty pieces are never yielded (a delimiter at the very start, or two
-- adjacent delimiters, produce nothing between them).
-- Raises 'pattern matches empty string!' if the pattern can match a
-- zero-width span, since such a delimiter could never advance the scan.
function string:gsplit(pattern, capture)
    pattern = pattern and tostring(pattern) or '%s+'
    if (''):find(pattern) then
        error('pattern matches empty string!', 2)
    end
    return coroutine.wrap(function()
        local index = 1
        repeat
            local first, last = self:find(pattern, index)
            if first and last then
                if last < first then
                    -- The up-front check only probes the empty subject. A
                    -- pattern such as a frontier ("%f[%a]") can still match
                    -- zero characters inside a non-empty string, which would
                    -- leave `index` unchanged and loop forever. Level 0: there
                    -- is no meaningful caller frame inside the coroutine.
                    error('pattern matches empty string!', 0)
                end
                if index < first then
                    -- text between the previous delimiter and this one
                    coroutine.yield(self:sub(index, first - 1))
                end
                if capture then
                    coroutine.yield(self:sub(first, last))
                end
                index = last + 1
            else
                -- no more delimiters: emit whatever is left, if anything
                if index <= #self then
                    coroutine.yield(self:sub(index))
                end
                break
            end
        until index > #self
    end)
end
function TextBoxWidget:_getVerticalList(alg)
if self.vertical_list then
return self.vertical_list
@ -110,10 +70,10 @@ function TextBoxWidget:_getVerticalList(alg)
-- build horizontal list
local h_list = {}
local line_count = 0
for line in self.text:gsplit("\n", true) do
for line in util.gsplit(self.text, "\n", true) do
for words in line:gmatch("[\32-\127\192-\255]+[\128-\191]*") do
for word in words:gsplit("%s+", true) do
for w in word:gsplit("%p+", true) do
for word in util.gsplit(words, "%s+", true) do
for w in util.gsplit(word, "%p+", true) do
local word_box = {}
word_box.word = w
word_box.width = RenderText:sizeUtf8Text(0, Screen:getWidth(), self.face, w, true, self.bold).x

@ -8,4 +8,43 @@ function util.stripePunctuations(word)
return word:gsub("\226[\128-\131][\128-\191]",''):gsub("^%p+",''):gsub("%p+$",'')
end
--[[
Lua doesn't have a string.split() function and most of the time
you don't really need it because string.gmatch() is enough.
However string.gmatch() has one significant disadvantage for me:
You can't split a string while matching both the delimited
strings and the delimiters themselves without tracking positions
and substrings. The gsplit function below takes care of
this problem.
Author: Peter Odding
License: MIT/X11
Source: http://snippets.luacode.org/snippets/String_splitting_130
--]]
-- Returns an iterator over the pieces of `str` separated by `pattern`.
--
-- str:     the string to split.
-- pattern: Lua pattern used as the delimiter; converted with tostring() and
--          defaulting to '%s+' when nil/false.
-- capture: when truthy, each matched delimiter is yielded as well, in order,
--          between the pieces it separates.
--
-- Empty pieces are never yielded (a delimiter at the very start, or two
-- adjacent delimiters, produce nothing between them).
-- Raises 'pattern matches empty string!' if the pattern can match a
-- zero-width span, since such a delimiter could never advance the scan.
function util.gsplit(str, pattern, capture)
    pattern = pattern and tostring(pattern) or '%s+'
    if (''):find(pattern) then
        error('pattern matches empty string!', 2)
    end
    return coroutine.wrap(function()
        local index = 1
        repeat
            local first, last = str:find(pattern, index)
            if first and last then
                if last < first then
                    -- The up-front check only probes the empty subject. A
                    -- pattern such as a frontier ("%f[%a]") can still match
                    -- zero characters inside a non-empty string, which would
                    -- leave `index` unchanged and loop forever. Level 0: there
                    -- is no meaningful caller frame inside the coroutine.
                    error('pattern matches empty string!', 0)
                end
                if index < first then
                    -- text between the previous delimiter and this one
                    coroutine.yield(str:sub(index, first - 1))
                end
                if capture then
                    coroutine.yield(str:sub(first, last))
                end
                index = last + 1
            else
                -- no more delimiters: emit whatever is left, if anything
                if index <= #str then
                    coroutine.yield(str:sub(index))
                end
                break
            end
        until index > #str
    end)
end
return util

@ -0,0 +1,36 @@
require("commonrequire")
local util = require("util")
-- Spec for the util module: punctuation stripping and pattern-based splitting.
-- Assertions follow the luassert convention of (expected, actual) so that a
-- failure report labels the two values correctly.
describe("util module", function()
    it("should strip punctuations around word", function()
        -- stripePunctuations returns the results of its final gsub (string and
        -- replacement count); the extra parentheses keep only the string so the
        -- count is not passed on as a third assert argument.
        assert.is_equal("hello world", (util.stripePunctuations("\"hello world\"")))
        assert.is_equal("hello world", (util.stripePunctuations("\"hello world?\"")))
        assert.is_equal("hello, world", (util.stripePunctuations("\"hello, world?\"")))
        assert.is_equal("你好", (util.stripePunctuations("“你好“")))
        assert.is_equal("你好", (util.stripePunctuations("“你好?“")))
    end)

    it("should split string with patterns", function()
        local sentence = "Hello world, welcome to KoReader!"
        local words = {}
        -- capture = false: delimiters (whitespace runs) are dropped
        for word in util.gsplit(sentence, "%s+", false) do
            table.insert(words, word)
        end
        assert.are_same({"Hello", "world,", "welcome", "to", "KoReader!"}, words)
    end)

    it("should split command line arguments with quotation", function()
        local command = "./sdcv -nj \"words\" \"a lot\" 'more or less' --data-dir=dict"
        local argv = {}
        -- 1st pass: isolate quoted sections (kept, because capture = true)
        for arg1 in util.gsplit(command, "[\"'].-[\"']", true) do
            -- 2nd pass: peel whitespace-terminated words off unquoted text
            for arg2 in util.gsplit(arg1, "^[^\"'].-%s+", true) do
                -- 3rd pass: drop the quote characters themselves
                for arg3 in util.gsplit(arg2, "[\"']", false) do
                    local trimmed = arg3:gsub("^%s*(.-)%s*$", "%1")
                    if trimmed ~= "" then
                        table.insert(argv, trimmed)
                    end
                end
            end
        end
        assert.are_same({"./sdcv", "-nj", "words", "a lot", "more or less", "--data-dir=dict"}, argv)
    end)
end)
Loading…
Cancel
Save