diff --git a/frontend/util.lua b/frontend/util.lua index c9ebae5e0..2272e9feb 100644 --- a/frontend/util.lua +++ b/frontend/util.lua @@ -117,6 +117,16 @@ function util.splitToChars(text) return tab end +-- Test whether c is a CJK character +function util.isCJKChar(c) + return string.match(c, "[\228-\234][\128-\191].") == c +end + +-- Test whether str contains CJK characters +function util.hasCJKChar(str) + return string.match(str, "[\228-\234][\128-\191].") ~= nil +end + --- Split text into a list of words, spaces and punctuations. ---- @string text text to split ---- @treturn table list of words, spaces and punctuations @@ -124,7 +134,7 @@ function util.splitToWords(text) local wlist = {} for word in util.gsplit(text, "[%s%p]+", true) do -- if space splitted word contains CJK characters - if word:match("[\228-\234][\128-\191]+") then + if util.hasCJKChar(word) then -- split with CJK characters for char in util.gsplit(word, "[\228-\234\192-\255][\128-\191]+", true) do table.insert(wlist, char) @@ -138,7 +148,7 @@ end -- Test whether a string could be separated by a char for multi-line rendering function util.isSplitable(c) - return c == " " or string.match(c, "%p") ~= nil + return util.isCJKChar(c) or c == " " or string.match(c, "%p") ~= nil end return util diff --git a/spec/unit/util_spec.lua b/spec/unit/util_spec.lua index 7a57fc87f..7ebdbf6e6 100644 --- a/spec/unit/util_spec.lua +++ b/spec/unit/util_spec.lua @@ -106,4 +106,24 @@ describe("util module", function() }) end) + it("should split text to line - CJK", function() + local text = "彩虹是通过太阳光的折射引起的。" + local word = "" + local table_of_words = {} + local c + local table_chars = util.splitToChars(text) + for i = 1, #table_chars do + c = table_chars[i] + word = word .. c + if util.isSplitable(c) then + table.insert(table_of_words, word) + word = "" + end + if i == #table_chars then table.insert(table_of_words, word) end + end + assert.are_same(table_of_words, { + "彩","虹","是","通","过","太","阳","光","的","折","射","引","起","的","。", + }) + end) + end)