From 22b7f17cd82f3d819203afcc27983a3e91c117b0 Mon Sep 17 00:00:00 2001 From: poire-z Date: Sun, 16 Dec 2018 18:02:38 +0100 Subject: [PATCH] Fix translator, enable selected text translation Update translator.lua to use a still-working Google Translate API. Add a method to show translations (main and alternates) in a TextViewer. Re-enable "Translate" button in text selection/highlight buttons dialog. Target language can be set with a manually added setting: translator_target_language = "fr"; when unset, it falls back to the UI language. --- .../apps/reader/modules/readerhighlight.lua | 162 +++--- frontend/ui/translator.lua | 535 +++++++++++++++++- spec/unit/translator_spec.lua | 9 +- 3 files changed, 602 insertions(+), 104 deletions(-) diff --git a/frontend/apps/reader/modules/readerhighlight.lua b/frontend/apps/reader/modules/readerhighlight.lua index c748fc2d2..05c501052 100644 --- a/frontend/apps/reader/modules/readerhighlight.lua +++ b/frontend/apps/reader/modules/readerhighlight.lua @@ -487,13 +487,13 @@ end function ReaderHighlight:translate(selected_text) if selected_text.text ~= "" then - self.ui:handleEvent(Event:new("TranslateText", self, selected_text.text)) + self:onTranslateText(selected_text.text) -- or we will do OCR else local text = self.ui.document:getOCRText(self.hold_pos.page, selected_text) logger.dbg("OCRed text:", text) if text and text ~= "" then - self.ui:handleEvent(Event:new("TranslateText", self, text)) + self:onTranslateText(text) else UIManager:show(InfoMessage:new{ text = info_message_ocr_text, @@ -502,6 +502,11 @@ function ReaderHighlight:translate(selected_text) end end +function ReaderHighlight:onTranslateText(text) + local Translator = require("ui/translator") + Translator:showTranslation(text) +end + function ReaderHighlight:onHoldRelease() if self.hold_start_tv then local hold_duration = TimeVal.now() - self.hold_start_tv @@ -516,89 +521,94 @@ function ReaderHighlight:onHoldRelease() end if self.selected_text then logger.dbg("show 
highlight dialog") - self.highlight_dialog = ButtonDialog:new{ - buttons = { + local highlight_buttons = { + { { - { - text = _("Highlight"), - callback = function() - self:saveHighlight() - self:onClose() - end, - }, - { - text = _("Add Note"), - enabled = false, - callback = function() - self:addNote() - self:onClose() - end, - }, + text = _("Highlight"), + callback = function() + self:saveHighlight() + self:onClose() + end, }, { - { - text = "Copy", - enabled = Device:hasClipboard(), - callback = function() - Device.input.setClipboardText(self.selected_text.text) - end, - }, - { - text = _("View HTML"), - enabled = not self.ui.document.info.has_pages, - callback = function() - self:viewSelectionHTML() - end, - }, - --[[ - { - text = _("Translate"), - enabled = false, - callback = function() - self:translate(self.selected_text) - self:onClose() - end, - }, - --]] + text = _("Add Note"), + enabled = false, + callback = function() + self:addNote() + self:onClose() + end, }, + }, + { { - { - text = _("Wikipedia"), - callback = function() - UIManager:scheduleIn(0.1, function() - self:lookupWikipedia() - -- We don't call self:onClose(), we need the highlight - -- to still be there, as we may Highlight it from the - -- dict lookup widget - end) - end, - }, - { - text = _("Dictionary"), - callback = function() - self:onHighlightDictLookup() - -- We don't call self:onClose(), same reason as above - end, - }, + text = "Copy", + enabled = Device:hasClipboard(), + callback = function() + Device.input.setClipboardText(self.selected_text.text) + end, }, { - { - text = _("Follow Link"), - enabled = self.selected_link ~= nil, - callback = function() - self.ui.link:onGotoLink(self.selected_link) - self:onClose() - end, - }, - { - text = _("Search"), - callback = function() - self:onHighlightSearch() - UIManager:close(self.highlight_dialog) - end, - }, + text = _("View HTML"), + enabled = not self.ui.document.info.has_pages, + callback = function() + self:viewSelectionHTML() + 
end, + }, + }, + { + { + text = _("Wikipedia"), + callback = function() + UIManager:scheduleIn(0.1, function() + self:lookupWikipedia() + -- We don't call self:onClose(), we need the highlight + -- to still be there, as we may Highlight it from the + -- dict lookup widget + end) + end, + }, + { + text = _("Dictionary"), + callback = function() + self:onHighlightDictLookup() + -- We don't call self:onClose(), same reason as above + end, + }, + }, + { + { + text = _("Translate"), + callback = function() + self:translate(self.selected_text) + -- We don't call self:onClose(), so one can still see + -- the highlighted text when moving the translated + -- text window, and also if NetworkMgr:promptWifiOn() + -- is needed, so the user can just tap again on this + -- button and does not need to select the text again. + end, + }, + { + text = _("Search"), + callback = function() + self:onHighlightSearch() + UIManager:close(self.highlight_dialog) + end, }, }, + } + if self.selected_link ~= nil then + table.insert(highlight_buttons, { -- for now, a single button in an added row + { + text = _("Follow Link"), + callback = function() + self.ui.link:onGotoLink(self.selected_link) + self:onClose() + end, + }, + }) + end + self.highlight_dialog = ButtonDialog:new{ + buttons = highlight_buttons, tap_close_callback = function() self:handleEvent(Event:new("Tap")) end, } UIManager:show(self.highlight_dialog) diff --git a/frontend/ui/translator.lua b/frontend/ui/translator.lua index 2c945eacb..23858f21c 100644 --- a/frontend/ui/translator.lua +++ b/frontend/ui/translator.lua @@ -1,42 +1,83 @@ --[[-- This module translates text using Google Translate. 
- + + --]] +-- Useful other implementation and discussion: +-- https://github.com/ssut/py-googletrans/blob/master/googletrans/client.py +-- https://stackoverflow.com/questions/26714426/what-is-the-meaning-of-google-translate-query-params + local JSON = require("json") local logger = require("logger") local Translator = { - trans_servers = { - "http://translate.google.cn", - "http://translate.google.com", - }, - trans_path = "/translate_a/t", - trans_params = { - client = "z", -- client z returns normal JSON result - ie = "UTF-8", - oe = "UTF-8", - hl = "en", - tl = "en", - sl = nil, -- we don't specify source languagae to detect language - }, - default_lang = "en", + trans_servers = { + "https://translate.googleapis.com/", + -- "http://translate.google.cn", + }, + trans_path = "/translate_a/single", + trans_params = { + client = "gtx", -- (using "t" raises 403 Forbidden) + ie = "UTF-8", -- input encoding + oe = "UTF-8", -- output encoding + sl = "auto", -- source language (we need to specify "auto" to detect language) + tl = "en", -- target language + hl = "en", -- ? + otf = 1, -- ? + ssel = 0, -- ? + tsel = 0, -- ? + -- tk = "" -- auth token + dt = { -- what we want in result + "t", -- translation of source text + "at", -- alternate translations + -- Next options only give additional results when text is a single word + -- "bd", -- dictionary (articles, reverse translations, etc) + -- "ex", -- examples + -- "ld", -- ? + -- "md", -- definitions of source text + -- "qca", -- ? 
+ -- "rw", -- "see also" list + -- "rm", -- transcription / transliteration of source and translated texts + -- "ss", -- synonyms of source text, if it's one word + } + -- q = text to translate + }, + default_lang = "en", } function Translator:getTransServer() return G_reader_settings:readSetting("trans_server") or self.trans_servers[1] end +function Translator:getTargetLanguage() + -- One can manually set his preferred target language + local lang = G_reader_settings:readSetting("translator_target_language") + if not lang then + -- Fall back to the UI language the user has selected + lang = G_reader_settings:readSetting("language") + if lang and lang ~= "" then + -- convert "zh-CN" and "zh-TW" to "zh" + lang = lang:match("(.*)-") or lang + if lang == "C" then + lang="en" + end + lang = lang:lower() + end + end + return lang or "en" +end + --[[-- Returns decoded JSON table from translate server. +@string text @string target_lang @string source_lang -@string text @treturn string result, or nil --]] -function Translator:loadPage(target_lang, source_lang, text) +function Translator:loadPage(text, target_lang, source_lang) local socket = require('socket') local url = require('socket.url') local http = require('socket.http') @@ -48,16 +89,27 @@ function Translator:loadPage(target_lang, source_lang, text) self.trans_params.tl = target_lang self.trans_params.sl = source_lang for k,v in pairs(self.trans_params) do - query = query .. k .. '=' .. v .. '&' + if type(v) == "table" then + for _, v2 in ipairs(v) do + query = query .. k .. '=' .. v2 .. '&' + end + else + query = query .. k .. '=' .. v .. '&' + end end local parsed = url.parse(self:getTransServer()) parsed.path = self.trans_path - parsed.query = query .. "text=" .. url.escape(text) + parsed.query = query .. "q=" .. 
url.escape(text) -- HTTP request request['url'] = url.build(parsed) + logger.dbg("Calling", request.url) request['method'] = 'GET' request['sink'] = ltn12.sink.table(sink) + -- We may try to set a common User-Agent if it happens we're 403 Forbidden + -- request['headers'] = { + -- ["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", + -- } http.TIMEOUT, https.TIMEOUT = 10, 10 local httpRequest = parsed.scheme == 'http' and http.request or https.request -- first argument returned by skip is code @@ -74,9 +126,15 @@ function Translator:loadPage(target_lang, source_lang, text) end local content = table.concat(sink) - if content ~= "" and string.sub(content, 1,1) == "{" then - local ok, result = pcall(JSON.decode, content) + -- logger.dbg("translator content:", content) + local first_char = content:sub(1, 1) + if content ~= "" and (first_char == "{" or first_char == "[") then + -- Get nil instead of functions for 'null' by using JSON.decode.simple + -- (so the result can be fully serialized when used + -- with Trapper:dismissableRunInSubprocess()) + local ok, result = pcall(JSON.decode, content, JSON.decode.simple) if ok and result then + logger.dbg("translator json:", result) return result else logger.warn("translator error:", result) @@ -85,6 +143,20 @@ function Translator:loadPage(target_lang, source_lang, text) logger.warn("not JSON in translator response:", content) end end +-- The JSON result is a list of 9 to 15 items: +-- 1: translation +-- 2: all-translations +-- 3: original-language +-- 6: possible-translations +-- 7: confidence +-- 8: possible-mistakes +-- 9: language +-- 12: synonyms +-- 13: definitions +-- 14: examples +-- 15: see-also +-- Depending on the 'dt' parameters used, some may be null or absent. +-- See bottom of this file for some sample results. --[[-- Tries to automatically detect language of `text`. @@ -93,9 +165,9 @@ Tries to automatically detect language of `text`. 
@treturn string lang (`"en"`, `"fr"`, `…`) --]] function Translator:detect(text) - local result = self:loadPage("en", nil, text) - if result then - local src_lang = result.src + local result = self:loadPage(text, "en", "auto") + if result and result[3] then + local src_lang = result[3] logger.dbg("detected language:", src_lang) return src_lang else @@ -103,4 +175,419 @@ function Translator:detect(text) end end +--[[-- +Translate text, returns translation as a single string. + +@string text +@string target_lang[opt] (`"en"`, `"fr"`, `…`) +@string source_lang[opt="auto"] (`"en"`, `"fr"`, `…`) or `"auto"` to auto-detect source language +@treturn string translated text, or nil +--]] +function Translator:translate(text, target_lang, source_lang) + if not target_lang then + target_lang = self:getTargetLanguage() + end + if not source_lang then + source_lang = "auto" + end + local result = self:loadPage(text, target_lang, source_lang) + if result and result[1] and type(result[1]) == "table" then + local translated = {} + for i, r in ipairs(result[1]) do + table.insert(translated, r[1]) + end + return table.concat(translated, "") + end + return nil +end + +--[[-- +Show translated text in TextViewer, with alternate translations + +@string text +@string target_lang[opt] (`"en"`, `"fr"`, `…`) +@string source_lang[opt="auto"] (`"en"`, `"fr"`, `…`) or `"auto"` to auto-detect source language +--]] +function Translator:showTranslation(text, target_lang, source_lang) + local NetworkMgr = require("ui/network/manager") + if not NetworkMgr:isOnline() then + NetworkMgr:promptWifiOn() + return + end + -- Wrap next function with Trapper to be able to interrupt + -- translation service query. 
+ local Trapper = require("ui/trapper") + Trapper:wrap(function() + self:_showTranslation(text, target_lang, source_lang) + end) +end + +function Translator:_showTranslation(text, target_lang, source_lang) + local InfoMessage = require("ui/widget/infomessage") + local TextViewer = require("ui/widget/textviewer") + local Trapper = require("ui/trapper") + local UIManager = require("ui/uimanager") + local util = require("util") + local Screen = require("device").screen + local T = require("ffi/util").template + local _ = require("gettext") + + if not target_lang then + target_lang = self:getTargetLanguage() + end + if not source_lang then + source_lang = "auto" + end + + local completed, result = Trapper:dismissableRunInSubprocess(function() + return self:loadPage(text, target_lang, source_lang) + end, _("Querying translation service…")) + if not completed then + UIManager:show(InfoMessage:new{ + text = _("Translation interrupted.") + }) + return + end + if not result or type(result) ~= "table" then + UIManager:show(InfoMessage:new{ + text = _("Translation failed.") + }) + return + end + + if result[3] then + source_lang = result[3] + end + local output = {} + + -- For both main and alternate translations, we may get multiple slices + -- of the original text and its translations. + if result[1] and type(result[1]) == "table" and #result[1] > 0 then + -- Main translation: we can make a single string from the multiple parts + -- for easier quick reading + local source = {} + local translated = {} + for i, r in ipairs(result[1]) do + local s = type(r[2]) == "string" and r[2] or "" + local t = type(r[1]) == "string" and r[1] or "" + table.insert(source, s) + table.insert(translated, t) + end + table.insert(output, "▣ " .. table.concat(source, " ")) + table.insert(output, "● " .. 
table.concat(translated, " ")) + end + + if result[6] and type(result[6]) == "table" and #result[6] > 0 then + -- Alternative translations: + table.insert(output, "________") + for i, r in ipairs(result[6]) do + if type(r[3]) == "table" then + local s = type(r[1]) == "string" and r[1]:gsub("\n", "") or "" + table.insert(output, "▣ " .. s) + for j, rt in ipairs(r[3]) do + -- Use number in solid black circle symbol (U+2776...277F) + local symbol = util.unicodeCodepointToUtf8(10101 + (j < 10 and j or 10)) + local t = type(rt[1]) == "string" and rt[1]:gsub("\n", "") or "" + table.insert(output, symbol .. " " .. t) + end + end + table.insert(output, "") + end + end + + -- table.insert(output, require("dump")(result)) -- for debugging + UIManager:show(TextViewer:new{ + title = T(_("Translation from %1 to %2"), source_lang:upper(), target_lang:upper()), + text = table.concat(output, "\n"), + height = Screen:getHeight() * 3/4, + }) +end + return Translator + +-- Sample JSON results: +-- +-- Multiple words result: +-- { +-- [1] = { +-- [1] = { +-- [1] = "I know you did not destroy your King's house, because then you had none. ", +-- [2] = "Ich weiß, dass ihr nicht eures Königs Haus zerstört habt, denn damals hattet ihr ja keinen.", +-- [5] = 3, +-- ["n"] = 5 +-- }, +-- [2] = { +-- [1] = "But you can not deny that you destroyed a royal palace. 
", +-- [2] = "Aber ihr könnt nicht leugnen, dass ihr einen Königspalast zerstört habt.", +-- [5] = 3, +-- ["n"] = 5 +-- }, +-- [3] = { +-- [1] = "If the king is dead, then the kingdom remains, just as a ship remains, whose helmsman has fallen", +-- [2] = "Ist der König tot, so bleibt doch das Reich bestehen, ebenso wie ein Schiff bleibt, dessen Steuermann gefallen ist", +-- [5] = 3, +-- ["n"] = 5 +-- } +-- }, +-- [3] = "de", +-- [6] = { +-- [1] = { +-- [1] = "Ich weiß, dass ihr nicht eures Königs Haus zerstört habt, denn damals hattet ihr ja keinen.", +-- [3] = { +-- [1] = { +-- [1] = "I know you did not destroy your King's house, because then you had none.", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- }, +-- [2] = { +-- [1] = "I know that you have not destroyed your king house, because at that time you had not any.", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- } +-- }, +-- [4] = { +-- [1] = { +-- [1] = 0, +-- [2] = 91 +-- } +-- }, +-- [5] = "Ich weiß, dass ihr nicht eures Königs Haus zerstört habt, denn damals hattet ihr ja keinen.", +-- [6] = 0, +-- [7] = 0 +-- }, +-- [2] = { +-- [1] = "Aber ihr könnt nicht leugnen, dass ihr einen Königspalast zerstört habt.", +-- [3] = { +-- [1] = { +-- [1] = "But you can not deny that you destroyed a royal palace.", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- }, +-- [2] = { +-- [1] = "But you can not deny that you have destroyed a royal palace.", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- } +-- }, +-- [4] = { +-- [1] = { +-- [1] = 0, +-- [2] = 72 +-- } +-- }, +-- [5] = "Aber ihr könnt nicht leugnen, dass ihr einen Königspalast zerstört habt.", +-- [6] = 0, +-- [7] = 0 +-- }, +-- [3] = { +-- [1] = "Ist der König tot, so bleibt doch das Reich bestehen, ebenso wie ein Schiff bleibt, dessen Steuermann gefallen ist", +-- [3] = { +-- [1] = { +-- [1] = "If the king is dead, then the kingdom remains, just as a ship remains, whose helmsman has fallen", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- }, +-- [2] = { 
+-- [1] = "yet the king dead, remains the kingdom stand remains as a ship the helmsman has fallen", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- } +-- }, +-- [4] = { +-- [1] = { +-- [1] = 0, +-- [2] = 114 +-- } +-- }, +-- [5] = "Ist der König tot, so bleibt doch das Reich bestehen, ebenso wie ein Schiff bleibt, dessen Steuermann gefallen ist", +-- [6] = 0, +-- [7] = 0 +-- } +-- }, +-- [7] = 1, +-- [9] = { +-- [1] = { +-- [1] = "de" +-- }, +-- [3] = { +-- [1] = 1 +-- }, +-- [4] = { +-- [1] = "de" +-- } +-- }, +-- ["n"] = 9 +-- } +-- +-- Single word result with all dt= enabled: +-- { +-- [1] = { +-- [1] = { +-- [1] = "fork", +-- [2] = "fourchette", +-- [5] = 0, +-- ["n"] = 5 +-- } +-- }, +-- [2] = { +-- [1] = { +-- [1] = "noun", +-- [2] = { +-- [1] = "fork" +-- }, +-- [3] = { +-- [1] = { +-- [1] = "fork", +-- [2] = { +-- [1] = "fourche", +-- [2] = "fourchette", +-- [3] = "embranchement", +-- [4] = "chariot", +-- [5] = "chariot à fourche" +-- }, +-- [4] = 0.21967085 +-- } +-- }, +-- [4] = "fourchette", +-- [5] = 1 +-- } +-- }, +-- [3] = "fr", +-- [6] = { +-- [1] = { +-- [1] = "fourchette", +-- [3] = { +-- [1] = { +-- [1] = "fork", +-- [2] = 1000, +-- [3] = true, +-- [4] = false +-- }, +-- [2] = { +-- [1] = "band", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- }, +-- [3] = { +-- [1] = "bracket", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- }, +-- [4] = { +-- [1] = "range", +-- [2] = 0, +-- [3] = true, +-- [4] = false +-- } +-- }, +-- [4] = { +-- [1] = { +-- [1] = 0, +-- [2] = 10 +-- } +-- }, +-- [5] = "fourchette", +-- [6] = 0, +-- [7] = 1 +-- } +-- }, +-- [7] = 1, +-- [9] = { +-- [1] = { +-- [1] = "fr" +-- }, +-- [3] = { +-- [1] = 1 +-- }, +-- [4] = { +-- [1] = "fr" +-- } +-- }, +-- [12] = { +-- [1] = { +-- [1] = "noun", +-- [2] = { +-- [1] = { +-- [1] = { +-- [1] = "ramification", +-- [2] = "enfourchure" +-- }, +-- [2] = "" +-- }, +-- [2] = { +-- [1] = { +-- [1] = "échéance", +-- [2] = "bande" +-- }, +-- [2] = "" +-- }, +-- [3] = { +-- [1] = { +-- [1] = 
"ramification", +-- [2] = "jambe" +-- }, +-- [2] = "" +-- }, +-- [4] = { +-- [1] = { +-- [1] = "bifurcation" +-- }, +-- [2] = "" +-- }, +-- [5] = { +-- [1] = { +-- [1] = "fourche", +-- [2] = "bifurcation", +-- [3] = "entrejambe" +-- }, +-- [2] = "" +-- }, +-- [6] = { +-- [1] = { +-- [1] = "fourche", +-- [2] = "bifurcation" +-- }, +-- [2] = "" +-- } +-- }, +-- [3] = "fourchette" +-- } +-- }, +-- [13] = { +-- [1] = { +-- [1] = "noun", +-- [2] = { +-- [1] = { +-- [1] = "Ustensile de table.", +-- [2] = "12518.0", +-- [3] = "Des fourchettes, des couteaux et des cuillères ." +-- }, +-- [2] = { +-- [1] = "Ecart entre deux valeurs.", +-- [2] = "12518.1", +-- [3] = "La fourchette des prix ." +-- } +-- }, +-- [3] = "fourchette" +-- } +-- }, +-- [14] = { +-- [1] = { +-- [1] = { +-- [1] = "La fourchette des prix .", +-- [5] = 3, +-- [6] = "12518.1", +-- ["n"] = 6 +-- } +-- } +-- }, +-- ["n"] = 14 +-- } diff --git a/spec/unit/translator_spec.lua b/spec/unit/translator_spec.lua index 6c0270e13..e1957cf30 100644 --- a/spec/unit/translator_spec.lua +++ b/spec/unit/translator_spec.lua @@ -7,21 +7,22 @@ describe("Translator module", function() Translator = require("ui/translator") end) it("should return server", function() - assert.is.same("http://translate.google.cn", Translator:getTransServer()) + assert.is.same("https://translate.googleapis.com/", Translator:getTransServer()) G_reader_settings:saveSetting("trans_server", "http://translate.google.nl") G_reader_settings:flush() assert.is.same("http://translate.google.nl", Translator:getTransServer()) G_reader_settings:delSetting("trans_server") G_reader_settings:flush() end) - it("should return translation #notest #nocov", function() - local translation_result = Translator:loadPage("en", "nl", dutch_wikipedia_text) + -- add " #notest #nocov" to the it("description string") when it does not work anymore + it("should return translation", function() + local translation_result = Translator:translate(dutch_wikipedia_text, "en") 
assert.is.truthy(translation_result) -- while some minor variation in the translation is possible it should -- be between about 100 and 130 characters assert.is_true(#translation_result > 50 and #translation_result < 200) end) - it("should autodetect language #notest #nocov", function() + it("should autodetect language", function() local detect_result = Translator:detect(dutch_wikipedia_text) assert.is.same("nl", detect_result) end)