You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
koreader/frontend/ui/wikipedia.lua

132 lines
4.1 KiB
Lua

local JSON = require("json")
local logger = require("logger")
--[[
-- Query wikipedia using Wikimedia Web API.
-- https://en.wikipedia.org/w/api.php?format=jsonfm&action=query&generator=search&gsrnamespace=0&gsrsearch=ereader&gsrlimit=10&prop=extracts&exintro&explaintext&exlimit=max
-- https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=jsonfm&explaintext=&redirects=&titles=E-reader
--]]
local Wikipedia = {
    -- Where to send requests: "%s" in the server template is replaced by a
    -- language code (see default_lang for the fallback).
    wiki_server = "https://%s.wikipedia.org",
    default_lang = "en",
    wiki_path = "/w/api.php",

    -- Parameters for fetching the content of a single page.
    -- The text to look up is appended by the request code as an extra
    -- parameter, so it is not listed here.
    wiki_params = {
        action = "query",
        prop = "extracts",
        format = "json",
        -- 'exintro' is deliberately absent: we want more than only the intro
        explaintext = "",
        redirects = "",
    },

    -- Parameters for a search query, which gives better results.
    -- See https://www.mediawiki.org/wiki/API:Main_page
    -- The search text is appended by the request code as an extra parameter.
    wiki_search_params = {
        action = "query",
        generator = "search",
        gsrnamespace = "0",
        gsrlimit = 20, -- maximum number of results to get
        exlimit = "max",
        -- 'extracts' to get text, 'info' to get full page length
        prop = "extracts|info",
        format = "json",
        explaintext = "",
        -- 'exintro=' is required to get extracts for ALL results
        -- (otherwise we get the full text for only the first result, and
        -- no text at all for the others).
        exintro = "",
    },
}
--[[
-- Build the server base URL for a language.
-- @param lang language code substituted into self.wiki_server;
--             self.default_lang is used when lang is nil or false
-- @return the formatted server URL string
--]]
function Wikipedia:getWikiServer(lang)
    local language = lang
    if not language then
        language = self.default_lang
    end
    return string.format(self.wiki_server, language)
end
--[[
-- Query Wikipedia through the Wikimedia Web API and return the decoded JSON.
--
-- @param text  text to look up: search terms when intro is true, otherwise
--              the title of the page to fetch
-- @param lang  language code for the server (see getWikiServer)
-- @param intro exactly true to run a search query (self.wiki_search_params);
--              any other value fetches page content (self.wiki_params)
-- @param plain truthy to add the 'explaintext' parameter to the request
-- @return the decoded JSON table on success; nothing when the HTTP code is
--         not 200 or when the body is not decodable JSON (a warning is logged)
-- Raises "Network is unreachable" when the request returns no headers.
--]]
function Wikipedia:loadPage(text, lang, intro, plain)
    local socket = require('socket')
    local url = require('socket.url')
    local http = require('socket.http')
    local https = require('ssl.https')
    local ltn12 = require('ltn12')

    local parsed = url.parse(self:getWikiServer(lang))
    parsed.path = self.wiki_path

    -- Select the parameter template and the name of the parameter that
    -- carries the looked-up text.
    local template, text_param
    if intro == true then -- search query
        template, text_param = self.wiki_search_params, "gsrsearch"
    else -- full page content
        template, text_param = self.wiki_params, "titles"
    end

    -- Build the query string for this request only: the shared templates are
    -- read, never written, so 'explaintext' is decided here from `plain`
    -- rather than by toggling the key inside the template.
    local parts = {}
    for k, v in pairs(template) do
        if k ~= "explaintext" then
            -- escape values too (e.g. the '|' in "extracts|info")
            parts[#parts + 1] = k .. '=' .. url.escape(tostring(v))
        end
    end
    if plain then
        parts[#parts + 1] = "explaintext="
    end
    parts[#parts + 1] = text_param .. '=' .. url.escape(text)
    parsed.query = table.concat(parts, '&')

    -- HTTP request
    local sink = {}
    local request = {
        url = url.build(parsed),
        method = 'GET',
        sink = ltn12.sink.table(sink),
    }
    http.TIMEOUT, https.TIMEOUT = 10, 10
    local httpRequest = parsed.scheme == 'http' and http.request or https.request
    -- skip(1, ...) drops the first returned value, so what remains is
    -- code, headers, status (or only an error message on failure)
    local code, headers, status = socket.skip(1, httpRequest(request))

    -- raise error message when network is unavailable
    if headers == nil then
        error("Network is unreachable")
    end

    -- Test the numeric code instead of the exact status line, so a 200 reply
    -- with another protocol version or reason phrase is still accepted.
    if code ~= 200 then
        logger.warn("HTTP status not okay:", status or code)
        return
    end

    local content = table.concat(sink)
    -- cheap sanity check before decoding: a JSON object starts with '{'
    if content ~= "" and string.sub(content, 1, 1) == "{" then
        local ok, result = pcall(JSON.decode, content)
        if ok and result then
            logger.dbg("wiki result", result)
            return result
        else
            logger.warn("wiki error:", result)
        end
    else
        logger.warn("not JSON from wiki response:", content)
    end
end
--[[
-- Search Wikipedia and get the intros of the results.
-- Calls loadPage in search mode with plain text requested.
-- @param text search terms
-- @param lang language code, passed through to loadPage
-- @return the 'pages' field of the response's 'query' table; nothing when
--         loadPage gives no result or the result has no 'query'
--]]
function Wikipedia:wikintro(text, lang)
    local response = self:loadPage(text, lang, true, true)
    if not response then
        return
    end
    local query = response.query
    if not query then
        return
    end
    return query.pages
end
--[[
-- Get the full content of a wiki page.
-- Calls loadPage in page-content mode with plain text requested.
-- @param text title of the page
-- @param lang language code, passed through to loadPage
-- @return the 'pages' field of the response's 'query' table; nothing when
--         loadPage gives no result or the result has no 'query'
--]]
function Wikipedia:wikifull(text, lang)
    local response = self:loadPage(text, lang, false, true)
    if not response then
        return
    end
    local query = response.query
    if not query then
        return
    end
    return query.pages
end
-- Hand the Wikipedia table back as this chunk's return value, i.e. the value
-- a caller obtains when loading this file as a module.
return Wikipedia