NewsDownloader: use feed.description as news context instead of downloading the full web page (#3426)

Fixes #3425.
Branch: pull/3437/head
mwoz123 authored 7 years ago, committed by Frans de Jonge
parent 2ddcc7155a
commit 97a72e2dd4

@@ -1,11 +1,27 @@
 return {
     -- list your feeds here:
-    -- set 'limit' to change number of 'news' to be downloaded from source
-    -- 'limit' equal "0" means no limit.
-    { "http://feeds.reuters.com/Reuters/worldNews?format=xml", limit = 2},
-    { "http://www.pcworld.com/index.rss", limit = 1 },
-    -- comment out line to stop downloading source
-    --{ "http://www.football.co.uk/international/rss.xml", limit = 0 },
+    --HELP:
+    -- use syntax: {"your_url", limit = max_number_of_items_to_be_created, download_full_article=true/false}
+    -- set 'limit' to change the number of 'news' items to be created
+    -- 'limit' equal "0" means no limit.
+    -- 'download_full_article=true' - download the full article using the feed link (may not always work correctly)
+    -- 'download_full_article=false' - use only the feed description to create the news file (usually only part of the article)
+    -- comment out a line ("--" at line start) to stop downloading that source
+    { "http://feeds.reuters.com/Reuters/worldNews?format=xml", limit = 2, download_full_article=false},
+    { "https://www.pcworld.com/index.rss", limit = 7, download_full_article=true},
+    --{ "http://www.football.co.uk/international/rss.xml", limit = 0, download_full_article=true},
 }
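
One consequence the help text leaves implicit, but which follows from the code below: `feed.download_full_article` is read directly from each entry, and a missing key yields `nil`, which is falsy in Lua, so entries that predate this patch fall through to the description-only path. A minimal sketch (the entry is hypothetical):

    -- Hypothetical entry without the new key (like the old pcworld line):
    local feed = { "http://www.pcworld.com/index.rss", limit = 1 }
    local download_full_article = feed.download_full_article  -- nil: key absent
    if download_full_article then
        print("would download the full article")
    else
        print("would build the page from the feed description")  -- this branch runs
    end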

@@ -160,12 +160,13 @@ function NewsDownloader:loadConfigAndProcessFeeds()
     for idx, feed in ipairs(feed_config) do
         local url = feed[1]
         local limit = feed.limit
+        local download_full_article = feed.download_full_article
         if url and limit then
             info = InfoMessage:new{ text = T(_("Processing: %1"), url) }
             UIManager:show(info)
             -- processFeedSource is a blocking call, so manually force a UI refresh beforehand
             UIManager:forceRePaint()
-            self:processFeedSource(url, tonumber(limit), unsupported_feeds_urls)
+            self:processFeedSource(url, tonumber(limit), unsupported_feeds_urls, download_full_article)
             UIManager:close(info)
         else
             logger.warn('NewsDownloader: invalid feed config entry', feed)
@@ -191,7 +192,7 @@ function NewsDownloader:loadConfigAndProcessFeeds()
     end
 end

-function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls)
+function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, download_full_article)
     local resp_lines = {}
     local parsed = socket_url.parse(url)
     local httpRequest = parsed.scheme == 'http' and http.request or https.request
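
The scheme check on the last context line above picks between LuaSocket's `http.request` and LuaSec's `https.request` via the common Lua `and`/`or` idiom. A small standalone sketch of the same selection, assuming stock LuaSocket/LuaSec modules and using a URL from the sample config:

    local socket_url = require("socket.url")
    local http = require("socket.http")
    local https = require("ssl.https")

    local parsed = socket_url.parse("https://www.pcworld.com/index.rss")
    -- 'and'/'or' picks http.request only when the scheme is plain http
    local httpRequest = parsed.scheme == 'http' and http.request or https.request
    print(parsed.scheme)  -- "https", so https.request was chosen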
@@ -207,9 +208,9 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls)
     local is_atom = feeds.feed and feeds.feed.title and feeds.feed.entry[1] and feeds.feed.entry[1].title and feeds.feed.entry[1].link
     if is_atom then
-        self:processAtom(feeds, limit)
+        self:processAtom(feeds, limit, download_full_article)
     elseif is_rss then
-        self:processRSS(feeds, limit)
+        self:processRSS(feeds, limit, download_full_article)
     else
         table.insert(unsupported_feeds_urls, url)
         return
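
The dispatch above duck-types the table returned by the plugin's XML deserializer: Atom documents parse to a top-level `feed` with `entry` children, while RSS (as `processRSS` below shows) parses to `rss.channel`. A hypothetical illustration of the two shapes the checks are probing for (all field values invented; the exact structure depends on the deserializer):

    local atom_like = {
        feed = {
            title = "Example Atom feed",
            entry = { { title = "First entry", link = "https://example.com/1" } },
        },
    }
    local rss_like = {
        rss = {
            channel = {
                title = "Example RSS feed",
                item = { { title = "First item", description = "Short summary." } },
            },
        },
    }
    -- mirrors the is_atom test above; evaluates to the entry link, i.e. truthy
    local is_atom = atom_like.feed and atom_like.feed.title
        and atom_like.feed.entry[1] and atom_like.feed.entry[1].title
        and atom_like.feed.entry[1].link
    print(is_atom)  -- "https://example.com/1" -> routed to processAtom
    print(rss_like.rss.channel.title)  -- the field processRSS uses for the output dir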
@@ -233,7 +234,7 @@ function NewsDownloader:deserializeXMLString(xml_str)
     return xmlhandler.root
 end

-function NewsDownloader:processAtom(feeds, limit)
+function NewsDownloader:processAtom(feeds, limit, download_full_article)
     local feed_output_dir = string.format("%s%s/",
                                           news_download_dir_path,
                                           util.replaceInvalidChars(getFeedTitle(feeds.feed.title)))
@@ -245,11 +246,15 @@ function NewsDownloader:processAtom(feeds, limit)
         if limit ~= 0 and index - 1 == limit then
             break
         end
-        self:downloadFeed(feed, feed_output_dir)
+        if download_full_article then
+            self:downloadFeed(feed, feed_output_dir)
+        else
+            self:createFromDescription(feed, feed.context, feed_output_dir)
+        end
     end
 end

-function NewsDownloader:processRSS(feeds, limit)
+function NewsDownloader:processRSS(feeds, limit, download_full_article)
     local feed_output_dir = ("%s%s/"):format(
         news_download_dir_path, util.replaceInvalidChars(feeds.rss.channel.title))
     if not lfs.attributes(feed_output_dir, "mode") then
@@ -260,7 +265,11 @@ function NewsDownloader:processRSS(feeds, limit)
         if limit ~= 0 and index - 1 == limit then
             break
         end
-        self:downloadFeed(feed, feed_output_dir)
+        if download_full_article then
+            self:downloadFeed(feed, feed_output_dir)
+        else
+            self:createFromDescription(feed, feed.description, feed_output_dir)
+        end
     end
 end
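
Worth noting in both loops above: the guard `limit ~= 0 and index - 1 == limit` runs before entry `index` is processed, so the loop stops once `limit` entries have been handled, and `limit = 0` can never match, which is what makes "0 means no limit" work. The pattern in isolation:

    local limit = 2
    for index = 1, 5 do
        if limit ~= 0 and index - 1 == limit then
            break  -- entries 1 and 2 are done; stop before entry 3
        end
        print("processing entry", index)
    end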
@@ -276,6 +285,25 @@ function NewsDownloader:downloadFeed(feed, feed_output_dir)
     httpRequest({ url = link, sink = ltn12.sink.file(io.open(news_dl_path, 'w')), })
 end

+function NewsDownloader:createFromDescription(feed, context, feed_output_dir)
+    local news_file_path = ("%s%s%s"):format(feed_output_dir,
+                                             util.replaceInvalidChars(getFeedTitle(feed.title)),
+                                             file_extension)
+    logger.dbg("NewsDownloader: News file will be created:", news_file_path)
+    local file = io.open(news_file_path, "w")
+    local footer = _("This is just a description of the feed. To download the full article, go to the News Downloader settings and change 'download_full_article' to 'true'.")
+    local html = string.format([[<!DOCTYPE html>
+<html>
+<head><meta charset='UTF-8'><title>%s</title></head>
+<body><header><h2>%s</h2></header><article>%s</article>
+<br><footer><small>%s</small></footer>
+</body>
+</html>]], feed.title, feed.title, context, footer)
+    file:write(html)
+    file:close()
+end
+
 function NewsDownloader:removeNewsButKeepFeedConfig()
     logger.dbg("NewsDownloader: Removing news from :", news_download_dir_path)
     for entry in lfs.dir(news_download_dir_path) do
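
To make the template concrete: for a hypothetical RSS item with title "Example headline" and description "<p>First paragraph only.</p>", createFromDescription would write roughly this file (footer abbreviated):

    <!DOCTYPE html>
    <html>
    <head><meta charset='UTF-8'><title>Example headline</title></head>
    <body><header><h2>Example headline</h2></header><article><p>First paragraph only.</p></article>
    <br><footer><small>This is just a description of the feed. ...</small></footer>
    </body>
    </html>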
