Bug 1147584 - Don't remove unlikely <a> tags, and replace <a> tags with their text content if they won't be useful links

pull/79/head
Margaret Leibovic 9 years ago
parent a6014f5854
commit 37a8cd4171

@ -197,20 +197,29 @@ Readability.prototype = {
return pathBase + uri;
}
/**
 * Rewrites one attribute on every element of a given tag inside
 * articleContent, replacing the current value with the result of
 * toAbsoluteURI(value).
 *
 * Must be invoked with `this` bound to an object that provides
 * _forEachNode, e.g. convertRelativeURIs.call(this, "a", "href").
 *
 * @param {string} tagName  Tag name passed to getElementsByTagName.
 * @param {string} propName Name of the attribute to read and rewrite.
 */
function convertRelativeURIs(tagName, propName) {
  var matches = articleContent.getElementsByTagName(tagName);

  var rewriteAttribute = function(node) {
    var currentValue = node.getAttribute(propName);
    // Loose comparison on purpose: only a missing attribute
    // (null/undefined) is skipped; an empty string is still rewritten.
    if (currentValue == null) {
      return;
    }
    node.setAttribute(propName, toAbsoluteURI(currentValue));
  };

  this._forEachNode(matches, rewriteAttribute);
}
// Fix links.
convertRelativeURIs.call(this, "a", "href");
var links = articleContent.getElementsByTagName("a");
this._forEachNode(links, function(link) {
var href = link.getAttribute("href");
if (href) {
// Replace links with javascript: URIs with text content, since
// they won't work after scripts have been removed from the page.
if (href.indexOf("javascript:") === 0) {
var text = new Text();
text.textContent = link.textContent;
link.parentNode.replaceChild(text, link);
} else {
link.setAttribute("href", toAbsoluteURI(href));
}
}
});
// Fix images.
convertRelativeURIs.call(this, "img", "src");
var imgs = articleContent.getElementsByTagName("img");
this._forEachNode(imgs, function(img) {
var src = img.getAttribute("src");
if (src) {
img.setAttribute("src", toAbsoluteURI(src));
}
});
},
/**
@ -587,7 +596,8 @@ Readability.prototype = {
if (stripUnlikelyCandidates) {
if (this.REGEXPS.unlikelyCandidates.test(matchString) &&
!this.REGEXPS.okMaybeItsACandidate.test(matchString) &&
node.tagName !== "BODY") {
node.tagName !== "BODY" &&
node.tagName !== "A") {
this.log("Removing unlikely candidate - " + matchString);
node = this._removeAndGetNext(node);
continue;

@ -0,0 +1,6 @@
{
"title": "Bartleby the Scrivener Web Study Text",
"byline": null,
"excerpt": "Ere introducing the scrivener, as he first appeared to me, it is fit \n I make some mention of myself, my employees, my business, my chambers, \n and general surroundings; because some such description is indispensable \n to an adequate understanding of the chief character about to be presented.",
"readerable": true
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -31,4 +31,4 @@
</p><a name="continued"></a>
</div>
</div>
</div>
</div>
Loading…
Cancel
Save