Bug fix: still not grabbing full content from nytimes.com articles

Solution: replace empty <DIV> child elements (one level deep) with plain
text nodes so they don’t obstruct the merging of adjacent content downstream.
pull/338/head
andrei-ch 8 years ago committed by Gijs
parent 486927ebd9
commit 5b1e69bdf2

@ -727,12 +727,14 @@ Readability.prototype = {
} else {
// EXPERIMENTAL
this._forEachNode(node.childNodes, function(childNode) {
if (childNode.nodeType === Node.TEXT_NODE && childNode.textContent.trim() != "") {
if (childNode.nodeType === Node.TEXT_NODE && childNode.textContent.trim().length > 0) {
var p = doc.createElement('p');
p.textContent = childNode.textContent;
p.style.display = 'inline';
p.className = 'readability-styled';
node.replaceChild(p, childNode);
} else if (this._isEmptyDivElement(childNode)) {
node.replaceChild(doc.createTextNode(childNode.textContent), childNode);
}
});
}
@ -1158,6 +1160,13 @@ Readability.prototype = {
});
},
_isEmptyDivElement: function(node) {
return node.nodeType === Node.ELEMENT_NODE &&
node.tagName === "DIV" &&
node.children.length == 0 &&
node.textContent.trim().length == 0;
},
/**
* Determine whether element has any children block level elements.
*

Loading…
Cancel
Save