From d8366f0686687cdfa2d8c70ad8b395639c6815f1 Mon Sep 17 00:00:00 2001 From: Radhi Fadlillah Date: Fri, 3 Apr 2020 06:31:02 +0700 Subject: [PATCH] Keep all attributes that might contain image --- Readability.js | 38 +++++++++++++++----- test/test-pages/mozilla-1/expected.html | 2 +- test/test-pages/seattletimes-1/expected.html | 2 +- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/Readability.js b/Readability.js index 4f5d687..3259bcb 100644 --- a/Readability.js +++ b/Readability.js @@ -1343,18 +1343,26 @@ Readability.prototype = { * @param Element **/ _unwrapNoscriptImages: function(doc) { - // Find img without source and remove it. This is done to prevent a placeholder - // img is replaced by img from noscript in next step. + // Find img without a source or any attribute that might contain an image, and remove it. + // This is done to prevent a placeholder img from being replaced by the img from noscript in the next step. var imgs = Array.from(doc.getElementsByTagName("img")); this._forEachNode(imgs, function(img) { - var src = img.getAttribute("src"); - var srcset = img.getAttribute("srcset"); - var dataSrc = img.getAttribute("data-src"); - var dataSrcset = img.getAttribute("data-srcset"); + for (var i = 0; i < img.attributes.length; i++) { + var attr = img.attributes[i]; + switch (attr.name) { + case "src": + case "srcset": + case "data-src": + case "data-srcset": + return + } - if (!src && !srcset && !dataSrc && !dataSrcset) { - img.parentNode.removeChild(img); + if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { + return + } } + + img.parentNode.removeChild(img); }); // Next find noscript and try to extract its image @@ -1372,7 +1380,8 @@ Readability.prototype = { // is possibility that img inside noscript has lower quality // than the one in previous sibling, so we will keep the src // and srcset attribute from old img as data attribute for - // img from noscript. + // img from noscript. We also keep old attributes that might + // contain an image. 
var prevElement = noscript.previousElementSibling; if (prevElement && this._isSingleImage(prevElement)) { var prevImg = prevElement; @@ -1394,6 +1403,17 @@ Readability.prototype = { newImg.setAttribute("data-old-srcset", prevImgSrcset); } + for (var i = 0; i < prevImg.attributes.length; i++) { + var attr = prevImg.attributes[i]; + if (attr.name == "src" || attr.name == "srcset" || newImg.hasAttribute(attr.name)) { + continue; + } + + if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { + newImg.setAttribute(attr.name, attr.value); + } + } + noscript.parentNode.replaceChild(tmp.children[0], prevElement); } }); diff --git a/test/test-pages/mozilla-1/expected.html b/test/test-pages/mozilla-1/expected.html index 8592639..cdd45c1 100644 --- a/test/test-pages/mozilla-1/expected.html +++ b/test/test-pages/mozilla-1/expected.html @@ -13,7 +13,7 @@

Designed to
be redesigned

Get fast and easy access to the features you use most in the new menu. Open the “Customize” panel to add, move or remove any button you want. Keep your favorite features — add-ons, private browsing, Sync and more — one quick click away.

-

+

diff --git a/test/test-pages/seattletimes-1/expected.html b/test/test-pages/seattletimes-1/expected.html index 6af8fa3..783af80 100644 --- a/test/test-pages/seattletimes-1/expected.html +++ b/test/test-pages/seattletimes-1/expected.html @@ -24,7 +24,7 @@

As it tries to convince people it has lower prices, Whole Foods has been very careful to maintain the reputation built on products like MSC-certified halibut.

- Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times) + Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)
Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)