diff --git a/.eslintrc b/.eslintrc index d43f454..0185472 100644 --- a/.eslintrc +++ b/.eslintrc @@ -10,10 +10,10 @@ // "block-spacing": 1, // No newline before open brace for a block - // "brace-style": 2, + "brace-style": 2, // No space before always a space after a comma - // "comma-spacing": [2, {"before": false, "after": true}], + "comma-spacing": [2, {"before": false, "after": true}], // Commas at the end of the line not the start // "comma-style": 2, @@ -36,16 +36,16 @@ // "generator-star-spacing": [2, {"before": false, "after": true}], // Two space indent - // "indent": [2, 2, { "SwitchCase": 1 }], + "indent": [2, 2, { "SwitchCase": 1 }], // Space after colon not before in property declarations - // "key-spacing": [2, { "beforeColon": false, "afterColon": true, "mode": "minimum" }], + "key-spacing": [2, { "beforeColon": false, "afterColon": true, "mode": "minimum" }], // Unix linebreaks "linebreak-style": [2, "unix"], // Always require parenthesis for new calls - // "new-parens": 2, + "new-parens": 2, // Use [] instead of Array() // "no-array-constructor": 2, @@ -63,10 +63,10 @@ "no-labels": 2, // If an if block ends with a return no need for an else block - // "no-else-return": 2, + "no-else-return": 2, // No empty statements - // "no-empty": 2, + "no-empty": 2, // No empty character classes in regex "no-empty-character-class": 2, @@ -96,13 +96,13 @@ "no-irregular-whitespace": 2, // No single if block inside an else block - // "no-lonely-if": 2, + "no-lonely-if": 2, // No mixing spaces and tabs in indent "no-mixed-spaces-and-tabs": [2, "smart-tabs"], // No unnecessary spacing - // "no-multi-spaces": [2, { exceptions: { "AssignmentExpression": true, "VariableDeclarator": true, "ArrayExpression": true, "ObjectExpression": true } }], + "no-multi-spaces": [2, { exceptions: { "AssignmentExpression": true, "VariableDeclarator": true, "ArrayExpression": true, "ObjectExpression": true } }], // No reassigning native JS objects "no-native-reassign": 2, @@ -129,13 
+129,13 @@ "no-self-compare": 2, // No declaring variables from an outer scope - // "no-shadow": 2, + "no-shadow": 2, // No declaring variables that hide things like arguments - // "no-shadow-restricted-names": 2, + "no-shadow-restricted-names": 2, // No spaces between function name and parentheses - // "no-spaced-func": 2, + "no-spaced-func": 2, // No trailing whitespace "no-trailing-spaces": 2, @@ -147,13 +147,13 @@ "no-unexpected-multiline": 2, // No unreachable statements - // "no-unreachable": 2, + "no-unreachable": 2, // No expressions where a statement is expected // "no-unused-expressions": 2, // No declaring variables that are never used - // "no-unused-vars": [2, {"vars": "all", "args": "none"}], + "no-unused-vars": [2, {"vars": "all", "args": "none"}], // No using variables before defined // "no-use-before-define": [2, "nofunc"], @@ -162,13 +162,13 @@ "no-with": 2, // Always require semicolon at end of statement - // "semi": [2, "always"], + "semi": [2, "always"], // Require space after keywords - // "space-after-keywords": 2, + "keyword-spacing": 2, // Require space before blocks - // "space-before-blocks": 2, + "space-before-blocks": 2, // Never use spaces before function parentheses // "space-before-function-paren": [2, { "anonymous": "always", "named": "never" }], diff --git a/JSDOMParser.js b/JSDOMParser.js index 58b7584..3a26037 100644 --- a/JSDOMParser.js +++ b/JSDOMParser.js @@ -453,16 +453,15 @@ else this.children.push(newNode); } - } else { + } else if (oldNode.nodeType === Node.ELEMENT_NODE) { // new node is not an element node. 
// if the old one was, update its element siblings: - if (oldNode.nodeType === Node.ELEMENT_NODE) { - if (oldNode.previousElementSibling) - oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling; - if (oldNode.nextElementSibling) - oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling; - this.children.splice(this.children.indexOf(oldNode), 1); - } + if (oldNode.previousElementSibling) + oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling; + if (oldNode.nextElementSibling) + oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling; + this.children.splice(this.children.indexOf(oldNode), 1); + // If the old node wasn't an element, neither the new nor the old node was an element, // and the children array and its members shouldn't need any updating. } @@ -482,8 +481,8 @@ __JSDOMParser__: true, }; - for (var i in nodeTypes) { - Node[i] = Node.prototype[i] = nodeTypes[i]; + for (var nodeType in nodeTypes) { + Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType]; } var Attribute = function (name, value) { @@ -552,7 +551,7 @@ this._textContent = newText; delete this._innerHTML; }, - } + }; var Document = function () { this.styleSheets = []; @@ -822,7 +821,7 @@ Style.prototype.__defineSetter__(jsName, function (value) { this.setStyle(cssName, value); }); - }) (styleMap[jsName]); + })(styleMap[jsName]); } var JSDOMParser = function () { @@ -969,7 +968,7 @@ retPair[0] = node; retPair[1] = closed; - return true + return true; }, /** @@ -1186,4 +1185,4 @@ // Attach JSDOMParser to the global scope global.JSDOMParser = JSDOMParser; -}) (this); +})(this); diff --git a/Readability.js b/Readability.js index c4c4d44..08c5246 100644 --- a/Readability.js +++ b/Readability.js @@ -19,7 +19,6 @@ * This code is heavily based on Arc90's readability.js (1.7.1) script * available at: http://code.google.com/p/arc90labs-readability */ -var root = this; /** * Public constructor. 
@@ -27,7 +26,7 @@ var root = this; * @param {HTMLDocument} doc The document to parse. * @param {Object} options The options object. */ -var Readability = function(uri, doc, options) { +function Readability(uri, doc, options) { options = options || {}; this._uri = uri; @@ -76,12 +75,12 @@ var Readability = function(uri, doc, options) { return rv + elDesc; }; this.log = function () { - if ("dump" in root) { + if (typeof dump !== "undefined") { var msg = Array.prototype.map.call(arguments, function(x) { return (x && x.nodeName) ? logEl(x) : x; }).join(" "); dump("Reader: (Readability) " + msg + "\n"); - } else if ("console" in root) { + } else if (typeof console !== "undefined") { var args = ["Reader: (Readability) "].concat(arguments); console.log.apply(console, args); } @@ -158,7 +157,7 @@ Readability.prototype = { var node = nodeList[i]; var parentNode = node.parentNode; if (parentNode) { - if(!filterFn || filterFn.call(this, node, i, nodeList)) { + if (!filterFn || filterFn.call(this, node, i, nodeList)) { parentNode.removeChild(node); } } @@ -178,7 +177,7 @@ Readability.prototype = { * @return void */ _forEachNode: function(nodeList, fn, backward) { - Array.prototype.forEach.call(nodeList, fn, this); + Array.prototype.forEach.call(nodeList, fn, this); }, /** @@ -299,13 +298,13 @@ Readability.prototype = { // If they had an element with id "title" in their HTML if (typeof curTitle !== "string") curTitle = origTitle = this._getInnerText(doc.getElementsByTagName('title')[0]); - } catch(e) {} + } catch (e) {/* ignore exceptions setting the title. 
*/} if (curTitle.match(/ [\|\-] /)) { - curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1'); + curTitle = origTitle.replace(/(.*)[\|\-] .*/gi, '$1'); if (curTitle.split(' ').length < 3) - curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1'); + curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi, '$1'); } else if (curTitle.indexOf(': ') !== -1) { // Check if we have an heading containing this exact string, so we // could assume it's the full title. @@ -508,7 +507,7 @@ Readability.prototype = { _initializeNode: function(node) { node.readability = {"contentScore": 0}; - switch(node.tagName) { + switch (node.tagName) { case 'DIV': node.readability.contentScore += 5; break; @@ -627,7 +626,7 @@ Readability.prototype = { maxDepth = maxDepth || 0; var i = 0, ancestors = []; while (node.parentNode) { - ancestors.push(node.parentNode) + ancestors.push(node.parentNode); if (maxDepth && ++i === maxDepth) break; node = node.parentNode; @@ -1109,9 +1108,8 @@ Readability.prototype = { if (normalizeSpaces) { return textContent.replace(this.REGEXPS.normalize, " "); - } else { - return textContent; } + return textContent; }, /** @@ -1121,7 +1119,7 @@ Readability.prototype = { * @param string - what to split on. 
Default is "," * @return number (integer) **/ - _getCharCount: function(e,s) { + _getCharCount: function(e, s) { s = s || ","; return this._getInnerText(e).split(s).length - 1; }, @@ -1390,15 +1388,14 @@ Readability.prototype = { } } + var nextHref = null; if (topPage) { - var nextHref = topPage.href.replace(/\/$/,''); + nextHref = topPage.href.replace(/\/$/, ''); this.log('NEXT PAGE IS ' + nextHref); this._parsedPages[nextHref] = true; - return nextHref; - } else { - return null; } + return nextHref; }, _successfulRequest: function(request) { @@ -1415,9 +1412,8 @@ Readability.prototype = { if (this._successfulRequest(request)) { if (options.success) options.success(request); - } else { - if (options.error) - options.error(request); + } else if (options.error) { + options.error(request); } } } @@ -1470,9 +1466,8 @@ Readability.prototype = { this.log("Exact duplicate page found via ETag. Aborting."); articlePage.style.display = 'none'; return; - } else { - this._pageETags[eTag] = 1; } + this._pageETags[eTag] = 1; } // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away. @@ -1486,9 +1481,9 @@ Readability.prototype = { // - Turn all double br's into p's - was handled by prepDocument in the original view. // Maybe in the future abstract out prepDocument to work for both the original document // and AJAX-added pages. 
- var responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/.*?<\/script>/gi, ''); - responseHtml = responseHtml.replace(/\n/g,'\uffff').replace(/.*?<\/script>/gi, ''); - responseHtml = responseHtml.replace(/\uffff/g,'\n').replace(/<(\/?)noscript/gi, '<$1div'); + var responseHtml = r.responseText.replace(/\n/g, '\uffff').replace(/.*?<\/script>/gi, ''); + responseHtml = responseHtml.replace(/\n/g, '\uffff').replace(/.*?<\/script>/gi, ''); + responseHtml = responseHtml.replace(/\uffff/g, '\n').replace(/<(\/?)noscript/gi, '<$1div'); responseHtml = responseHtml.replace(this.REGEXPS.replaceFonts, '<$1span>'); page.innerHTML = responseHtml; @@ -1498,7 +1493,7 @@ Readability.prototype = { // disable as necessary at the end of grabArticle. this._flags = 0x1 | 0x2 | 0x4; - var nextPageLink = this._findNextPageLink(page); + var secondNextPageLink = this._findNextPageLink(page); // NOTE: if we end up supporting _appendNextPage(), we'll need to // change this call to be async @@ -1537,8 +1532,8 @@ Readability.prototype = { }).bind(this), 500); - if (nextPageLink) - this._appendNextPage(nextPageLink); + if (secondNextPageLink) + this._appendNextPage(secondNextPageLink); } }); }).bind(this)(nextPageLink, articlePage); @@ -1659,7 +1654,7 @@ Readability.prototype = { return true; } - if (this._getCharCount(node,',') < 10) { + if (this._getCharCount(node, ',') < 10) { // If there are not very many commas, and the number of // non-paragraph elements is more than paragraphs or other // ominous signs, remove the element. 
@@ -1689,7 +1684,7 @@ Readability.prototype = { ((embedCount === 1 && contentLength < 75) || embedCount > 1); return haveToRemove; } - return false + return false; }); }, diff --git a/benchmarks/benchmark-reporter.js b/benchmarks/benchmark-reporter.js index c0ae0ba..f58278d 100644 --- a/benchmarks/benchmark-reporter.js +++ b/benchmarks/benchmark-reporter.js @@ -4,7 +4,9 @@ function average(list) { if (!list.length) return 0; - var sum = list.reduce(function(previous, current) { return previous + current; }); + var sum = list.reduce(function(previous, current) { + return previous + current; + }); return (sum / list.length).toFixed(0); } @@ -16,8 +18,8 @@ module.exports = function(runner, utils) { var color = utils.color; var results = {}; var currentResults = []; - runner.on('bench end', function(results) { - currentResults.push(results.ops); + runner.on('bench end', function(benchResults) { + currentResults.push(benchResults.ops); }); runner.on('suite end', function(suite) { var avg = humanize(average(currentResults)); diff --git a/index.js b/index.js index 149d760..214c4f2 100644 --- a/index.js +++ b/index.js @@ -12,7 +12,7 @@ var jsdomPath = path.join(__dirname, "JSDOMParser.js"); var scopeContext = {}; // We generally expect dump() and console.{whatever} to work, so make these available // in the scope we're using: -scopeContext.dump = console.log +scopeContext.dump = console.log; scopeContext.console = console; // Actually load files. NB: if either of the files has parse errors, diff --git a/test/generate-testcase.js b/test/generate-testcase.js index c599ac2..74af11f 100644 --- a/test/generate-testcase.js +++ b/test/generate-testcase.js @@ -21,7 +21,7 @@ if (process.argv.length < 3) { } var slug = process.argv[2]; -var url = process.argv[3]; // Could be undefined, we'll warn if it is if that is an issue. +var argURL = process.argv[3]; // Could be undefined, we'll warn if it is if that is an issue. 
var destRoot = path.join(__dirname, "test-pages", slug); @@ -30,8 +30,8 @@ fs.mkdir(destRoot, function(err) { var sourceFile = path.join(destRoot, "source.html"); fs.exists(sourceFile, function(exists) { if (exists) { - fs.readFile(sourceFile, {encoding: "utf-8"}, function(err, data) { - if (err) { + fs.readFile(sourceFile, {encoding: "utf-8"}, function(readFileErr, data) { + if (readFileErr) { console.error("Source existed but couldn't be read?"); process.exit(1); return; @@ -39,12 +39,12 @@ fs.mkdir(destRoot, function(err) { onResponseReceived(data); }); } else { - fetchSource(url, onResponseReceived); + fetchSource(argURL, onResponseReceived); } }); return; } - fetchSource(url, onResponseReceived); + fetchSource(argURL, onResponseReceived); }); function fetchSource(url, callbackFn) { @@ -108,11 +108,11 @@ function runReadability(source, destPath, metadataDestPath) { scheme: "http", pathBase: "http://fakehost/test/" }; - var readability, result, readerable; + var myReader, result, readerable; try { - readability = new Readability(uri, doc); - readerable = readability.isProbablyReaderable(); - result = readability.parse(); + myReader = new Readability(uri, doc); + readerable = myReader.isProbablyReaderable(); + result = myReader.parse(); } catch (ex) { console.error(ex); ex.stack.forEach(console.log.bind(console)); @@ -122,10 +122,10 @@ function runReadability(source, destPath, metadataDestPath) { return; } - fs.writeFile(destPath, prettyPrint(result.content), function(err) { - if (err) { + fs.writeFile(destPath, prettyPrint(result.content), function(fileWriteErr) { + if (fileWriteErr) { console.error("Couldn't write data to expected.html!"); - console.error(err); + console.error(fileWriteErr); } // Delete the result data we don't care about checking. 
@@ -136,10 +136,10 @@ function runReadability(source, destPath, metadataDestPath) { // Add isProbablyReaderable result result.readerable = readerable; - fs.writeFile(metadataDestPath, JSON.stringify(result, null, 2) + "\n", function(err) { - if (err) { + fs.writeFile(metadataDestPath, JSON.stringify(result, null, 2) + "\n", function(metadataWriteErr) { + if (metadataWriteErr) { console.error("Couldn't write data to expected-metadata.json!"); - console.error(err); + console.error(metadataWriteErr); } process.exit(0); diff --git a/test/test-jsdomparser.js b/test/test-jsdomparser.js index 6543a54..4dab953 100644 --- a/test/test-jsdomparser.js +++ b/test/test-jsdomparser.js @@ -1,6 +1,3 @@ -var path = require("path"); -var fs = require("fs"); - var chai = require("chai"); chai.config.includeStack = true; var expect = chai.expect; @@ -226,8 +223,8 @@ describe("Test HTML escaping", function() { }); it("should handle decimal and hex escape sequences", function() { - var doc = new JSDOMParser().parse("

"); - expect(doc.getElementsByTagName("p")[0].textContent).eql(" "); + var parsedDoc = new JSDOMParser().parse("

"); + expect(parsedDoc.getElementsByTagName("p")[0].textContent).eql(" "); }); }); diff --git a/test/test-readability.js b/test/test-readability.js index c7d4f8f..0425db7 100644 --- a/test/test-readability.js +++ b/test/test-readability.js @@ -1,4 +1,3 @@ -var prettyPrint = require("./utils").prettyPrint; var jsdom = require("jsdom").jsdom; var chai = require("chai"); chai.config.includeStack = true; @@ -59,11 +58,11 @@ function runTestsWithItems(label, domGenerationFn, uri, source, expectedContent, before(function() { try { var doc = domGenerationFn(source); - var readability = new Readability(uri, doc); - var readerable = readability.isProbablyReaderable(); - result = readability.parse(); + var myReader = new Readability(uri, doc); + var readerable = myReader.isProbablyReaderable(); + result = myReader.parse(); result.readerable = readerable; - } catch(err) { + } catch (err) { throw reformatError(err); } }); diff --git a/test/utils.js b/test/utils.js index 5a9dfa4..5ae9657 100644 --- a/test/utils.js +++ b/test/utils.js @@ -2,12 +2,12 @@ var path = require("path"); var fs = require("fs"); var prettyPrint = require("js-beautify").html; -function readFile(path) { - return fs.readFileSync(path, {encoding: "utf-8"}).trim(); +function readFile(filePath) { + return fs.readFileSync(filePath, {encoding: "utf-8"}).trim(); } -function readJSON(path) { - return JSON.parse(readFile(path)); +function readJSON(jsonPath) { + return JSON.parse(readFile(jsonPath)); } var testPageRoot = path.join(__dirname, "test-pages"); @@ -37,4 +37,4 @@ exports.prettyPrint = function(html) { "wrap_attributes": "auto", "wrap_attributes_indent_size": 4 }); -} +};