Added benchmarks.

pull/90/head
Nicolas Perriault 9 years ago
parent f2b04930c1
commit 1e14d47bb7

@ -0,0 +1,14 @@
// Benchmark script: registers one benchmark per test page, each of which
// parses that page's HTML source with JSDOMParser.
// NOTE(review): suite/set/bench are not defined here; presumably they are
// globals injected by the benchmark runner that executes this script.
var lib = require("./index.js");
var Readability = lib.Readability; // not referenced by the benchmarks below
var JSDOMParser = lib.JSDOMParser;

suite("Readability test suite benchmarks", function () {
  set("iterations", 100);
  set("type", "static");

  // Load the pages once, up front, so only the parse call sits in the bench body.
  var pages = require("./test/bootstrap").getTestPages();
  pages.forEach(function (page) {
    var label = page.dir + " perf";
    bench(label, function () {
      new JSDOMParser().parse(page.source);
    });
  });
});

@ -0,0 +1,31 @@
var path = require("path");
var fs = require("fs");
var prettyPrint = require("html").prettyPrint;
var chai = require("chai");
chai.config.includeStack = true;
var expect = chai.expect;
// We want to load Readability and JSDOMParser, which aren't set up as commonjs libraries,
// and so we need to do some hocus-pocus with 'vm' to import them on a separate scope
// (identical) scope context.
var vm = require("vm");
var readabilityPath = path.join(__dirname, "Readability.js");
var jsdomPath = path.join(__dirname, "JSDOMParser.js");
var scopeContext = {};
// We generally expect dump() and console.{whatever} to work, so make these available
// in the scope we're using:
scopeContext.dump = console.log
scopeContext.console = console;
// Actually load files. NB: if either of the files has parse errors,
// node is dumb and shows you a syntax error *at this callsite* . Don't try to find
// a syntax error on this line, there isn't one. Go look in the file it's loading instead.
vm.runInNewContext(fs.readFileSync(jsdomPath), scopeContext, jsdomPath);
vm.runInNewContext(fs.readFileSync(readabilityPath), scopeContext, readabilityPath);
module.exports = {
Readability: scopeContext.Readability,
JSDOMParser: scopeContext.JSDOMParser
};

@ -5,7 +5,8 @@
"main": "Readability.js",
"scripts": {
"test": "mocha test/test-*.js",
"generate-testcase": "node test/generate-testcase.js"
"generate-testcase": "node test/generate-testcase.js",
"perf": "matcha benchmarks.js"
},
"repository": {
"type": "git",
@ -21,6 +22,7 @@
"chai": "^2.1.*",
"html": "0.0.*",
"jsdom": "^3.1.2",
"matcha": "^0.6.0",
"mocha": "^2.2.*"
}
}

23
test/bootstrap.js vendored

@ -0,0 +1,23 @@
var path = require("path");
var fs = require("fs");
/**
 * Synchronously reads a file as UTF-8 text.
 *
 * @param {string} filePath - Location of the file to read.
 * @returns {string} The file contents with leading and trailing whitespace removed.
 */
function readFile(filePath) {
  var contents = fs.readFileSync(filePath, {encoding: "utf-8"});
  return contents.trim();
}
/**
 * Reads a file through readFile() and parses its text as JSON.
 *
 * @param {string} jsonPath - Location of the JSON file to read.
 * @returns {*} The value produced by JSON.parse for the file's contents.
 */
function readJSON(jsonPath) {
  var text = readFile(jsonPath);
  return JSON.parse(text);
}
var testPageRoot = path.join(__dirname, "test-pages");
exports.getTestPages = function() {
return fs.readdirSync(testPageRoot).map(function(dir) {
return {
dir: dir,
source: readFile(path.join(testPageRoot, dir, "source.html")),
expectedContent: readFile(path.join(testPageRoot, dir, "expected.html")),
expectedMetadata: readJSON(path.join(testPageRoot, dir, "expected-metadata.json")),
};
});
};

@ -5,25 +5,8 @@ var chai = require("chai");
chai.config.includeStack = true;
var expect = chai.expect;
// We want to load JSDOMParser, which isn't set up as commonjs libraries,
// and so we need to do some hocus-pocus with 'vm' to import them on a separate scope
// (identical) scope context.
var vm = require("vm");
var jsdomPath = path.join(__dirname, "..", "JSDOMParser.js");
var scopeContext = {};
// We generally expect dump() and console.{whatever} to work, so make these available
// in the scope we're using:
scopeContext.dump = console.log
scopeContext.console = console;
// Actually load file. NB: if the file has parse errors,
// node is dumb and shows you a syntax error *at this callsite* . Don't try to find
// a syntax error on this line, there isn't one. Go look in the file it's loading instead.
vm.runInNewContext(fs.readFileSync(jsdomPath), scopeContext, jsdomPath);
var JSDOMParser = scopeContext.JSDOMParser;
var readability = require("../index.js");
var JSDOMParser = readability.JSDOMParser;
var BASETESTCASE = '<html><body><p>Some text and <a class="someclass" href="#">a link</a></p>' +
'<div id="foo">With a <script>With < fancy " characters in it because' +

@ -1,60 +1,19 @@
var path = require("path");
var fs = require("fs");
var prettyPrint = require("html").prettyPrint;
var chai = require("chai");
chai.config.includeStack = true;
var expect = chai.expect;
// We want to load Readability and JSDOMParser, which aren't set up as commonjs libraries,
// and so we need to do some hocus-pocus with 'vm' to import them on a separate scope
// (identical) scope context.
var vm = require("vm");
var readabilityPath = path.join(__dirname, "..", "Readability.js");
var jsdomPath = path.join(__dirname, "..", "JSDOMParser.js");
var readability = require("../index.js");
var Readability = readability.Readability;
var JSDOMParser = readability.JSDOMParser;
var scopeContext = {};
// We generally expect dump() and console.{whatever} to work, so make these available
// in the scope we're using:
scopeContext.dump = console.log
scopeContext.console = console;
// Actually load files. NB: if either of the files has parse errors,
// node is dumb and shows you a syntax error *at this callsite* . Don't try to find
// a syntax error on this line, there isn't one. Go look in the file it's loading instead.
vm.runInNewContext(fs.readFileSync(jsdomPath), scopeContext, jsdomPath);
vm.runInNewContext(fs.readFileSync(readabilityPath), scopeContext, readabilityPath);
// Now make references to the globals in our scope so we can use them easily:
var Readability = scopeContext.Readability;
var JSDOMParser = scopeContext.JSDOMParser;
function readFile(path) {
return fs.readFileSync(path, {encoding: "utf-8"}).trim();
}
function readJSON(path) {
return JSON.parse(readFile(path));
}
var testPageRoot = path.join(__dirname, "test-pages");
var testPages = fs.readdirSync(testPageRoot).map(function(dir) {
return {
dir: dir,
source: path.join(testPageRoot, dir, "source.html"),
expected: path.join(testPageRoot, dir, "expected.html"),
expectedMetadata: path.join(testPageRoot, dir, "expected-metadata.json"),
};
});
var testPages = require("./bootstrap").getTestPages();
describe("Test page", function() {
testPages.forEach(function(testPage) {
describe(testPage.dir, function() {
var doc, result, metadata;
var doc, result;
var expectedMetadata = readJSON(testPage.expectedMetadata);
var expectedContent = readFile(testPage.expected);
var source = readFile(testPage.source);
var uri = {
spec: "http://fakehost/test/page.html",
host: "fakehost",
@ -64,7 +23,7 @@ describe("Test page", function() {
};
before(function() {
doc = new JSDOMParser().parse(source);
doc = new JSDOMParser().parse(testPage.source);
result = new Readability(uri, doc).parse();
});
@ -73,19 +32,19 @@ describe("Test page", function() {
});
it("should extract expected content", function() {
expect(expectedContent).eql(prettyPrint(result.content));
expect(testPage.expectedContent).eql(prettyPrint(result.content));
});
it("should extract expected title", function() {
expect(expectedMetadata.title).eql(result.title);
expect(testPage.expectedMetadata.title).eql(result.title);
});
it("should extract expected byline", function() {
expect(expectedMetadata.byline).eql(result.byline);
expect(testPage.expectedMetadata.byline).eql(result.byline);
});
it("should extract expected excerpt", function() {
expect(expectedMetadata.excerpt).eql(result.excerpt);
expect(testPage.expectedMetadata.excerpt).eql(result.excerpt);
});
});
});

Loading…
Cancel
Save