Fix JSDOMParser to do some HTML escaping and identify as JSDOMParser + add tests

pull/80/head
Gijs Kruitbosch 9 years ago
parent 0f2f61d0a6
commit 62801faa0e

@ -30,6 +30,39 @@
dump("JSDOMParser error: " + m + "\n");
}
// XML only defines these and the numeric ones:
var entityTable = {
"lt": "<",
"gt": ">",
"amp": "&",
"quot": '"',
"apos": "'",
};
var reverseEntityTable = {
"<": "&lt;",
">": "&gt;",
"&": "&amp;",
'"': "&quot;",
"'": "&apos;",
};
function encodeHTML(s) {
return s.replace(/[&<>'"]/g, function(x) {
return reverseEntityTable[x];
});
}
function decodeHTML(str) {
return str.replace(/&(quot|amp|apos|lt|gt);/g, function(match, tag) {
return entityTable[tag];
}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(match, hex, numStr) {
var num = parseInt(hex || numStr, hex ? 16 : 10); // read num
return String.fromCharCode(num);
});
}
// When a style is set in JS, map it to the corresponding CSS attribute
var styleMap = {
"alignmentBaseline": "alignment-baseline",
@ -440,7 +473,9 @@
}
return oldNode;
}
}
},
__JSDOMParser__: true,
};
for (var i in nodeTypes) {
@ -449,7 +484,27 @@
var Attribute = function (name, value) {
this.name = name;
this.value = value;
this._value = value;
};
Attribute.prototype = {
get value() {
return this._value;
},
setValue: function(newValue) {
this._value = newValue;
delete this._decodedValue;
},
setDecodedValue: function(newValue) {
this._value = encodeHTML(newValue);
this._decodedValue = newValue;
},
getDecodedValue: function() {
if (typeof this._decodedValue === "undefined") {
this._decodedValue = (this._value && decodeHTML(this._value)) || "";
}
return this._decodedValue;
},
};
var Comment = function () {
@ -472,7 +527,27 @@
nodeName: "#text",
nodeType: Node.TEXT_NODE,
textContent: ""
get textContent() {
if (typeof this._textContent === "undefined") {
this._textContent = decodeHTML(this._innerHTML || "");
}
return this._textContent;
},
get innerHTML() {
if (typeof this._innerHTML === "undefined") {
this._innerHTML = encodeHTML(this._textContent || "");
}
return this._innerHTML;
},
set innerHTML(newHTML) {
this._innerHTML = newHTML;
delete this._textContent;
},
set textContent(newText) {
this._textContent = newText;
delete this._innerHTML;
},
}
var Document = function () {
@ -575,13 +650,15 @@
// serialize attribute list
for (var j = 0; j < child.attributes.length; j++) {
var attr = child.attributes[j];
var quote = (attr.value.indexOf('"') === -1 ? '"' : "'");
arr.push(" " + attr.name + '=' + quote + attr.value + quote);
// the attribute value will be HTML escaped.
var val = attr.value;
var quote = (val.indexOf('"') === -1 ? '"' : "'");
arr.push(" " + attr.name + '=' + quote + val + quote);
}
if (child.localName in voidElems) {
// if this is a self-closing element, end it here
arr.push("/>");
arr.push(">");
} else {
// otherwise, add its children
arr.push(">");
@ -589,7 +666,8 @@
arr.push("</" + child.localName + ">");
}
} else {
arr.push(child.textContent);
// This is a text node, so asking for innerHTML won't recurse.
arr.push(child.innerHTML);
}
}
}
@ -651,7 +729,7 @@
for (var i = this.attributes.length; --i >= 0;) {
var attr = this.attributes[i];
if (attr.name === name)
return attr.value;
return attr.getDecodedValue();
}
return undefined;
},
@ -660,11 +738,11 @@
for (var i = this.attributes.length; --i >= 0;) {
var attr = this.attributes[i];
if (attr.name === name) {
attr.value = value;
attr.setDecodedValue(value);
return;
}
}
this.attributes.push(new Attribute(name, value));
this.attributes.push(new Attribute(name, encodeHTML(value)));
},
removeAttribute: function (name) {
@ -815,9 +893,6 @@
// Read the attribute value (and consume the matching quote)
var value = this.readString(c);
if (!value)
return;
node.attributes.push(new Attribute(name, value));
return;
@ -947,10 +1022,10 @@
// looking for the same < all the time.
this.currentChar--;
if (n === -1) {
textNode.textContent += this.html.substring(this.currentChar, this.html.length);
textNode.innerHTML += this.html.substring(this.currentChar, this.html.length);
this.currentChar = this.html.length;
} else {
textNode.textContent += this.html.substring(this.currentChar, n);
textNode.innerHTML += this.html.substring(this.currentChar, n);
this.currentChar = n;
}
if (!haveTextNode)
@ -993,10 +1068,10 @@
var node = new Text();
var n = this.html.indexOf("<", this.currentChar);
if (n === -1) {
node.textContent = this.html.substring(this.currentChar, this.html.length);
node.innerHTML = this.html.substring(this.currentChar, this.html.length);
this.currentChar = this.html.length;
} else {
node.textContent = this.html.substring(this.currentChar, n);
node.innerHTML = this.html.substring(this.currentChar, n);
this.currentChar = n;
}
return node;

@ -31,8 +31,13 @@ describe("Test JSDOM functionality", function() {
nodeExpect(baseDoc.body.parentNode, baseDoc.documentElement);
expect(baseDoc.body.childNodes.length).eql(3);
var generatedHTML = "<html>" + baseDoc.documentElement.innerHTML + "</html>";
expect(generatedHTML).eql(BASETESTCASE);
var generatedHTML = baseDoc.getElementsByTagName("p")[0].innerHTML;
expect(generatedHTML).eql('Some text and <a class="someclass" href="#">a link</a>');
var scriptNode = baseDoc.getElementsByTagName("script")[0];
generatedHTML = scriptNode.innerHTML;
expect(generatedHTML).eql('With < fancy " characters in it because');
expect(scriptNode.textContent).eql('With < fancy " characters in it because');
});
it("should deal with script tags", function() {
@ -192,6 +197,38 @@ describe("Test JSDOM functionality", function() {
});
});
describe("Test HTML escaping", function() {
var baseStr = "<p>Hello, everyone &amp; all their friends, &lt;this&gt; is a &quot; test with &apos; quotes.</p>";
var doc = new JSDOMParser().parse(baseStr);
var p = doc.getElementsByTagName("p")[0];
var txtNode = p.firstChild;
it("should handle encoding HTML correctly", function() {
// This /should/ just be cached straight from reading it:
expect("<p>" + p.innerHTML + "</p>").eql(baseStr);
expect("<p>" + txtNode.innerHTML + "</p>").eql(baseStr);
});
it("should have decoded correctly", function() {
expect(p.textContent).eql("Hello, everyone & all their friends, <this> is a \" test with ' quotes.");
expect(txtNode.textContent).eql("Hello, everyone & all their friends, <this> is a \" test with ' quotes.");
});
it("should handle updates via textContent correctly", function() {
// Because the initial tests might be based on cached innerHTML values,
// let's manipulate via textContent in order to test that it alters
// the innerHTML correctly.
txtNode.textContent = txtNode.textContent + " ";
expect("<p>" + txtNode.innerHTML + "</p>").eql(baseStr.replace("</p>", " </p>"));
expect("<p>" + p.innerHTML + "</p>").eql(baseStr.replace("</p>", " </p>"));
});
it("should handle decimal and hex escape sequences", function() {
var doc = new JSDOMParser().parse("<p>&#32;&#x20;</p>");
expect(doc.getElementsByTagName("p")[0].textContent).eql(" ");
});
});
describe("Script parsing", function() {
it("should strip ?-based comments within script tags", function() {

Loading…
Cancel
Save