diff --git a/lib/htmlparser.js b/lib/htmlparser.js
index c56928b..a43b168 100644
--- a/lib/htmlparser.js
+++ b/lib/htmlparser.js
@@ -686,8 +686,11 @@ function DefaultHandler (callback, options) {
var pos = this._tagStack.length - 1;
while (pos > -1 && this._tagStack[pos--].name != baseName) { }
if (pos > -1 || this._tagStack[0].name == baseName)
- while (pos < this._tagStack.length - 1)
- this._tagStack.pop();
+ { // Block keeps the unwind and the flag under the `if` above, so an unmatched closing tag leaves the stack alone
+     var elem;
+     while (pos < this._tagStack.length - 1) elem = this._tagStack.pop(); // the last pop is the matched element
+     elem.closed = true; // mark the element as having had an explicit closing tag
+ }
}
}
else { //This is not a closing tag
@@ -799,6 +802,67 @@ function DefaultHandler (callback, options) {
, getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement, recurse, limit) {
return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
}
+
+ /**
+  * Serializes parsed nodes back to markup, handing each fragment to output.write(text, encoding).
+  * Nodes carrying no `data` are rebuilt from their name and attributes; a missing `nodes` writes nothing.
+  * An end tag is written for script/style nodes and for tags flagged `closed`, except that a
+  * childless node whose tag text already ends in "/" is left as is.
+  */
+ , printHtml: function DomUtils$print(nodes, output, encoding) {
+     for (var idx = 0, count = nodes ? nodes.length : 0; idx < count; idx++) {
+         var node = nodes[idx];
+         var isElement = node.type == 'tag' || node.type == 'script' || node.type == 'style';
+         var text = node.data;
+         if (text === null || text === undefined) {
+             // No tag text was kept, so rebuild it from the name and attributes.
+             text = node.name;
+             for (var attrName in node.attribs) {
+                 var attrVal = node.attribs[attrName];
+                 text += ' ' + attrName;
+                 if (attrVal !== null && attrVal !== undefined) {
+                     // Switch to single quotes when the value holds an unescaped double quote,
+                     // including one at the very start of the value.
+                     var quote = /(^|[^\\])"/.test(attrVal) ? "'" : '"';
+                     text += '=' + quote + attrVal + quote;
+                 }
+             }
+         }
+         if (node.type == 'directive') {
+             output.write("<" + text + ">", encoding);
+         }
+         else if (node.type == 'comment') {
+             output.write("<!--" + text + "-->", encoding);
+         }
+         else if (isElement) {
+             output.write("<" + text, encoding);
+             output.write(">", encoding);
+         }
+         else {
+             output.write(text, encoding);
+         }
+         if (node.children) {
+             // Forward the encoding so nested writes receive the same value as top-level ones.
+             DomUtils.printHtml(node.children, output, encoding);
+         }
+         // A childless node whose tag text ends in "/" is already self-closed.
+         var needsEndTag = isElement && (node.closed || node.type != 'tag');
+         if (needsEndTag && (node.children || !text.match(/\/\s*$/))) {
+             output.write("</" + node.name + ">", encoding);
+         }
+     }
+ }
+
+ // Renders `nodes` to a single markup string by running printHtml against an in-memory sink.
+ , toHtml: function DomUtils$toString(nodes) {
+     var markup = "";
+     var sink = {
+         // printHtml also passes an encoding; it is not needed when accumulating a string.
+         write: function (chunk) { markup += chunk; }
+     };
+     DomUtils.printHtml(nodes, sink, "utf-8");
+     return markup;
+ }
}
function inherits (ctor, superCtor) {
diff --git a/runtests.js b/runtests.js
index e906fe4..c33ef31 100644
--- a/runtests.js
+++ b/runtests.js
@@ -22,6 +22,7 @@ IN THE SOFTWARE.
var sys = require("sys");
var fs = require("fs");
var htmlparser = require("./lib/htmlparser");
+var domutils = require("./lib/htmlparser").DomUtils;
var testFolder = "./tests";
var chunkSize = 5;
@@ -58,17 +59,33 @@ for (var i in testFiles) {
var testResult =
sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
&&
- sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null)
- ;
+ sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null);
+ if (test.type != 'rss') {
+ testResult = testResult
+ &&
+ domutils.toHtml(resultComplete) == (test.expectedHtml ? test.expectedHtml : test.html)
+ &&
+ domutils.toHtml(resultChunk) == (test.expectedHtml ? test.expectedHtml : test.html)
+ ;
+ }
sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED"));
if (!testResult) {
failedCount++;
sys.puts("== Complete ==");
sys.puts(sys.inspect(resultComplete, false, null));
+ if (test.type != 'rss') {
+ sys.puts(domutils.toHtml(resultComplete));
+ }
sys.puts("== Chunked ==");
sys.puts(sys.inspect(resultChunk, false, null));
+ if (test.type != 'rss') {
+ sys.puts(domutils.toHtml(resultChunk));
+ }
sys.puts("== Expected ==");
sys.puts(sys.inspect(test.expected, false, null));
+ if (test.type != 'rss') {
+ sys.puts(test.expectedHtml ? test.expectedHtml : test.html);
+ }
}
}
sys.puts("Total tests: " + testCount);
diff --git a/tests/01-basic.js b/tests/01-basic.js
index 7846898..d2ab41d 100644
--- a/tests/01-basic.js
+++ b/tests/01-basic.js
@@ -42,6 +42,7 @@ exports.expected =
, type: 'tag'
, name: 'title'
, children: [ { raw: 'The Title', data: 'The Title', type: 'text' } ]
+ , closed: true
}
, { raw: 'body'
, data: 'body'
@@ -53,8 +54,10 @@ exports.expected =
, type: 'text'
}
]
+ , closed: true
}
]
+ , closed: true
}
];
diff --git a/tests/02-single_tag_1.js b/tests/02-single_tag_1.js
index 1735b5e..fe2b712 100644
--- a/tests/02-single_tag_1.js
+++ b/tests/02-single_tag_1.js
@@ -35,5 +35,5 @@ exports.expected =
[ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
, { raw: 'text', data: 'text', type: 'text' }
];
-
+exports.expectedHtml = "<br>text"; // the br in `expected` has no children and is not closed, so no end tag
})();
diff --git a/tests/04-unescaped_in_script.js b/tests/04-unescaped_in_script.js
index fb2cc3a..379e57a 100644
--- a/tests/04-unescaped_in_script.js
+++ b/tests/04-unescaped_in_script.js
@@ -48,8 +48,10 @@ exports.expected =
, type: 'text'
}
]
+ , closed: true
}
]
+ , closed: true
}
];
diff --git a/tests/05-tags_in_comment.js b/tests/05-tags_in_comment.js
index 68a0779..818fda6 100644
--- a/tests/05-tags_in_comment.js
+++ b/tests/05-tags_in_comment.js
@@ -42,6 +42,7 @@ exports.expected =
, type: 'comment'
}
]
+ , closed: true
}
];
diff --git a/tests/06-comment_in_script.js b/tests/06-comment_in_script.js
index 2d04ec0..ffefb22 100644
--- a/tests/06-comment_in_script.js
+++ b/tests/06-comment_in_script.js
@@ -42,6 +42,7 @@ exports.expected =
, type: 'comment'
}
]
+ , closed: true
}
];
diff --git a/tests/07-unescaped_in_style.js b/tests/07-unescaped_in_style.js
index 563a64a..5a74341 100644
--- a/tests/07-unescaped_in_style.js
+++ b/tests/07-unescaped_in_style.js
@@ -43,6 +43,7 @@ exports.expected =
, type: 'text'
}
]
+ , closed: true
}
];
diff --git a/tests/08-extra_spaces_in_tag.js b/tests/08-extra_spaces_in_tag.js
index 1767565..1cbea6e 100644
--- a/tests/08-extra_spaces_in_tag.js
+++ b/tests/08-extra_spaces_in_tag.js
@@ -43,7 +43,9 @@ exports.expected =
, type: 'text'
}
]
+ , closed: true
}
];
+exports.expectedHtml = "the text";
})();
diff --git a/tests/09-unquoted_attrib.js b/tests/09-unquoted_attrib.js
index da6bac7..5f6e75d 100644
--- a/tests/09-unquoted_attrib.js
+++ b/tests/09-unquoted_attrib.js
@@ -43,6 +43,7 @@ exports.expected =
, type: 'text'
}
]
+ , closed: true
}
];
diff --git a/tests/14-comment_in_text_in_script.js b/tests/14-comment_in_text_in_script.js
index 215a02e..bf4707e 100644
--- a/tests/14-comment_in_text_in_script.js
+++ b/tests/14-comment_in_text_in_script.js
@@ -49,8 +49,8 @@ exports.expected =
, data: ' the text'
, type: 'text'
}
-
]
+ , closed: true
}
];
diff --git a/tests/15-non-verbose.js b/tests/15-non-verbose.js
index 829fce4..9cf3e6c 100644
--- a/tests/15-non-verbose.js
+++ b/tests/15-non-verbose.js
@@ -40,7 +40,9 @@ exports.expected =
, type: 'text'
}
]
+ , closed: true
}
];
+exports.expectedHtml = 'the text';
})();
diff --git a/tests/16-ignore_whitespace.js b/tests/16-ignore_whitespace.js
index 68f4439..2d8c185 100644
--- a/tests/16-ignore_whitespace.js
+++ b/tests/16-ignore_whitespace.js
@@ -65,7 +65,9 @@ exports.expected =
, type: 'text'
}
]
+ , closed: true
}
];
+exports.expectedHtml = "Line one\n
\nline two
x ";
})();
diff --git a/tests/17-xml_namespace.js b/tests/17-xml_namespace.js
index 562f26b..137cd50 100644
--- a/tests/17-xml_namespace.js
+++ b/tests/17-xml_namespace.js
@@ -32,7 +32,7 @@ exports.options = {
};
exports.html = "text";
exports.expected =
- [ { raw: 'ns:tag', data: 'ns:tag', type: 'tag', name: 'ns:tag', children: [ { raw: 'text', data: 'text', type: 'text' } ] }
+ [ { raw: 'ns:tag', data: 'ns:tag', type: 'tag', name: 'ns:tag', children: [ { raw: 'text', data: 'text', type: 'text' } ], closed: true }
];
})();
diff --git a/tests/18-enforce_empty_tags.js b/tests/18-enforce_empty_tags.js
index 3ea3757..0c183b8 100644
--- a/tests/18-enforce_empty_tags.js
+++ b/tests/18-enforce_empty_tags.js
@@ -36,5 +36,6 @@ exports.expected =
{ raw: 'link', data: 'link', type: 'tag', name: 'link' }
, { raw: 'text', data: 'text', type: 'text' }
];
+exports.expectedHtml = "<link>text"; // the link in `expected` has no children and is not closed, so no end tag
})();
diff --git a/tests/19-ignore_empty_tags.js b/tests/19-ignore_empty_tags.js
index 4f47a59..e8bfdde 100644
--- a/tests/19-ignore_empty_tags.js
+++ b/tests/19-ignore_empty_tags.js
@@ -35,7 +35,7 @@ exports.expected =
[
{ raw: 'link', data: 'link', type: 'tag', name: 'link', children: [
{ raw: 'text', data: 'text', type: 'text' }
- ] }
+ ], closed: true }
];
})();
diff --git a/tests/22-position_data.js b/tests/22-position_data.js
index fcd7c90..2400995 100644
--- a/tests/22-position_data.js
+++ b/tests/22-position_data.js
@@ -66,7 +66,8 @@ exports.expected = [
line: 3,
col: 9
}
- }]
+ }],
+ closed: true
}, {
raw: 'body',
data: 'body',
@@ -84,7 +85,8 @@ exports.expected = [
line: 3,
col: 32
}
- }]
+ }],
+ closed: true
}, {
raw: '\n\n',
data: '\n\n',
@@ -93,7 +95,8 @@ exports.expected = [
line: 6,
col: 8
}
- }]
+ }],
+ closed: true
}
];