diff --git a/lib/htmlparser.js b/lib/htmlparser.js
index c56928b..a43b168 100644
--- a/lib/htmlparser.js
+++ b/lib/htmlparser.js
@@ -686,8 +686,15 @@ function DefaultHandler (callback, options) {
 				var pos = this._tagStack.length - 1;
 				while (pos > -1 && this._tagStack[pos--].name != baseName) { }
-				if (pos > -1 || this._tagStack[0].name == baseName)
-					while (pos < this._tagStack.length - 1)
-						this._tagStack.pop();
+				if (pos > -1 || this._tagStack[0].name == baseName) {
+					// Braces matter here: without them the `if` would guard only the
+					// `var` declaration and an unmatched closing tag would empty the stack.
+					var elem;
+					while (pos < this._tagStack.length - 1) {
+						elem = this._tagStack.pop();
+					}
+					// The last element popped is the one matching this closing tag.
+					elem.closed = true;
+				}
 				}
 			}
 			else { //This is not a closing tag
@@ -799,6 +806,77 @@ function DefaultHandler (callback, options) {
 	, getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement, recurse, limit) {
 		return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
 	}
+
+	// Writes the markup for every node in `nodes` to `output` (any object exposing
+	// write(text, encoding)), recursing into children. A 'tag' node gets an end tag
+	// only when it carries the `closed` flag; script and style nodes do not need it.
+	, printHtml: function DomUtils$print(nodes, output, encoding) {
+		if (!nodes) {
+			return;
+		}
+		// Index loop rather than for-in, which would also visit any enumerable
+		// properties added to the array or its prototype.
+		for (var idx = 0; idx < nodes.length; idx++) {
+			var node = nodes[idx];
+			var isElement = node.type == 'tag' || node.type == 'script' || node.type == 'style';
+			var text = node.data;
+			if (text === null || text === undefined) {
+				// need to reconstruct from name and attributes
+				text = node.name;
+				for (var attrName in node.attribs) {
+					var attrVal = node.attribs[attrName];
+					text += ' ' + attrName;
+					if (attrVal !== null && attrVal !== undefined) {
+						// Use single quotes when the value holds a double quote that is
+						// not backslash-escaped, including one in the first position.
+						if (attrVal.match(/(^|[^\\])"/)) {
+							text += "='" + attrVal + "'";
+						}
+						else {
+							text += '="' + attrVal + '"';
+						}
+					}
+				}
+			}
+			if (node.type == 'directive') {
+				output.write("<" + text + ">", encoding);
+			}
+			else if (node.type == 'comment') {
+				output.write("<!--" + text + "-->", encoding);
+			}
+			else if (isElement) {
+				output.write("<" + text + ">", encoding);
+			}
+			else {
+				output.write(text, encoding);
+			}
+
+			var needsEndTag = (node.closed && node.type == 'tag') || node.type == 'script' || node.type == 'style';
+			if (node.children) {
+				// Forward the encoding so nested writes use it as well.
+				DomUtils.printHtml(node.children, output, encoding);
+			}
+			else if (needsEndTag && text.match(/\/\s*$/)) {
+				// Childless node whose markup already ends in "/": no end tag.
+				needsEndTag = false;
+			}
+			if (needsEndTag) {
+				output.write("</" + node.name + ">", encoding);
+			}
+		}
+	}
+
+	// Returns the markup for `nodes` as one string, collected from printHtml's writes.
+	, toHtml: function DomUtils$toString(nodes) {
+		var result = "";
+		DomUtils.printHtml(nodes, {
+			write: function(text, encoding) {
+				result += text;
+			}},
+			"utf-8"
+		);
+		return result;
+	}
 	}
 
 function inherits (ctor, superCtor) {
diff --git a/runtests.js b/runtests.js
index e906fe4..c33ef31 100644
--- a/runtests.js
+++ b/runtests.js
@@ -22,6 +22,7 @@ IN THE SOFTWARE.
 var sys = require("sys");
 var fs = require("fs");
 var htmlparser = require("./lib/htmlparser");
+var domutils = require("./lib/htmlparser").DomUtils;
 
 var testFolder = "./tests";
 var chunkSize = 5;
@@ -58,17 +59,33 @@ for (var i in testFiles) {
 	var testResult =
 		sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
 		&&
-		sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null)
-		;
+		sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null);
+	if (test.type != 'rss') {
+		testResult = testResult
+			&&
+			domutils.toHtml(resultComplete) == (test.expectedHtml ? test.expectedHtml : test.html)
+			&&
+			domutils.toHtml(resultChunk) == (test.expectedHtml ? test.expectedHtml : test.html)
+			;
+	}
 	sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED"));
 	if (!testResult) {
 		failedCount++;
 		sys.puts("== Complete ==");
 		sys.puts(sys.inspect(resultComplete, false, null));
+		if (test.type != 'rss') {
+			sys.puts(domutils.toHtml(resultComplete));
+		}
 		sys.puts("== Chunked ==");
 		sys.puts(sys.inspect(resultChunk, false, null));
+		if (test.type != 'rss') {
+			sys.puts(domutils.toHtml(resultChunk));
+		}
 		sys.puts("== Expected ==");
 		sys.puts(sys.inspect(test.expected, false, null));
+		if (test.type != 'rss') {
+			sys.puts(test.expectedHtml ? 
test.expectedHtml : test.html); + } } } sys.puts("Total tests: " + testCount); diff --git a/tests/01-basic.js b/tests/01-basic.js index 7846898..d2ab41d 100644 --- a/tests/01-basic.js +++ b/tests/01-basic.js @@ -42,6 +42,7 @@ exports.expected = , type: 'tag' , name: 'title' , children: [ { raw: 'The Title', data: 'The Title', type: 'text' } ] + , closed: true } , { raw: 'body' , data: 'body' @@ -53,8 +54,10 @@ exports.expected = , type: 'text' } ] + , closed: true } ] + , closed: true } ]; diff --git a/tests/02-single_tag_1.js b/tests/02-single_tag_1.js index 1735b5e..fe2b712 100644 --- a/tests/02-single_tag_1.js +++ b/tests/02-single_tag_1.js @@ -35,5 +35,5 @@ exports.expected = [ { raw: 'br', data: 'br', type: 'tag', name: 'br' } , { raw: 'text', data: 'text', type: 'text' } ]; - +exports.expectedHtml = "
text"; })(); diff --git a/tests/04-unescaped_in_script.js b/tests/04-unescaped_in_script.js index fb2cc3a..379e57a 100644 --- a/tests/04-unescaped_in_script.js +++ b/tests/04-unescaped_in_script.js @@ -48,8 +48,10 @@ exports.expected = , type: 'text' } ] + , closed: true } ] + , closed: true } ]; diff --git a/tests/05-tags_in_comment.js b/tests/05-tags_in_comment.js index 68a0779..818fda6 100644 --- a/tests/05-tags_in_comment.js +++ b/tests/05-tags_in_comment.js @@ -42,6 +42,7 @@ exports.expected = , type: 'comment' } ] + , closed: true } ]; diff --git a/tests/06-comment_in_script.js b/tests/06-comment_in_script.js index 2d04ec0..ffefb22 100644 --- a/tests/06-comment_in_script.js +++ b/tests/06-comment_in_script.js @@ -42,6 +42,7 @@ exports.expected = , type: 'comment' } ] + , closed: true } ]; diff --git a/tests/07-unescaped_in_style.js b/tests/07-unescaped_in_style.js index 563a64a..5a74341 100644 --- a/tests/07-unescaped_in_style.js +++ b/tests/07-unescaped_in_style.js @@ -43,6 +43,7 @@ exports.expected = , type: 'text' } ] + , closed: true } ]; diff --git a/tests/08-extra_spaces_in_tag.js b/tests/08-extra_spaces_in_tag.js index 1767565..1cbea6e 100644 --- a/tests/08-extra_spaces_in_tag.js +++ b/tests/08-extra_spaces_in_tag.js @@ -43,7 +43,9 @@ exports.expected = , type: 'text' } ] + , closed: true } ]; +exports.expectedHtml = "the text"; })(); diff --git a/tests/09-unquoted_attrib.js b/tests/09-unquoted_attrib.js index da6bac7..5f6e75d 100644 --- a/tests/09-unquoted_attrib.js +++ b/tests/09-unquoted_attrib.js @@ -43,6 +43,7 @@ exports.expected = , type: 'text' } ] + , closed: true } ]; diff --git a/tests/14-comment_in_text_in_script.js b/tests/14-comment_in_text_in_script.js index 215a02e..bf4707e 100644 --- a/tests/14-comment_in_text_in_script.js +++ b/tests/14-comment_in_text_in_script.js @@ -49,8 +49,8 @@ exports.expected = , data: ' the text' , type: 'text' } - ] + , closed: true } ]; diff --git a/tests/15-non-verbose.js b/tests/15-non-verbose.js index 
829fce4..9cf3e6c 100644 --- a/tests/15-non-verbose.js +++ b/tests/15-non-verbose.js @@ -40,7 +40,9 @@ exports.expected = , type: 'text' } ] + , closed: true } ]; +exports.expectedHtml = 'the text'; })(); diff --git a/tests/16-ignore_whitespace.js b/tests/16-ignore_whitespace.js index 68f4439..2d8c185 100644 --- a/tests/16-ignore_whitespace.js +++ b/tests/16-ignore_whitespace.js @@ -65,7 +65,9 @@ exports.expected = , type: 'text' } ] + , closed: true } ]; +exports.expectedHtml = "Line one\n

\nline two
x
"; })(); diff --git a/tests/17-xml_namespace.js b/tests/17-xml_namespace.js index 562f26b..137cd50 100644 --- a/tests/17-xml_namespace.js +++ b/tests/17-xml_namespace.js @@ -32,7 +32,7 @@ exports.options = { }; exports.html = "text"; exports.expected = - [ { raw: 'ns:tag', data: 'ns:tag', type: 'tag', name: 'ns:tag', children: [ { raw: 'text', data: 'text', type: 'text' } ] } + [ { raw: 'ns:tag', data: 'ns:tag', type: 'tag', name: 'ns:tag', children: [ { raw: 'text', data: 'text', type: 'text' } ], closed: true } ]; })(); diff --git a/tests/18-enforce_empty_tags.js b/tests/18-enforce_empty_tags.js index 3ea3757..0c183b8 100644 --- a/tests/18-enforce_empty_tags.js +++ b/tests/18-enforce_empty_tags.js @@ -36,5 +36,6 @@ exports.expected = { raw: 'link', data: 'link', type: 'tag', name: 'link' } , { raw: 'text', data: 'text', type: 'text' } ]; +exports.expectedHtml = "text"; })(); diff --git a/tests/19-ignore_empty_tags.js b/tests/19-ignore_empty_tags.js index 4f47a59..e8bfdde 100644 --- a/tests/19-ignore_empty_tags.js +++ b/tests/19-ignore_empty_tags.js @@ -35,7 +35,7 @@ exports.expected = [ { raw: 'link', data: 'link', type: 'tag', name: 'link', children: [ { raw: 'text', data: 'text', type: 'text' } - ] } + ], closed: true } ]; })(); diff --git a/tests/22-position_data.js b/tests/22-position_data.js index fcd7c90..2400995 100644 --- a/tests/22-position_data.js +++ b/tests/22-position_data.js @@ -66,7 +66,8 @@ exports.expected = [ line: 3, col: 9 } - }] + }], + closed: true }, { raw: 'body', data: 'body', @@ -84,7 +85,8 @@ exports.expected = [ line: 3, col: 32 } - }] + }], + closed: true }, { raw: '\n\n', data: '\n\n', @@ -93,7 +95,8 @@ exports.expected = [ line: 6, col: 8 } - }] + }], + closed: true } ];