diff --git a/lib/feedparser/index.js b/lib/feedparser/index.js index 85a6ae2..abf17e4 100644 --- a/lib/feedparser/index.js +++ b/lib/feedparser/index.js @@ -443,63 +443,37 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { case('link'): case('atom:link'): case('atom10:link'): - if (Array.isArray(el)) { - el.forEach(function (link){ - if (link['@']['href']) { // Atom - if (_.get(link['@'], 'rel')) { - if (link['@']['rel'] == 'alternate') { - if (!meta.link) meta.link = link['@']['href']; - } - else if (link['@']['rel'] == 'self') { - meta.xmlurl = meta.xmlUrl = link['@']['href']; - if (this.xmlbase && this.xmlbase.length === 0) { - this.xmlbase.unshift({ '#name': 'xml', '#': meta.xmlurl}); - this.stack[0] = _.reresolve(this.stack[0], meta.xmlurl); - } - } - else if (link['@']['rel'] == 'hub' && !(meta.cloud.href || meta.cloud.domain)) { - meta.cloud.type = 'hub'; - meta.cloud.href = link['@']['href']; - } - } else { + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (link){ + if (link['@']['href']) { // Atom + if (_.get(link['@'], 'rel')) { + if (link['@']['rel'] == 'alternate') { if (!meta.link) meta.link = link['@']['href']; } - } else if (Object.keys(link['@']).length === 0) { // RSS - meta.link = _.get(link); - } - if (meta.link && this.xmlbase && this.xmlbase.length === 0) { - this.xmlbase.unshift({ '#name': 'xml', '#': meta.link}); - this.stack[0] = _.reresolve(this.stack[0], meta.link); - } - }, this); - } else { - if (el['@']['href']) { // Atom - if (_.get(el['@'], 'rel')) { - if (el['@']['rel'] == 'alternate') { - if (!meta.link) meta.link = el['@']['href']; - } - else if (el['@']['rel'] == 'self') { - meta.xmlurl = meta.xmlUrl = el['@']['href']; + else if (link['@']['rel'] == 'self') { + meta.xmlurl = meta.xmlUrl = link['@']['href']; if (this.xmlbase && this.xmlbase.length === 0) { this.xmlbase.unshift({ '#name': 'xml', '#': meta.xmlurl}); this.stack[0] = _.reresolve(this.stack[0], meta.xmlurl); } } - else if (el['@']['rel'] == 'hub' && !(meta.cloud.href || meta.cloud.domain)) { + else if (link['@']['rel'] == 'hub' && !(meta.cloud.href || meta.cloud.domain)) { meta.cloud.type = 'hub'; - meta.cloud.href = el['@']['href']; + meta.cloud.href = link['@']['href']; } } else { - meta.link = el['@']['href']; + if (!meta.link) meta.link = link['@']['href']; } - } else if (Object.keys(el['@']).length === 0) { // RSS - if (!meta.link) meta.link = _.get(el); + } else if (Object.keys(link['@']).length === 0) { // RSS + meta.link = _.get(link); } if (meta.link && this.xmlbase && this.xmlbase.length === 0) { this.xmlbase.unshift({ '#name': 'xml', '#': meta.link}); this.stack[0] = _.reresolve(this.stack[0], meta.link); } - } + }, this); break; case('managingeditor'): case('webmaster'): @@ -526,20 +500,14 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { meta.cloud = {}; // This will ensure that rssCloud "wins" here, // If pubsubhubbub is also declared, it's still available // in the link elements - if (Array.isArray(el)) { - Object.keys(el[0]['@']).forEach(function (attr) { - if (_.has(el[0]['@'], attr)) { - meta.cloud[attr] = el[0]['@'][attr]; - } - }); - } - else { - Object.keys(el['@']).forEach(function (attr) { - if (_.has(el['@'], attr)) { - meta.cloud[attr] = el['@'][attr]; - } - }); + if(!Array.isArray(el)) { + el = [el]; } + Object.keys(el[0]['@']).forEach(function (attr) { + if (_.has(el[0]['@'], attr)) { + meta.cloud[attr] = el[0]['@'][attr]; + } + }); meta.cloud.type = 'rsscloud'; break; case('language'): @@ -579,88 +547,50 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { var _category = '' , _categories = [] ; - if (Array.isArray(el)) { - el.forEach(function (category){ - var _categoryValue; - if ('category' == name && 'atom' == type) { - if (category['@'] && (_categoryValue = _.safeTrim(_.get(category['@'], 'term')))) { - meta.categories.push(_categoryValue); - } - } - else if ('category' == name && 'rss' == type){ - if ((_categoryValue = _.safeTrim(_.get(category)))) { - meta.categories.push(_categoryValue); - } - } - else if ('dc:subject' == name && (_categoryValue = _.safeTrim(_.get(category)))) { - _categories = _categoryValue.split(' ').map(function (cat){ return cat.trim(); }); - if (_categories.length) { - meta.categories = meta.categories.concat(_categories); - } - } - else if ('itunes:category' == name) { - if (category['@'] && _.safeTrim(_.get(category['@'], 'text'))) _category = _.safeTrim(_.get(category['@'], 'text')); - if (category[name]) { - if (Array.isArray(category[name])) { - category[name].forEach(function (subcategory){ - var _subcategoryValue; - if (subcategory['@'] && (_subcategoryValue = _.safeTrim(_.get(subcategory['@'], 'text')))) { - meta.categories.push(_category + '/' + _subcategoryValue); - } - }); - } - else if (category[name]['@'] && (_categoryValue = _.safeTrim(_.get(category[name]['@'], 'text')))) { - meta.categories.push(_category + '/' + _categoryValue); - } - } - else if (_category) { - meta.categories.push(_category); - } - } - else if ('media:category' == name && (_categoryValue = _.safeTrim(_.get(category)))) { - meta.categories.push(_categoryValue); - } - }); - } else { + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (category){ + var _categoryValue; if ('category' == name && 'atom' == type) { - if ((_category = _.safeTrim(_.get(el['@'], 'term')))) { - meta.categories.push(_category); + if (category['@'] && (_categoryValue = _.safeTrim(_.get(category['@'], 'term')))) { + meta.categories.push(_categoryValue); } } - else if ('category' == name && 'rss' == type) { - if ((_category = _.safeTrim(_.get(el)))) { - meta.categories.push(_category); + else if ('category' == name && 'rss' == type){ + if ((_categoryValue = _.safeTrim(_.get(category)))) { + meta.categories.push(_categoryValue); } } - else if ('dc:subject' == name && (_category = _.safeTrim(_.get(el)))) { - _categories = _category.split(' ').map(function (cat){ return cat.trim(); }); + else if ('dc:subject' == name && (_categoryValue = _.safeTrim(_.get(category)))) { + _categories = _categoryValue.split(' ').map(function (cat){ return cat.trim(); }); if (_categories.length) { meta.categories = meta.categories.concat(_categories); } } else if ('itunes:category' == name) { - if (el['@'] && _.safeTrim(_.get(el['@'], 'text'))) _category = _.safeTrim(_.get(el['@'], 'text')); - if (el[name]) { - if (Array.isArray(el[name])) { - el[name].forEach(function (subcategory){ + if (category['@'] && _.safeTrim(_.get(category['@'], 'text'))) _category = _.safeTrim(_.get(category['@'], 'text')); + if (category[name]) { + if (Array.isArray(category[name])) { + category[name].forEach(function (subcategory){ var _subcategoryValue; if (subcategory['@'] && (_subcategoryValue = _.safeTrim(_.get(subcategory['@'], 'text')))) { meta.categories.push(_category + '/' + _subcategoryValue); } }); } - else if (el[name]['@'] && (_category = _.safeTrim(_.get(el[name]['@'], 'text')))) { - meta.categories.push(_category + '/' + _category); + else if (category[name]['@'] && (_categoryValue = _.safeTrim(_.get(category[name]['@'], 'text')))) { + meta.categories.push(_category + '/' + _categoryValue); } } else if (_category) { meta.categories.push(_category); } } - else if ('media:category' == name && (_category = _.safeTrim(_.get(el)))) { - meta.categories.push(_.get(el)); + else if ('media:category' == name && (_categoryValue = _.safeTrim(_.get(category)))) { + meta.categories.push(_categoryValue); } - } + }); break; } // switch end } @@ -783,53 +713,31 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ item.date = date; break; case('link'): - if (Array.isArray(el)) { - el.forEach(function (link){ - if (link['@']['href']) { // Atom - if (_.get(link['@'], 'rel')) { - if (link['@']['rel'] == 'canonical') item.origlink = link['@']['href']; - if (link['@']['rel'] == 'alternate' && (!link['@']['type'] || link['@']['type'] == 'text/html') && !item.link) item.link = link['@']['href']; - if (link['@']['rel'] == 'self' && (!link['@']['type'] || link['@']['type'] == 'text/html') && !item.link) item.link = link['@']['href']; - if (link['@']['rel'] == 'replies') item.comments = link['@']['href']; - if (link['@']['rel'] == 'enclosure') { - enclosure = {}; - enclosure.url = link['@']['href']; - enclosure.type = _.get(link['@'], 'type'); - enclosure.length = _.get(link['@'], 'length'); - if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) { - item.enclosures.push(enclosure); - } - } - } else { - item.link = link['@']['href']; - } - } else if (Object.keys(link['@']).length === 0) { // RSS - if (!item.link) item.link = _.get(link); - } - }); - } else { - if (el['@']['href']) { // Atom - if (_.get(el['@'], 'rel')) { - if (el['@']['rel'] == 'canonical') item.origlink = el['@']['href']; - if (el['@']['rel'] == 'alternate' && (!el['@']['type'] || el['@']['type'] == 'text/html') && !item.link) item.link = el['@']['href']; - if (el['@']['rel'] == 'self' && (!el['@']['type'] || el['@']['type'] == 'text/html') && !item.link) item.link = el['@']['href']; - if (el['@']['rel'] == 'replies') item.comments = el['@']['href']; - if (el['@']['rel'] == 'enclosure') { + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (link){ + if (link['@']['href']) { // Atom + if (_.get(link['@'], 'rel')) { + if (link['@']['rel'] == 'canonical') item.origlink = link['@']['href']; + if (link['@']['rel'] == 'alternate' && (!link['@']['type'] || link['@']['type'] == 'text/html') && !item.link) item.link = link['@']['href']; + if (link['@']['rel'] == 'self' && (!link['@']['type'] || link['@']['type'] == 'text/html') && !item.link) item.link = link['@']['href']; + if (link['@']['rel'] == 'replies') item.comments = link['@']['href']; + if (link['@']['rel'] == 'enclosure') { enclosure = {}; - enclosure.url = el['@']['href']; - enclosure.type = _.get(el['@'], 'type'); - enclosure.length = _.get(el['@'], 'length'); + enclosure.url = link['@']['href']; + enclosure.type = _.get(link['@'], 'type'); + enclosure.length = _.get(link['@'], 'length'); if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) { item.enclosures.push(enclosure); } } - } else { - item.link = el['@']['href']; } - } else if (Object.keys(el['@']).length === 0) { // RSS - if (!item.link) item.link = _.get(el); + if (!item.link) item.link = link['@']['href']; + } else if (Object.keys(link['@']).length === 0) { // RSS + if (!item.link) item.link = _.get(link); } - } + }); if (!item.guid) item.guid = item.link; break; case('guid'): @@ -889,69 +797,44 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ } break; case('enclosure'): - if (Array.isArray(el)) { - el.forEach(function (enc){ - enclosure = {}; - enclosure.url = _.get(enc['@'], 'url'); - enclosure.type = _.get(enc['@'], 'type'); - enclosure.length = _.get(enc['@'], 'length'); - if (~indexOfObject(item.enclosures, enclosure, ['url', 'type'])) { - item.enclosures.splice(indexOfObject(item.enclosures, enclosure, ['url', 'type']), 1, enclosure); - } else { - item.enclosures.push(enclosure); - } - }); - } else { + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (enc){ enclosure = {}; - enclosure.url = _.get(el['@'], 'url'); - enclosure.type = _.get(el['@'], 'type'); - enclosure.length = _.get(el['@'], 'length'); + enclosure.url = _.get(enc['@'], 'url'); + enclosure.type = _.get(enc['@'], 'type'); + enclosure.length = _.get(enc['@'], 'length'); if (~indexOfObject(item.enclosures, enclosure, ['url', 'type'])) { item.enclosures.splice(indexOfObject(item.enclosures, enclosure, ['url', 'type']), 1, enclosure); } else { item.enclosures.push(enclosure); } - } + }); break; case('media:content'): var optionalAttributes = ['bitrate', 'framerate', 'samplingrate', 'duration', 'height', 'width']; - if (Array.isArray(el)) { - el.forEach(function (enc){ - enclosure = {}; - enclosure.url = _.get(enc['@'], 'url'); - enclosure.type = _.get(enc['@'], 'type') || _.get(enc['@'], 'medium'); - enclosure.length = _.get(enc['@'], 'filesize'); - var index = indexOfObject(item.enclosures, enclosure, ['url', 'type']); - if (index !== -1) { - enclosure = item.enclosures[index]; - } - optionalAttributes.forEach(function (attribute) { - if (!enclosure[attribute] && _.get(enc['@'], attribute)) { - enclosure[attribute] = _.get(enc['@'], attribute); - } - }); - if (index === -1) { - item.enclosures.push(enclosure); - } - }); - } else { + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (enc){ enclosure = {}; - enclosure.url = _.get(el['@'], 'url'); - enclosure.type = _.get(el['@'], 'type') || _.get(el['@'], 'medium'); - enclosure.length = _.get(el['@'], 'filesize'); + enclosure.url = _.get(enc['@'], 'url'); + enclosure.type = _.get(enc['@'], 'type') || _.get(enc['@'], 'medium'); + enclosure.length = _.get(enc['@'], 'filesize'); var index = indexOfObject(item.enclosures, enclosure, ['url', 'type']); if (index !== -1) { enclosure = item.enclosures[index]; } optionalAttributes.forEach(function (attribute) { - if (!enclosure[attribute] && _.get(el['@'], attribute)) { - enclosure[attribute] = _.get(el['@'], attribute); + if (!enclosure[attribute] && _.get(enc['@'], attribute)) { + enclosure[attribute] = _.get(enc['@'], attribute); } }); if (index === -1) { item.enclosures.push(enclosure); } - } + }); break; case('enc:enclosure'): // Can't find this in use for an example to debug. Only example found does not comply with the spec -- can't code THAT! break; @@ -965,59 +848,35 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ var _category = '' , _categories = [] ; - if (Array.isArray(el)) { - el.forEach(function (category){ - if ('category' == name && 'atom' == type) { - if (category['@'] && _.get(category['@'], 'term')) item.categories.push(_.get(category['@'], 'term')); - } else if ('category' == name && _.get(category) && 'rss' == type) { - item.categories.push(_.get(category).trim()); - } else if ('dc:subject' == name && _.get(category)) { - _categories = _.get(category).split(' ').map(function (cat){ return cat.trim(); }); - if (_categories.length) item.categories = item.categories.concat(_categories); - } else if ('itunes:category' == name) { - if (category['@'] && _.get(category['@'], 'text')) _category = _.get(category['@'], 'text'); - if (category[name]) { - if (Array.isArray(category[name])) { - category[name].forEach(function (subcategory){ - if (subcategory['@'] && _.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + _.get(subcategory['@'], 'text')); - }); - } else { - if (category[name]['@'] && _.get(category[name]['@'], 'text')) - item.categories.push(_category + '/' + _.get(category[name]['@'], 'text')); - } - } else { - item.categories.push(_category); - } - } else if ('media:category' == name) { - item.categories.push(_.get(category)); - } - }); - } else { + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (category){ if ('category' == name && 'atom' == type) { - if (_.get(el['@'], 'term')) item.categories.push(_.get(el['@'], 'term')); - } else if ('category' == name && _.get(el) && 'rss' == type) { - item.categories.push(_.get(el).trim()); - } else if ('dc:subject' == name && _.get(el)) { - _categories = _.get(el).split(' ').map(function (cat){ return cat.trim(); }); + if (category['@'] && _.get(category['@'], 'term')) item.categories.push(_.get(category['@'], 'term')); + } else if ('category' == name && _.get(category) && 'rss' == type) { + item.categories.push(_.get(category).trim()); + } else if ('dc:subject' == name && _.get(category)) { + _categories = _.get(category).split(' ').map(function (cat){ return cat.trim(); }); if (_categories.length) item.categories = item.categories.concat(_categories); } else if ('itunes:category' == name) { - if (el['@'] && _.get(el['@'], 'text')) _category = _.get(el['@'], 'text'); - if (el[name]) { - if (Array.isArray(el[name])) { - el[name].forEach(function (subcategory){ + if (category['@'] && _.get(category['@'], 'text')) _category = _.get(category['@'], 'text'); + if (category[name]) { + if (Array.isArray(category[name])) { + category[name].forEach(function (subcategory){ if (subcategory['@'] && _.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + _.get(subcategory['@'], 'text')); }); } else { - if (el[name]['@'] && _.get(el[name]['@'], 'text')) - item.categories.push(_category + '/' + _.get(el[name]['@'], 'text')); + if (category[name]['@'] && _.get(category[name]['@'], 'text')) + item.categories.push(_category + '/' + _.get(category[name]['@'], 'text')); } } else { item.categories.push(_category); } } else if ('media:category' == name) { - item.categories.push(_.get(el)); + item.categories.push(_.get(category)); } - } + }); break; case('feedburner:origlink'): case('pheedo:origlink'): diff --git a/lib/feedparser/index.js.bak b/lib/feedparser/index.js.bak new file mode 100644 index 0000000..3ba8c95 --- /dev/null +++ b/lib/feedparser/index.js.bak @@ -0,0 +1,960 @@ +/********************************************************************** + node-feedparser - A robust RSS, Atom, RDF parser for node. + http://github.com/danmactough/node-feedparser + Copyright (c) 2011-2016 Dan MacTough and contributors + http://mact.me + +**********************************************************************/ + +/** + * Module dependencies. + */ +var sax = require('sax') + , addressparser = require('addressparser') + , indexOfObject = require('array-indexofobject') + , util = require('util') + , TransformStream = require('readable-stream').Transform + , _ = require('../utils'); + +/** + * FeedParser constructor. + * + * Exposes a duplex (transform) stream to parse a feed. + * + * Each article/post in the feed will have the following keys: + * - title {String} + * - description {String} + * - summary {String} + * - date {Date} (or null) + * - pubdate {Date} (or null) + * - link {String} + * - origlink {String} + * - author {String} + * - guid {String} + * - comments {String} + * - image {Object} + * - categories {Array} + * - source {Object} + * - enclosures {Array} + * - meta {Object} + * - Object.keys(meta): + * - #ns {Array} key,value pairs of each namespace declared for the feed + * - #type {String} one of 'atom', 'rss', 'rdf' + * - #version {String} + * - title {String} + * - description {String} + * - date {Date} (or null) + * - pubdate {Date} (or null) + * - link {String} i.e., to the website, not the feed + * - xmlurl {String} the canonical URL of the feed, as declared by the feed + * - author {String} + * - language {String} + * - image {Object} + * - favicon {String} + * - copyright {String} + * - generator {String} + * - categories {Array} + * + * @param {Object} options + * @api public + */ +function FeedParser (options) { + if (!(this instanceof FeedParser)) return new FeedParser(options); + TransformStream.call(this); + this._readableState.objectMode = true; + this._readableState.highWaterMark = 16; // max. # of output nodes buffered + + this.init(); + + // Parse options + this.options = _.assign({}, options); + if (!('strict' in this.options)) this.options.strict = false; + if (!('normalize' in this.options)) this.options.normalize = true; + if (!('addmeta' in this.options)) this.options.addmeta = true; + if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true; + if ('MAX_BUFFER_LENGTH' in this.options) { + sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers + } else { + sax.MAX_BUFFER_LENGTH = 16 * 1024 * 1024; // 16M versus the 64K default + } + if (this.options.feedurl) this.xmlbase.unshift({ '#name': 'xml', '#': this.options.feedurl}); + + // See https://github.com/isaacs/sax-js for more info + this.stream = sax.createStream(this.options.strict /* strict mode - no by default */, {lowercase: true, xmlns: true }); + this.stream.on('error', this.handleSaxError.bind(this)); + this.stream.on('processinginstruction', this.handleProcessingInstruction.bind(this)); + this.stream.on('opentag', this.handleOpenTag.bind(this)); + this.stream.on('closetag',this.handleCloseTag.bind(this)); + this.stream.on('text', this.handleText.bind(this)); + this.stream.on('cdata', this.handleText.bind(this)); + this.stream.on('end', this.handleEnd.bind(this)); +} +util.inherits(FeedParser, TransformStream); + +/* + * Initializes the SAX stream + * + * Initializes the class-variables + */ +FeedParser.prototype.init = function (){ + this.meta = { + '#ns': [] + , '@': [] + , '#xml': {} + }; + this._emitted_meta = false; + this.stack = []; + this.xmlbase = []; + this.in_xhtml = false; + this.xhtml = {}; /* Where to store xhtml elements as associative + array with keys: '#' (containing the text) + and '#name' (containing the XML element name) */ + this.errors = []; +}; + +FeedParser.prototype.handleEnd = function (){ + // We made it to the end without throwing, but let's make sure we were actually + // parsing a feed + if (!(this.meta && this.meta['#type'])) { + var e = new Error('Not a feed'); + return this.handleError(e); + } + this.push(null); +}; + +FeedParser.prototype.handleSaxError = function (e) { + this.emit('error', e); + if (this.options.resume_saxerror) { + this.resumeSaxError(); + } +}; + +FeedParser.prototype.resumeSaxError = function () { + if (this.stream._parser) { + this.stream._parser.error = null; + this.stream._parser.resume(); + } +}; + +FeedParser.prototype.handleError = function (e){ + this.emit('error', e); +}; + +// parses the xml declaration, which looks like: +// +FeedParser.prototype.handleProcessingInstruction = function (node) { + if (node.name === 'xml') { + this.meta['#xml'] = node.body.trim().split(/\s+/).reduce(function (map, attr) { + if (attr.indexOf('=') >= 0) { + var parts = attr.split('='); + map[parts[0]] = parts[1] && parts[1].length > 2 && parts[1].match(/^.(.*?).$/)[1]; + } + return map; + }, this.meta['#xml']); + } +}; + +FeedParser.prototype.handleOpenTag = function (node){ + var n = {}; + n['#name'] = node.name; // Avoid namespace collissions later... + n['#prefix'] = node.prefix; // The current ns prefix + n['#local'] = node.local; // The current element name, sans prefix + n['#uri'] = node.uri; // The current ns uri + n['@'] = {}; + n['#'] = ''; + + if (Object.keys(node.attributes).length) { + n['@'] = this.handleAttributes(node.attributes, n['#name']); + } + + if (this.in_xhtml && this.xhtml['#name'] != n['#name']) { // We are in an xhtml node + // This builds the opening tag, e.g.,
+ this.xhtml['#'] += '<'+n['#name']; + Object.keys(n['@']).forEach(function(name){ + this.xhtml['#'] += ' '+ name +'="'+ n['@'][name] + '"'; + }, this); + this.xhtml['#'] += '>'; + } else if ( this.stack.length === 0 && + (n['#name'] === 'rss' || + (n['#local'] === 'rdf' && _.nslookup([n['#uri']], 'rdf')) || + (n['#local'] === 'feed'&& _.nslookup([n['#uri']], 'atom')) ) ) { + Object.keys(n['@']).forEach(function(name) { + var o = {}; + if (name != 'version') { + o[name] = n['@'][name]; + this.meta['@'].push(o); + } + }, this); + switch(n['#local']) { + case 'rss': + this.meta['#type'] = 'rss'; + this.meta['#version'] = n['@']['version']; + break; + case 'rdf': + this.meta['#type'] = 'rdf'; + this.meta['#version'] = n['@']['version'] || '1.0'; + break; + case 'feed': + this.meta['#type'] = 'atom'; + this.meta['#version'] = n['@']['version'] || '1.0'; + break; + } + } + this.stack.unshift(n); +}; + +FeedParser.prototype.handleCloseTag = function (el){ + var node = { '#name' : el + , '#prefix' : '' + , '#local' : '' } + , stdEl + , item + , baseurl + ; + var n = this.stack.shift(); + el = el.split(':'); + + if (el.length > 1 && el[0] === n['#prefix']) { + if (_.nslookup(n['#uri'], 'atom')) { + node['#prefix'] = el[0]; + node['#local'] = el.slice(1).join(':'); + node['#type'] = 'atom'; + } else if (_.nslookup(n['#uri'], 'rdf')) { + node['#prefix'] = el[0]; + node['#local'] = el.slice(1).join(':'); + node['#type'] = 'rdf'; + } else { + node['#prefix'] = _.nsprefix(n['#uri']) || n['#prefix']; + node['#local'] = el.slice(1).join(':'); + } + } else { + node['#local'] = node['#name']; + node['#type'] = _.nsprefix(n['#uri']) || n['#prefix']; + } + delete n['#name']; + delete n['#local']; + delete n['#prefix']; + delete n['#uri']; + + if (this.xmlbase && this.xmlbase.length) { + baseurl = this.xmlbase[0]['#']; + } + + if (baseurl && (node['#local'] === 'logo' || node['#local'] === 'icon') && node['#type'] === 'atom') { + // Apply xml:base to these elements as they appear + // rather than leaving it to the ultimate parser + n['#'] = _.resolve(baseurl, n['#']); + } + + if (this.xmlbase.length && (el == this.xmlbase[0]['#name'])) { + void this.xmlbase.shift(); + } + + if (this.in_xhtml) { + if (node['#name'] == this.xhtml['#name']) { // The end of the XHTML + + // Add xhtml data to the container element + n['#'] += this.xhtml['#'].trim(); + // Clear xhtml nodes from the tree + for (var key in n) { + if (key != '@' && key != '#') { + delete n[key]; + } + } + this.xhtml = {}; + this.in_xhtml = false; + } else { // Somewhere in the middle of the XHTML + this.xhtml['#'] += ''; + } + } + + if ('#' in n) { + if (n['#'].match(/^\s*$/)) { + // Delete text nodes with nothing by whitespace + delete n['#']; + } else { + n['#'] = n['#'].trim(); + if (Object.keys(n).length === 1) { + // If there is only one text node, hoist it + n = n['#']; + } + } + } + + if (node['#name'] === 'item' || + node['#name'] === 'entry' || + (node['#local'] === 'item' && (node['#prefix'] === '' || node['#type'] === 'rdf')) || + (node['#local'] == 'entry' && (node['#prefix'] === '' || node['#type'] === 'atom'))) { // We have an article! + + if (!this.meta.title) { // We haven't yet parsed all the metadata + _.assign(this.meta, this.handleMeta(this.stack[0], this.meta['#type'], this.options)); + if (!this._emitted_meta) { + this.emit('meta', this.meta); + this._emitted_meta = true; + } + } + if (!baseurl && this.xmlbase && this.xmlbase.length) { // handleMeta was able to infer a baseurl without xml:base or options.feedurl + n = _.reresolve(n, this.xmlbase[0]['#']); + } + item = this.handleItem(n, this.meta['#type'], this.options); + if (this.options.addmeta) { + item.meta = this.meta; + } + if (this.meta.author && !item.author) item.author = this.meta.author; + this.push(item); + } else if (!this.meta.title && // We haven't yet parsed all the metadata + (node['#name'] === 'channel' || + node['#name'] === 'feed' || + (node['#local'] === 'channel' && (node['#prefix'] === '' || node['#type'] === 'rdf')) || + (node['#local'] === 'feed' && (node['#prefix'] === '' || node['#type'] === 'atom')) ) ) { + _.assign(this.meta, this.handleMeta(n, this.meta['#type'], this.options)); + if (!this._emitted_meta) { + this.emit('meta', this.meta); + this._emitted_meta = true; + } + } + + if (this.stack.length > 0) { + if (node['#prefix'] && node['#local'] && !node['#type']) { + stdEl = node['#prefix'] + ':' + node['#local']; + } else if (node['#name'] && node['#type'] && node['#type'] !== this.meta['#type']) { + stdEl = node['#name']; + } else { + stdEl = node['#local'] || node['#name']; + } + if (!this.stack[0].hasOwnProperty(stdEl)) { + this.stack[0][stdEl] = n; + } else if (this.stack[0][stdEl] instanceof Array) { + this.stack[0][stdEl].push(n); + } else { + this.stack[0][stdEl] = [this.stack[0][stdEl], n]; + } + } +}; + +FeedParser.prototype.handleText = function (text){ + if (this.in_xhtml) { + this.xhtml['#'] += text; + } else { + if (this.stack.length) { + if (this.stack[0] && '#' in this.stack[0]) { + this.stack[0]['#'] += text; + } else { + this.stack[0]['#'] = text; + } + } + } +}; + +FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) { + /* + * Using the sax.js option { xmlns: true } + * attrs is an array of objects (not strings) having the following properties + * name - e.g., xmlns:dc or href + * value + * prefix - the first part of the name of the attribute (before the colon) + * local - the second part of the name of the attribute (after the colon) + * uri - the uri of the namespace + * + */ + + var basepath = '' + , simplifiedAttributes = {} + ; + + if (this.xmlbase && this.xmlbase.length) { + basepath = this.xmlbase[0]['#']; + } + + Object.keys(attrs).forEach(function(key){ + var attr = attrs[key] + , ns = {} + , prefix = '' + ; + if (attr.prefix === 'xmlns') { + ns[attr.name] = attr.value; + this.meta['#ns'].push(ns); + } + // If the feed is using a non-default prefix, we'll use it, too + // But we force the use of the 'xml' prefix + if (attr.uri && attr.prefix && !_.nslookup(attr.uri, attr.prefix) || _.nslookup(attr.uri, 'xml')) { + prefix = ( _.nsprefix(attr.uri) || attr.prefix ) + ( attr.local ? ':' : '' ); + } + if (basepath && (attr.local == 'href' || attr.local == 'src' || attr.local == 'uri')) { + // Apply xml:base to these elements as they appear + // rather than leaving it to the ultimate parser + attr.value = _.resolve(basepath, attr.value); + } else if (attr.local === 'base' && _.nslookup(attr.uri, 'xml')) { + // Keep track of the xml:base for the current node + if (basepath) { + attr.value = _.resolve(basepath, attr.value); + } + this.xmlbase.unshift({ '#name': el, '#': attr.value}); + } else if (attr.name === 'type' && attr.value === 'xhtml') { + this.in_xhtml = true; + this.xhtml = {'#name': el, '#': ''}; + } + simplifiedAttributes[prefix + attr.local] = attr.value ? attr.value.trim() : ''; + }, this); + return simplifiedAttributes; +}; + +FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { + if (!type || !node) return {}; + + var meta = {} + , normalize = !options || (options && options.normalize) + ; + + if (normalize) { + ['title','description','date', 'pubdate', 'pubDate','link', 'xmlurl', 'xmlUrl','author','language','favicon','copyright','generator'].forEach(function (property){ + meta[property] = null; + }); + meta.cloud = {}; + meta.image = {}; + meta.categories = []; + } + + Object.keys(node).forEach(function(name){ + var el = node[name]; + + if (normalize) { + switch(name){ + case('title'): + meta.title = _.get(el); + break; + case('description'): + case('subtitle'): + meta.description = _.get(el); + break; + case('pubdate'): + case('lastbuilddate'): + case('published'): + case('modified'): + case('updated'): + case('dc:date'): + var date = _.get(el) ? new Date(_.get(el)) : null; + if (!date) break; + if (meta.pubdate === null || name == 'pubdate' || name == 'published') + meta.pubdate = meta.pubDate = date; + if (meta.date === null || name == 'lastbuilddate' || name == 'modified' || name == 'updated') + meta.date = date; + break; + case('link'): + case('atom:link'): + case('atom10:link'): + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (link){ + if (link['@']['href']) { // Atom + if (_.get(link['@'], 'rel')) { + if (link['@']['rel'] == 'alternate') { + if (!meta.link) meta.link = link['@']['href']; + } + else if (link['@']['rel'] == 'self') { + meta.xmlurl = meta.xmlUrl = link['@']['href']; + if (this.xmlbase && this.xmlbase.length === 0) { + this.xmlbase.unshift({ '#name': 'xml', '#': meta.xmlurl}); + this.stack[0] = _.reresolve(this.stack[0], meta.xmlurl); + } + } + else if (link['@']['rel'] == 'hub' && !(meta.cloud.href || meta.cloud.domain)) { + meta.cloud.type = 'hub'; + meta.cloud.href = link['@']['href']; + } + } else { + if (!meta.link) meta.link = link['@']['href']; + } + } else if (Object.keys(link['@']).length === 0) { // RSS + meta.link = _.get(link); + } + if (meta.link && this.xmlbase && this.xmlbase.length === 0) { + this.xmlbase.unshift({ '#name': 'xml', '#': meta.link}); + this.stack[0] = _.reresolve(this.stack[0], meta.link); + } + }, this); + break; + case('managingeditor'): + case('webmaster'): + case('author'): + var author = {}; + if (name == 'author') { + meta.author = _.get(el.name) || _.get(el.email) || _.get(el.uri); + } + else if (_.get(el)) { + author = addressparser(_.get(el))[0]; + if (author) { + el['name'] = author.name; + el['email'] = author.address; + } + if (meta.author === null || name == 'managingeditor') { + meta.author = author.name || author.address || _.get(el); + } + } + break; + case('cloud'): + // I can't believe someone actually would put two cloud elements in their channel + // but it happened + // Nevertheless, there can be only one + meta.cloud = {}; // This will ensure that rssCloud "wins" here, + // If pubsubhubbub is also declared, it's still available + // in the link elements + if (Array.isArray(el)) { + Object.keys(el[0]['@']).forEach(function (attr) { + if (_.has(el[0]['@'], attr)) { + meta.cloud[attr] = el[0]['@'][attr]; + } + }); + } + else { + Object.keys(el['@']).forEach(function (attr) { + if (_.has(el['@'], attr)) { + meta.cloud[attr] = el['@'][attr]; + } + }); + } + meta.cloud.type = 'rsscloud'; + break; + case('language'): + meta.language = _.get(el); + break; + case('image'): + case('logo'): + if (el.url) + meta.image.url = _.get(el.url); + if (el.title) + meta.image.title = _.get(el.title); + if (!meta.image.url && _.get(el)) + meta.image.url = _.get(el); + break; + case('icon'): + meta.favicon = _.get(el); + break; + case('copyright'): + case('rights'): + case('dc:rights'): + meta.copyright = _.get(el); + break; + case('generator'): + meta.generator = _.get(el); + if (_.get(el['@'], 'version')) + meta.generator += (meta.generator ? ' ' : '') + 'v' + el['@'].version; + if (_.get(el['@'], 'uri')) + meta.generator += meta.generator ? ' (' + el['@'].uri + ')' : el['@'].uri; + break; + case('category'): + case('dc:subject'): + case('itunes:category'): + case('media:category'): + /* We handle all the kinds of categories within the switch loop because meta.categories + * is an array, unlike the other properties, and therefore can handle multiple values + */ + var _category = '' + , _categories = [] + ; + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (category){ + var _categoryValue; + if ('category' == name && 'atom' == type) { + if (category['@'] && (_categoryValue = _.safeTrim(_.get(category['@'], 'term')))) { + meta.categories.push(_categoryValue); + } + } + else if ('category' == name && 'rss' == type){ + if ((_categoryValue = _.safeTrim(_.get(category)))) { + meta.categories.push(_categoryValue); + } + } + else if ('dc:subject' == name && (_categoryValue = _.safeTrim(_.get(category)))) { + _categories = _categoryValue.split(' ').map(function (cat){ return cat.trim(); }); + if (_categories.length) { + meta.categories = meta.categories.concat(_categories); + } + } + else if ('itunes:category' == name) { + if (category['@'] && _.safeTrim(_.get(category['@'], 'text'))) _category = _.safeTrim(_.get(category['@'], 'text')); + if (category[name]) { + if (Array.isArray(category[name])) { + category[name].forEach(function (subcategory){ + var _subcategoryValue; + if (subcategory['@'] && (_subcategoryValue = _.safeTrim(_.get(subcategory['@'], 'text')))) { + meta.categories.push(_category + '/' + _subcategoryValue); + } + }); + } + else if (category[name]['@'] && (_categoryValue = _.safeTrim(_.get(category[name]['@'], 'text')))) { + meta.categories.push(_category + '/' + _categoryValue); + } + } + else if (_category) { + meta.categories.push(_category); + } + } + else if ('media:category' == name && (_categoryValue = _.safeTrim(_.get(category)))) { + meta.categories.push(_categoryValue); + } + }); + break; + } // switch end + } + // Fill with all native other namespaced properties + if (name.indexOf('#') !== 0) { + if (~name.indexOf(':')) meta[name] = el; + else meta[type + ':' + name] = el; + } + }, this); // forEach end + + if (normalize) { + if (!meta.description) { + if (node['itunes:summary']) meta.description = _.get(node['itunes:summary']); + else if (node['tagline']) meta.description = _.get(node['tagline']); + } + if (!meta.author) { + if (node['itunes:author']) meta.author = _.get(node['itunes:author']); + else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) meta.author = _.get(node['itunes:owner']['itunes:name']); + else if (node['dc:creator']) meta.author = _.get(node['dc:creator']); + else if (node['dc:publisher']) meta.author = _.get(node['dc:publisher']); + } + if (!meta.language) { + if (node['@'] && node['@']['xml:lang']) meta.language = _.get(node['@'], 'xml:lang'); + else if (node['dc:language']) meta.language = _.get(node['dc:language']); + } + if (!meta.image.url) { + if (node['itunes:image']) meta.image.url = _.get(node['itunes:image']['@'], 'href'); + else if (node['media:thumbnail']) { + if (Array.isArray(node['media:thumbnail'])) { + node['media:thumbnail'] = node['media:thumbnail'][0]; + } + meta.image.url = _.get(node['media:thumbnail']['@'], 'url'); + } + } + if (!meta.copyright) { + if (node['media:copyright']) meta.copyright = _.get(node['media:copyright']); + else if (node['dc:rights']) meta.copyright = _.get(node['dc:rights']); + else if (node['creativecommons:license']) meta.copyright = _.get(node['creativecommons:license']); + else if (node['cc:license']) { + if (Array.isArray(node['cc:license']) && node['cc:license'][0]['@'] && node['cc:license'][0]['@']['rdf:resource']) { + meta.copyright = _.get(node['cc:license'][0]['@'], 'rdf:resource'); + } else if (node['cc:license']['@'] && node['cc:license']['@']['rdf:resource']) { + meta.copyright = _.get(node['cc:license']['@'], 'rdf:resource'); + } + } + } + if (!meta.generator) { + if (node['admin:generatoragent']) { + if (Array.isArray(node['admin:generatoragent']) && node['admin:generatoragent'][0]['@'] && node['admin:generatoragent'][0]['@']['rdf:resource']) { + meta.generator = _.get(node['admin:generatoragent'][0]['@'], 'rdf:resource'); + } else if (node['admin:generatoragent']['@'] && node['admin:generatoragent']['@']['rdf:resource']) { + meta.generator = _.get(node['admin:generatoragent']['@'], 'rdf:resource'); + } + } + } + if (meta.categories.length) { + meta.categories = _.uniq(meta.categories); + } + if (!meta.link) { + if (meta['atom:id'] && _.get(meta['atom:id']) && /^https?:/.test(_.get(meta['atom:id']))) { + meta.link = _.get(meta['atom:id']); + } + } + if (!meta.xmlurl && this.options.feedurl) { + meta.xmlurl = meta.xmlUrl = this.options.feedurl; + } + meta.title = meta.title && _.stripHtml(meta.title); + meta.description = meta.description && _.stripHtml(meta.description); + } + + return meta; +}; + +FeedParser.prototype.handleItem = function handleItem (node, type, options){ + if (!type || !node) return {}; + + var item = {} + , normalize = !options || (options && options.normalize) + ; + + if (normalize) { + ['title','description','summary','date','pubdate','pubDate','link','guid','author','comments', 'origlink'].forEach(function (property){ + item[property] = null; + }); + item.image = {}; + item.source = {}; + item.categories = []; + item.enclosures = []; + } + + Object.keys(node).forEach(function(name){ + var el = node[name] + , attrs = _.get(el, '@') + , enclosure; + if (normalize) { + switch(name){ + case('title'): + item.title = _.get(el); + break; + case('description'): + case('summary'): + item.summary = _.get(el); + if (!item.description) item.description = _.get(el); + break; + case('content'): + case('content:encoded'): + item.description = _.get(el); + break; + case('pubdate'): + case('published'): + case('issued'): + case('modified'): + case('updated'): + case('dc:date'): + var date = _.get(el) ? new Date(_.get(el)) : null; + if (!date) break; + if (item.pubdate === null || name == 'pubdate' || name == 'published' || name == 'issued') + item.pubdate = item.pubDate = date; + if (item.date === null || name == 'modified' || name == 'updated') + item.date = date; + break; + case('link'): + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (link){ + if (link['@']['href']) { // Atom + if (_.get(link['@'], 'rel')) { + if (link['@']['rel'] == 'canonical') item.origlink = link['@']['href']; + if (link['@']['rel'] == 'alternate' && (!link['@']['type'] || link['@']['type'] == 'text/html') && !item.link) item.link = link['@']['href']; + if (link['@']['rel'] == 'self' && (!link['@']['type'] || link['@']['type'] == 'text/html') && !item.link) item.link = link['@']['href']; + if (link['@']['rel'] == 'replies') item.comments = link['@']['href']; + if (link['@']['rel'] == 'enclosure') { + enclosure = {}; + enclosure.url = link['@']['href']; + enclosure.type = _.get(link['@'], 'type'); + enclosure.length = _.get(link['@'], 'length'); + if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) { + item.enclosures.push(enclosure); + } + } + } + if (!item.link) item.link = link['@']['href']; + } else if (Object.keys(link['@']).length === 0) { // RSS + if (!item.link) item.link = _.get(link); + } + }); + if (!item.guid) item.guid = item.link; + break; + case('guid'): + case('id'): + item.guid = _.get(el); + // http://cyber.law.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt + // If the guid element has an attribute named "isPermaLink" with a value + // of true, the reader may assume that it is a permalink to the item, + // that is, a url that can be opened in a Web browser, that points to + // the full item described by the element. + // isPermaLink is optional, its default value is true. If its value is + // false, the guid may not be assumed to be a url, or a url to anything + // in particular. + if (item.guid && type == 'rss' && name == 'guid' && !(attrs.ispermalink && attrs.ispermalink.match(/false/i))) { + item.permalink = item.guid; + } + break; + case('author'): + var author = {}; + if (_.get(el)) { // RSS + author = addressparser(_.get(el))[0]; + if (author) { + el['name'] = author.name; + el['email'] = author.address; + item.author = author.name || author.address; + } + // addressparser failed + else { + item.author = _.get(el); + } + } else { + item.author = _.get(el.name) || _.get(el.email) || _.get(el.uri); + } + break; + case('dc:creator'): + item.author = _.get(el); + break; + case('comments'): + item.comments = _.get(el); + break; + case('source'): + if ('rss' == type) { + item.source['title'] = _.get(el); + item.source['url'] = _.get(el['@'], 'url'); + } else if ('atom' == type) { + if (el.title && _.get(el.title)) + item.source['title'] = _.get(el.title); + if (el.link && _.get(el.link['@'], 'href')) + item.source['url'] = _.get(el.link['@'], 'href'); + } + if (item.source['url'] && !this.meta.xmlurl) { + this.meta.xmlurl = this.meta.xmlUrl = item.source['url']; + if (this.xmlbase && this.xmlbase.length === 0) { + this.xmlbase.unshift({ '#name': 'xml', '#': item.source['url']}); + this.stack[0] = _.reresolve(this.stack[0], item.source['url']); + } + } + break; + case('enclosure'): + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (enc){ + enclosure = {}; + enclosure.url = _.get(enc['@'], 'url'); + enclosure.type = _.get(enc['@'], 'type'); + enclosure.length = _.get(enc['@'], 'length'); + if (~indexOfObject(item.enclosures, enclosure, ['url', 'type'])) { + item.enclosures.splice(indexOfObject(item.enclosures, enclosure, ['url', 'type']), 1, enclosure); + } else { + item.enclosures.push(enclosure); + } + }); + break; + case('media:content'): + var optionalAttributes = ['bitrate', 'framerate', 'samplingrate', 'duration', 'height', 'width']; + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (enc){ + enclosure = {}; + enclosure.url = _.get(enc['@'], 'url'); + enclosure.type = _.get(enc['@'], 'type') || _.get(enc['@'], 'medium'); + enclosure.length = _.get(enc['@'], 'filesize'); + var index = indexOfObject(item.enclosures, enclosure, ['url', 'type']); + if (index !== -1) { + enclosure = item.enclosures[index]; + } + optionalAttributes.forEach(function (attribute) { + if (!enclosure[attribute] && _.get(enc['@'], attribute)) { + enclosure[attribute] = _.get(enc['@'], attribute); + } + }); + if (index === -1) { + item.enclosures.push(enclosure); + } + }); + break; + case('enc:enclosure'): // Can't find this in use for an example to debug. Only example found does not comply with the spec -- can't code THAT! + break; + case('category'): + case('dc:subject'): + case('itunes:category'): + case('media:category'): + /* We handle all the kinds of categories within the switch loop because item.categories + * is an array, unlike the other properties, and therefore can handle multiple values + */ + var _category = '' + , _categories = [] + ; + if(!Array.isArray(el)) { + el = [el]; + } + el.forEach(function (category){ + if ('category' == name && 'atom' == type) { + if (category['@'] && _.get(category['@'], 'term')) item.categories.push(_.get(category['@'], 'term')); + } else if ('category' == name && _.get(category) && 'rss' == type) { + item.categories.push(_.get(category).trim()); + } else if ('dc:subject' == name && _.get(category)) { + _categories = _.get(category).split(' ').map(function (cat){ return cat.trim(); }); + if (_categories.length) item.categories = item.categories.concat(_categories); + } else if ('itunes:category' == name) { + if (category['@'] && _.get(category['@'], 'text')) _category = _.get(category['@'], 'text'); + if (category[name]) { + if (Array.isArray(category[name])) { + category[name].forEach(function (subcategory){ + if (subcategory['@'] && _.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + _.get(subcategory['@'], 'text')); + }); + } else { + if (category[name]['@'] && _.get(category[name]['@'], 'text')) + item.categories.push(_category + '/' + _.get(category[name]['@'], 'text')); + } + } else { + item.categories.push(_category); + } + } else if ('media:category' == name) { + item.categories.push(_.get(category)); + } + }); + break; + case('feedburner:origlink'): + case('pheedo:origlink'): + if (!item.origlink) { + item.origlink = _.get(el); + } + break; + } // switch end + } + // Fill with all native other namespaced properties + if (name.indexOf('#') !== 0) { + if (~name.indexOf(':')) item[name] = el; + else item[type + ':' + name] = el; + } + }, this); // forEach end + + if (normalize) { + if (!item.description) { + if (node['itunes:summary']) item.description = _.get(node['itunes:summary']); + } + if (!item.author) { + if (node['itunes:author']) item.author = _.get(node['itunes:author']); + else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) item.author = _.get(node['itunes:owner']['itunes:name']); + else if (node['dc:publisher']) item.author = _.get(node['dc:publisher']); + } + if (!item.image.url) { + if (node['itunes:image']) item.image.url = _.get(node['itunes:image']['@'], 'href'); + else if (node['media:thumbnail']) { + if (Array.isArray(node['media:thumbnail'])) { + item.image.url = _.get(node['media:thumbnail'][0]['@'], 'url'); + } else { + item.image.url = _.get(node['media:thumbnail']['@'], 'url'); + } + } + else if (node['media:content'] && node['media:content']['media:thumbnail']) item.image.url = _.get(node['media:content']['media:thumbnail']['@'], 'url'); + else if (node['media:group'] && node['media:group']['media:thumbnail']) item.image.url = _.get(node['media:group']['media:thumbnail']['@'], 'url'); + else if (node['media:group'] && node['media:group']['media:content'] && node['media:group']['media:content']['media:thumbnail']) item.image.url = _.get(node['media:group']['media:content']['media:thumbnail']['@'], 'url'); + else if (node['g:image_link']) item.image.url = _.get(node['g:image_link']); + } + if (item.categories.length) { + item.categories = _.uniq(item.categories); + } + if (!item.link) { + if (item.guid && /^https?:/.test(item.guid)) { + item.link = item.guid; + } + } + item.title = item.title && _.stripHtml(item.title); + } + return item; +}; + +// Naive Stream API +FeedParser.prototype._transform = function (data, encoding, done) { + try { + this.stream.write(data); + done(); + } + catch (e) { + done(e); + this.push(null); // Manually trigger and end, since we can't reliably do any more parsing + } +}; + +FeedParser.prototype._flush = function (done) { + try { + this.stream.end(); + done(); + } + catch (e) { + done(e); + } +}; + +exports = module.exports = FeedParser;