Skip to content

Commit

Permalink
Change default behavior to not strip html by default
Browse files Browse the repository at this point in the history
Added option `strip_html` to restore old behavior.

Resolves #165, #243
  • Loading branch information
danmactough committed Jul 15, 2018
1 parent f246934 commit 9def5d2
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 6 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
----------------------------------------------------------------------
node-feedparser is released under the MIT License
Copyright (c) 2011-2016 Dan MacTough and contributors
Copyright (c) 2011-2017 Dan MacTough and contributors

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ You can also check out this nice [working implementation](https://github.com/scr
behavior. If you want total control over handling these errors and optionally
aborting parsing the feed, use this option.

- `strip_html` - Set to `true` to override Feedparser's default behavior, which is
to pass through all substrings that look like html. In older versions, we always
stripped these html-like substrings to help users avoid inadvertently creating
XSS vulnerabilities by reflecting the value of these elements without properly
escaping them. We decided that wasn't particularly helpful because the simple
sanitation we were performing didn't address all cases and did a poor job. However,
if you were relying on the legacy behavior, you can set this option to `true`.

## Examples

See the [`examples`](examples/) directory.
Expand Down Expand Up @@ -206,7 +214,7 @@ the original inspiration and a starting point.

(The MIT License)

Copyright (c) 2011-2016 Dan MacTough and contributors
Copyright (c) 2011-2017 Dan MacTough and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the 'Software'), to deal in
Expand Down
13 changes: 10 additions & 3 deletions lib/feedparser/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ function FeedParser (options) {
if (!('normalize' in this.options)) this.options.normalize = true;
if (!('addmeta' in this.options)) this.options.addmeta = true;
if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true;
if (!('strip_html' in this.options)) this.options.strip_html = false;
if ('MAX_BUFFER_LENGTH' in this.options) {
sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers
} else {
Expand Down Expand Up @@ -430,6 +431,7 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {

var meta = {}
, normalize = !options || (options && options.normalize)
, stripHtml = !options || (options && options.strip_html)
;

if (normalize) {
Expand Down Expand Up @@ -765,8 +767,10 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
if (!meta.xmlurl && this.options.feedurl) {
meta.xmlurl = meta.xmlUrl = this.options.feedurl;
}
meta.title = meta.title && _.stripHtml(meta.title);
meta.description = meta.description && _.stripHtml(meta.description);
if (stripHtml) {
meta.title = meta.title && _.stripHtml(meta.title);
meta.description = meta.description && _.stripHtml(meta.description);
}
}

return meta;
Expand All @@ -777,6 +781,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){

var item = {}
, normalize = !options || (options && options.normalize)
, stripHtml = !options || (options && options.strip_html)
;

if (normalize) {
Expand Down Expand Up @@ -1106,7 +1111,9 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
item.link = item.guid;
}
}
item.title = item.title && _.stripHtml(item.title);
if (stripHtml) {
item.title = item.title && _.stripHtml(item.title);
}
}
return item;
};
Expand Down
15 changes: 14 additions & 1 deletion test/strip-html.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,21 @@ describe('strip html', function () {

var feed = __dirname + '/feeds/title-with-angle-brackets.xml';

it('should aggressively strip html', function (done) {
it('should NOT aggressively strip html by default', function (done) {
fs.createReadStream(feed).pipe(new FeedParser())
.once('readable', function () {
var stream = this;
assert.equal(stream.read().title, 'RSS <<< Title >>>');
done();
})
.on('error', function (err) {
assert.ifError(err);
done(err);
});
});

it('should aggressively strip html with option `strip_html`', function (done) {
fs.createReadStream(feed).pipe(new FeedParser({ strip_html: true }))
.once('readable', function () {
var stream = this;
assert.equal(stream.read().title, 'RSS ');
Expand Down

0 comments on commit 9def5d2

Please sign in to comment.