Change default behavior to not strip html by default

Added option `strip_html` to restore old behavior. Resolves #165, #243
danmactough · Jul 15, 2018 · 9def5d2 · 9def5d2
1 parent f246934
commit 9def5d2
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 6 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 ----------------------------------------------------------------------
 node-feedparser is released under the MIT License
-Copyright (c) 2011-2016 Dan MacTough and contributors
+Copyright (c) 2011-2017 Dan MacTough and contributors
 
 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation

diff --git a/README.md b/README.md
@@ -97,6 +97,14 @@ You can also check out this nice [working implementation](https://github.com/scr
   behavior. If you want total control over handling these errors and optionally
   aborting parsing the feed, use this option.
 
+- `strip_html` - Set to `true` to override Feedparser's default behavior, which is
+  to pass through all substrings that look like html. In older versions, we always
+  stripped these html-like substrings to help users avoid inadvertently creating
+  XSS vulnerabilities by reflecting the value of these elements without properly
+  escaping them. We decided that wasn't particularly helpful because the simple
+  sanitation we were performing didn't address all cases and did a poor job. However,
+  if you were relying on the legacy behavior, you can set this option to `true`.
+
 ## Examples
 
 See the [`examples`](examples/) directory.
@@ -206,7 +214,7 @@ the original inspiration and a starting point.
 
 (The MIT License)
 
-Copyright (c) 2011-2016 Dan MacTough and contributors
+Copyright (c) 2011-2017 Dan MacTough and contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the 'Software'), to deal in

diff --git a/lib/feedparser/index.js b/lib/feedparser/index.js
@@ -72,6 +72,7 @@ function FeedParser (options) {
   if (!('normalize' in this.options)) this.options.normalize = true;
   if (!('addmeta' in this.options)) this.options.addmeta = true;
   if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true;
+  if (!('strip_html' in this.options)) this.options.strip_html = false;
   if ('MAX_BUFFER_LENGTH' in this.options) {
     sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers
   } else {
@@ -430,6 +431,7 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
 
   var meta = {}
     , normalize = !options || (options && options.normalize)
+    , stripHtml = !options || (options && options.strip_html)
     ;
 
   if (normalize) {
@@ -765,8 +767,10 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
     if (!meta.xmlurl && this.options.feedurl) {
       meta.xmlurl = meta.xmlUrl = this.options.feedurl;
     }
-    meta.title = meta.title && _.stripHtml(meta.title);
-    meta.description = meta.description && _.stripHtml(meta.description);
+    if (stripHtml) {
+      meta.title = meta.title && _.stripHtml(meta.title);
+      meta.description = meta.description && _.stripHtml(meta.description);
+    }
   }
 
   return meta;
@@ -777,6 +781,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
 
   var item = {}
     , normalize = !options || (options && options.normalize)
+    , stripHtml = !options || (options && options.strip_html)
     ;
 
   if (normalize) {
@@ -1106,7 +1111,9 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
         item.link = item.guid;
       }
     }
-    item.title = item.title && _.stripHtml(item.title);
+    if (stripHtml) {
+      item.title = item.title && _.stripHtml(item.title);
+    }
   }
   return item;
 };

diff --git a/test/strip-html.js b/test/strip-html.js
@@ -2,8 +2,21 @@ describe('strip html', function () {
 
   var feed = __dirname + '/feeds/title-with-angle-brackets.xml';
 
-  it('should aggressively strip html', function (done) {
+  it('should NOT aggressively strip html by default', function (done) {
     fs.createReadStream(feed).pipe(new FeedParser())
+      .once('readable', function () {
+        var stream = this;
+        assert.equal(stream.read().title, 'RSS <<< Title >>>');
+        done();
+      })
+      .on('error', function (err) {
+        assert.ifError(err);
+        done(err);
+      });
+  });
+
+  it('should aggressively strip html with option `strip_html`', function (done) {
+    fs.createReadStream(feed).pipe(new FeedParser({ strip_html: true }))
       .once('readable', function () {
         var stream = this;
         assert.equal(stream.read().title, 'RSS ');