From b67b6ace13f06f813e2e39ab894f990e1d735b94 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 11 May 2023 09:35:04 -0400 Subject: [PATCH 01/10] rubocop: enable Minitest/NoAssertions --- .rubocop.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.rubocop.yml b/.rubocop.yml index fa00f06..b96841e 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -343,3 +343,6 @@ Minitest/SkipEnsure: Minitest/UnreachableAssertion: Enabled: true + +Minitest/NoAssertions: + Enabled: true From 1a3a812b4778426f55584fd412c559638846f82c Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 10 May 2023 16:15:46 -0400 Subject: [PATCH 02/10] style: scrubber tag and attribute arrays --- lib/rails/html/sanitizer.rb | 63 ++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/lib/rails/html/sanitizer.rb b/lib/rails/html/sanitizer.rb index dc50430..73d5468 100644 --- a/lib/rails/html/sanitizer.rb +++ b/lib/rails/html/sanitizer.rb @@ -2,7 +2,7 @@ module Rails module Html - XPATHS_TO_REMOVE = %w{.//script .//form comment()} + XPATHS_TO_REMOVE = [".//script", ".//form", "comment()"] class Sanitizer # :nodoc: def sanitize(html, options = {}) @@ -106,10 +106,63 @@ class << self attr_accessor :allowed_tags attr_accessor :allowed_attributes end - self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr - acronym a img blockquote del ins)) - self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) + self.allowed_tags = Set.new([ + "a", + "abbr", + "acronym", + "address", + "b", + "big", + "blockquote", + "br", + "cite", + "code", + "dd", + "del", + "dfn", + "div", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hr", + "i", + "img", + "ins", + "kbd", + "li", + "ol", + "p", + "pre", + "samp", + "small", + "span", + "strong", + "sub", + "sup", + "tt", + "ul", + "var", + ]) + self.allowed_attributes = Set.new([ + "abbr", + "alt", + "cite", + "class", + "datetime", + "height", + "href", + "name", + "src", + "title", + "width", + "xml:lang", + ]) def initialize(prune: false) @permit_scrubber = PermitScrubber.new(prune: prune) From f3102d15883ae4c50ba66eea6a0ef1517a666e5b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 10 May 2023 17:04:13 -0400 Subject: [PATCH 03/10] feat: SafeListSanitizer allows "time" tag and "lang" attr Reasons: - the "datetime" attr is allowed, but is only valid in a "time" tag. - the "xml:lang" attr is allowed, but is only valid if "lang" attr is also present. The "time" tag and "lang" attribute should be considered safe and are allowed by Loofah. Also backfill tests around the default safe lists. --- CHANGELOG.md | 7 +++++++ lib/rails/html/sanitizer.rb | 2 ++ test/sanitizer_test.rb | 40 +++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9cd3b6..3bfec5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## next / unreleased + +* `SafeListSanitizer` allows `time` tag and `lang` attribute by default. + + *Mike Dalessio* + + ## 1.5.0 / 2023-01-20 * `SafeListSanitizer`, `PermitScrubber`, and `TargetScrubber` now all support pruning of unsafe tags. diff --git a/lib/rails/html/sanitizer.rb b/lib/rails/html/sanitizer.rb index 73d5468..aec89e7 100644 --- a/lib/rails/html/sanitizer.rb +++ b/lib/rails/html/sanitizer.rb @@ -145,6 +145,7 @@ class << self "strong", "sub", "sup", + "time", "tt", "ul", "var", @@ -157,6 +158,7 @@ class << self "datetime", "height", "href", + "lang", "name", "src", "title", diff --git a/test/sanitizer_test.rb b/test/sanitizer_test.rb index 41b274e..6b36876 100644 --- a/test/sanitizer_test.rb +++ b/test/sanitizer_test.rb @@ -223,6 +223,21 @@ def test_allow_colons_in_path_component end end + def test_lang_and_xml_lang + # https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes + # + # 3.2.6.2 The lang and xml:lang attributes + # + # ... Authors must not use the lang attribute in the XML namespace on HTML elements in HTML + # documents. To ease migration to and from XML, authors may specify an attribute in no namespace + # with no prefix and with the literal localname "xml:lang" on HTML elements in HTML documents, + # but such attributes must only be specified if a lang attribute in no namespace is also + # specified, and both attributes must have the same value when compared in an ASCII + # case-insensitive manner. + input = expected = "
foo
" + assert_sanitized(input, expected) + end + def test_should_handle_non_html assert_sanitized "abc" end @@ -517,6 +532,31 @@ def test_allow_data_attribute_if_requested assert_equal %(foo), safe_list_sanitize(text, attributes: ["data-foo"]) end + # https://developer.mozilla.org/en-US/docs/Glossary/Void_element + VOID_ELEMENTS = %w[area base br col embed hr img input keygen link meta param source track wbr] + + %w(strong em b i p code pre tt samp kbd var sub + sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr + acronym a img blockquote del ins time).each do |tag_name| + define_method "test_default_safelist_should_allow_#{tag_name}" do + if VOID_ELEMENTS.include?(tag_name) + assert_sanitized("<#{tag_name}>") + else + assert_sanitized("<#{tag_name}>foo") + end + end + end + + def test_datetime_attribute + assert_sanitized("