From ff9364efdbe6a0e4bfb7a0eaa2d8c662dd2c0d9d Mon Sep 17 00:00:00 2001
From: goromlagche <mrinmoy.das91@gmail.com>
Date: Wed, 8 Jun 2022 17:16:39 +0530
Subject: [PATCH 1/2] add failing test

---
 test/sanitizer_test.rb | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test/sanitizer_test.rb b/test/sanitizer_test.rb
index df8e64b..9920023 100644
--- a/test/sanitizer_test.rb
+++ b/test/sanitizer_test.rb
@@ -515,6 +515,16 @@ def test_allow_data_attribute_if_requested
     assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ['data-foo'])
   end
 
+  def test_sanitize_data_protocol
+    text = "- XSS\"><iframe src=\"data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=\">- XSS\"><iframe src=\"data:application/vnd.wap.xhtml+xml;base64,PHg6c2NyaXB0IHhtbG5zOng9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGh0bWwiPmFsZXJ0KGRvY3VtZW50LmRvbWFpbik8L3g6c2NyaXB0Pg==\">"
+    # Even with iframe/src allowed, both base64 data: URI src values (text/html, application/vnd.wap.xhtml+xml) must be stripped.
+    scope_allowed_tags %w(iframe) do
+      scope_allowed_attributes %w(src) do
+        assert_equal %(- XSS\"&gt;<iframe>- XSS\"&gt;<iframe></iframe></iframe>), safe_list_sanitize(text)
+      end
+    end
+  end
+
   def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
     skip if RUBY_VERSION < "2.3"
 

From 97e67bcec72057fbc9ca29d62e0e7a3dba6d43e8 Mon Sep 17 00:00:00 2001
From: goromlagche <mrinmoy.das91@gmail.com>
Date: Wed, 8 Jun 2022 17:17:37 +0530
Subject: [PATCH 2/2] handle data mediatypes better

---
 lib/rails/html/scrubbers.rb | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/rails/html/scrubbers.rb b/lib/rails/html/scrubbers.rb
index 13b6d6f..315be4e 100644
--- a/lib/rails/html/scrubbers.rb
+++ b/lib/rails/html/scrubbers.rb
@@ -144,6 +144,13 @@ def scrub_attribute(node, attr_node)
           val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase
           if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::SafeList::PROTOCOL_SEPARATOR)[0])
             attr_node.remove
+          elsif val_unescaped.split(Loofah::HTML5::SafeList::PROTOCOL_SEPARATOR)[0] == "data"
+            # permit only allowed data mediatypes
+            mediatype = val_unescaped.split(Loofah::HTML5::SafeList::PROTOCOL_SEPARATOR)[1]
+            mediatype, _ = mediatype.split(";")[0..1] if mediatype
+            if mediatype && !Loofah::HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
+              attr_node.remove
+            end
           end
         end
         if Loofah::HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)