@@ -78,50 +78,53 @@ def test_remove_xpaths_called_with_enumerable_xpaths
7878
7979 def test_strip_tags_with_quote
8080 input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
81- result = full_sanitize ( input )
8281 acceptable_results = [
8382 # libxml2 >= 2.9.14 and xerces+neko
8483 %{<" hi} ,
8584 # other libxml2
8685 %{ hi} ,
86+ # preserve_whitespace: true
87+ "<" hi" ,
8788 ]
8889
89- assert_includes ( acceptable_results , result )
90+ assert_full_sanitized ( acceptable_results , input )
9091 end
9192
9293 def test_strip_invalid_html
93- assert_equal "<<" , full_sanitize ( "<<<bad html" )
94+ assert_full_sanitized "<<" , "<<<bad html"
9495 end
9596
9697 def test_strip_nested_tags
9798 expected = "Wei<a onclick='alert(document.cookie);'/>rdos"
9899 input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
99- assert_equal expected , full_sanitize ( input )
100+ assert_full_sanitized expected , input
100101 end
101102
102103 def test_strip_tags_multiline
103- expected = %{This is a test.\n \n \n \n It no longer contains any HTML.\n }
104104 input = %{<h1>This is <b>a <a href="" target="_blank">test</a></b>.</h1>\n \n <!-- it has a comment -->\n \n <p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n }
105+ acceptable_results = [
106+ %{This is a test.\n \n \n \n It no longer contains any HTML.\n } ,
107+ # preserve_whitespace: true
108+ %{\n This is a test.\n \n It no longer contains any HTML.\n \n }
109+ ]
105110
106- assert_equal expected , full_sanitize ( input )
111+ assert_full_sanitized acceptable_results , input
107112 end
108113
109114 def test_remove_unclosed_tags
110115 input = "This is <-- not\n a comment here."
111- result = full_sanitize ( input )
112116 acceptable_results = [
113117 # libxml2 >= 2.9.14 and xerces+neko
114118 %{This is <-- not\n a comment here.} ,
115119 # other libxml2
116120 %{This is } ,
117121 ]
118122
119- assert_includes ( acceptable_results , result )
123+ assert_full_sanitized ( acceptable_results , input )
120124 end
121125
122126 def test_strip_cdata
123127 input = "This has a <![CDATA[<section>]]> here."
124- result = full_sanitize ( input )
125128 acceptable_results = [
126129 # libxml2 = 2.9.14
127130 %{This has a <![CDATA[]]> here.} ,
@@ -131,7 +134,7 @@ def test_strip_cdata
131134 %{This has a here.} ,
132135 ]
133136
134- assert_includes ( acceptable_results , result )
137+ assert_full_sanitized ( acceptable_results , input )
135138 end
136139
137140 def test_strip_unclosed_cdata
@@ -153,40 +156,52 @@ def test_strip_unclosed_cdata
153156
154157 def test_strip_blank_string
155158 assert_nil full_sanitize ( nil )
156- assert_equal "" , full_sanitize ( "" )
157- assert_equal " " , full_sanitize ( " " )
159+ assert_nil full_sanitize ( nil , preserve_whitespace : true )
160+ assert_full_sanitized "" , ""
161+ assert_full_sanitized " " , " "
158162 end
159163
160164 def test_strip_tags_with_plaintext
161- assert_equal "Don't touch me" , full_sanitize ( "Don't touch me" )
165+ assert_full_sanitized "Don't touch me" , "Don't touch me"
162166 end
163167
164168 def test_strip_tags_with_tags
165- assert_equal "This is a test." , full_sanitize ( "<p >This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>" )
169+ assert_full_sanitized "This is a test." , "<b >This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</b>"
166170 end
167171
168172 def test_escape_tags_with_many_open_quotes
169- assert_equal "<<" , full_sanitize ( "<<<bad html>" )
173+ assert_full_sanitized "<<" , "<<<bad html>"
170174 end
171175
172176 def test_strip_tags_with_sentence
173- assert_equal "This is a test." , full_sanitize ( "This is a test." )
177+ assert_full_sanitized "This is a test." , "This is a test."
174178 end
175179
176180 def test_strip_tags_with_comment
177- assert_equal "This has a here." , full_sanitize ( "This has a <!-- comment --> here." )
181+ assert_full_sanitized "This has a here." , "This has a <!-- comment --> here."
178182 end
179183
180184 def test_strip_tags_with_frozen_string
181- assert_equal "Frozen string with no tags" , full_sanitize ( "Frozen string with no tags" )
185+ assert_full_sanitized "Frozen string with no tags" , "Frozen string with no tags"
182186 end
183187
184188 def test_full_sanitize_respect_html_escaping_of_the_given_string
185- assert_equal 'test\r\nstring' , full_sanitize ( 'test\r\nstring' )
186- assert_equal "&" , full_sanitize ( "&" )
187- assert_equal "&" , full_sanitize ( "&" )
188- assert_equal "&amp;" , full_sanitize ( "&amp;" )
189- assert_equal "omg <script>BOM</script>" , full_sanitize ( "omg <script>BOM</script>" )
189+ assert_full_sanitized 'test\r\nstring' , 'test\r\nstring'
190+ assert_full_sanitized "&" , "&"
191+ assert_full_sanitized "&" , "&"
192+ assert_full_sanitized "&amp;" , "&amp;"
193+ assert_full_sanitized "omg <script>BOM</script>" , "omg <script>BOM</script>"
194+ end
195+
196+ def test_full_sanitize_preserve_whitespace
197+ assert_equal "\n a\n \n b\n " , full_sanitize ( "<p>a</p><p>b</p>" , preserve_whitespace : true )
198+ end
199+
200+ def test_full_sanitize_preserve_whitespace_ascii_8bit_string
201+ full_sanitize ( "<a>hello</a>" . encode ( "ASCII-8BIT" ) ) . tap do |sanitized |
202+ assert_equal "hello" , sanitized
203+ assert_equal Encoding ::UTF_8 , sanitized . encoding
204+ end
190205 end
191206
192207 def test_strip_links_with_tags_in_tags
@@ -917,6 +932,11 @@ def assert_sanitized(input, expected = nil)
917932 assert_equal ( ( expected || input ) , safe_list_sanitize ( input ) )
918933 end
919934
935+ def assert_full_sanitized ( acceptable_results , input )
936+ assert_includes ( Array ( acceptable_results ) , full_sanitize ( input ) )
937+ assert_includes ( Array ( acceptable_results ) , full_sanitize ( input , preserve_whitespace : true ) )
938+ end
939+
920940 def sanitize_css ( input )
921941 Rails ::Html ::SafeListSanitizer . new . sanitize_css ( input )
922942 end
0 commit comments