@@ -80,50 +80,53 @@ module FullSanitizerTest
80
80
81
81
# Sanitizes an input that starts with a stray `<"` followed by an <img> tag
# carrying an onload handler, and checks the result against a list of
# acceptable outputs. The list entries are annotated by parser version and by
# the preserve_whitespace: true mode.
# NOTE(review): the `"<" hi"` entry has unbalanced double quotes as rendered
# here; presumably an escaping/entity artifact of this view -- confirm against
# the real file before relying on it.
def test_strip_tags_with_quote
82
82
input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
83
- result = full_sanitize ( input )
84
83
acceptable_results = [
85
84
# libxml2 >= 2.9.14 and xerces+neko
86
85
%{<" hi} ,
87
86
# other libxml2
88
87
%{ hi} ,
88
+ # preserve_whitespace: true
89
+ "<" hi" ,
89
90
]
90
91
91
- assert_includes ( acceptable_results , result )
92
+ assert_full_sanitized ( acceptable_results , input )
92
93
end
93
94
94
95
# Expects the malformed, never-closed input "<<<bad html" to sanitize to "<<".
def test_strip_invalid_html
95
- assert_equal "<<" , full_sanitize ( "<<<bad html" )
96
+ assert_full_sanitized "<<" , "<<<bad html"
96
97
end
97
98
98
99
# Feeds an input where an <a> element is nested inside what looks like another
# tag's opening bracket, and compares the sanitized output with `expected`.
def test_strip_nested_tags
99
100
expected = "Wei<a onclick='alert(document.cookie);'/>rdos"
100
101
input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
101
- assert_equal expected , full_sanitize ( input )
102
+ assert_full_sanitized expected , input
102
103
end
103
104
104
105
# Sanitizes a multi-line HTML snippet (heading, HTML comment, nested inline
# tags in a paragraph) and checks the remaining text. Two outputs are accepted:
# one plain entry and one annotated for preserve_whitespace: true, which differ
# only in where the newlines fall.
def test_strip_tags_multiline
105
- expected = %{This is a test.\n \n \n \n It no longer contains any HTML.\n }
106
106
input = %{<h1>This is <b>a <a href="" target="_blank">test</a></b>.</h1>\n \n <!-- it has a comment -->\n \n <p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n }
107
+ acceptable_results = [
108
+ %{This is a test.\n \n \n \n It no longer contains any HTML.\n } ,
109
+ # preserve_whitespace: true
110
+ %{\n This is a test.\n \n It no longer contains any HTML.\n \n }
111
+ ]
107
112
108
- assert_equal expected , full_sanitize ( input )
113
+ assert_full_sanitized acceptable_results , input
109
114
end
110
115
111
116
# Sanitizes text containing `<--`, which resembles but is not a comment opener.
# Two outcomes are accepted, annotated by parser version: the text kept as-is,
# or everything from the `<` onward dropped.
def test_remove_unclosed_tags
112
117
input = "This is <-- not\n a comment here."
113
- result = full_sanitize ( input )
114
118
acceptable_results = [
115
119
# libxml2 >= 2.9.14 and xerces+neko
116
120
%{This is <-- not\n a comment here.} ,
117
121
# other libxml2
118
122
%{This is } ,
119
123
]
120
124
121
- assert_includes ( acceptable_results , result )
125
+ assert_full_sanitized ( acceptable_results , input )
122
126
end
123
127
124
128
# Sanitizes text containing a CDATA section that wraps a <section> tag and
# checks the result against a list of acceptable outputs.
# NOTE(review): a hunk boundary falls inside the acceptable_results list in this
# view, so the list shown here may not be complete -- confirm against the file.
def test_strip_cdata
125
129
input = "This has a <![CDATA[<section>]]> here."
126
- result = full_sanitize ( input )
127
130
acceptable_results = [
128
131
# libxml2 = 2.9.14
129
132
%{This has a <![CDATA[]]> here.} ,
@@ -133,51 +136,68 @@ def test_strip_cdata
133
136
%{This has a here.} ,
134
137
]
135
138
136
- assert_includes ( acceptable_results , result )
139
+ assert_full_sanitized ( acceptable_results , input )
137
140
end
138
141
139
142
# Blank inputs: nil is expected to come back as nil (with and without
# preserve_whitespace: true), and the empty and single-space strings are
# expected to come back unchanged.
def test_strip_blank_string
140
143
assert_nil full_sanitize ( nil )
141
- assert_equal "" , full_sanitize ( "" )
142
- assert_equal " " , full_sanitize ( " " )
144
+ assert_nil full_sanitize ( nil , preserve_whitespace : true )
145
+ assert_full_sanitized "" , ""
146
+ assert_full_sanitized " " , " "
143
147
end
144
148
145
149
# Plain text without any markup is expected to pass through unchanged.
def test_strip_tags_with_plaintext
146
- assert_equal "Don't touch me" , full_sanitize ( "Don't touch me" )
150
+ assert_full_sanitized "Don't touch me" , "Don't touch me"
147
151
end
148
152
149
153
# Nested and unbalanced inline tags (note the second <u> is never closed) are
# expected to be removed, leaving only "This is a test.".
# The removed line wraps the input in `<p >`; the added line uses `<b >`.
def test_strip_tags_with_tags
150
- assert_equal "This is a test." , full_sanitize ( "<p >This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>" )
154
+ assert_full_sanitized "This is a test." , "<b >This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</b>"
151
155
end
152
156
153
157
# Expects "<<<bad html>" (extra opening brackets before a closed tag) to
# sanitize to "<<".
def test_escape_tags_with_many_open_quotes
154
- assert_equal "<<" , full_sanitize ( "<<<bad html>" )
158
+ assert_full_sanitized "<<" , "<<<bad html>"
155
159
end
156
160
157
161
# A plain sentence with no tags is expected to come back unchanged.
def test_strip_tags_with_sentence
158
- assert_equal "This is a test." , full_sanitize ( "This is a test." )
162
+ assert_full_sanitized "This is a test." , "This is a test."
159
163
end
160
164
161
165
# An HTML comment embedded in text is expected to be removed while the
# surrounding text is kept.
def test_strip_tags_with_comment
162
- assert_equal "This has a here." , full_sanitize ( "This has a <!-- comment --> here." )
166
+ assert_full_sanitized "This has a here." , "This has a <!-- comment --> here."
163
167
end
164
168
165
169
# A tag-free string is expected to come back unchanged.
# NOTE(review): nothing in this method freezes the literal explicitly;
# presumably it relies on a frozen_string_literal magic comment outside this
# view -- confirm.
def test_strip_tags_with_frozen_string
166
- assert_equal "Frozen string with no tags" , full_sanitize ( "Frozen string with no tags" )
170
+ assert_full_sanitized "Frozen string with no tags" , "Frozen string with no tags"
167
171
end
168
172
169
173
# Checks how the sanitizer treats text that already contains escape sequences
# or entity-like content: a single-quoted string with literal \r\n, ampersand
# cases, and escaped-looking script markup.
# NOTE(review): as rendered here the two "&" assertions are textually identical
# and every expected value equals its input; this looks like HTML entities were
# decoded by the view (e.g. "&amp;" shown as "&") -- confirm the real literals
# against the file before editing.
def test_full_sanitize_respect_html_escaping_of_the_given_string
170
- assert_equal 'test\r\nstring' , full_sanitize ( 'test\r\nstring' )
171
- assert_equal "&" , full_sanitize ( "&" )
172
- assert_equal "&" , full_sanitize ( "&" )
173
- assert_equal "&amp;" , full_sanitize ( "&amp;" )
174
- assert_equal "omg <script>BOM</script>" , full_sanitize ( "omg <script>BOM</script>" )
174
+ assert_full_sanitized 'test\r\nstring' , 'test\r\nstring'
175
+ assert_full_sanitized "&" , "&"
176
+ assert_full_sanitized "&" , "&"
177
+ assert_full_sanitized "&amp;" , "&amp;"
178
+ assert_full_sanitized "omg <script>BOM</script>" , "omg <script>BOM</script>"
179
+ end
180
+
181
# With preserve_whitespace: true, the expected output contains newlines where
# the input had a <p> element and a <br>, instead of the tags simply vanishing.
+ def test_full_sanitize_preserve_whitespace
182
+ assert_equal "\n Paragraphs\n and \n newlines" , full_sanitize ( "<p>Paragraphs</p> and <br> newlines" , preserve_whitespace : true )
183
+ end
184
+
185
# Sanitizes an input string encoded as ASCII-8BIT and asserts both the stripped
# text ("hello") and that the returned string's encoding is UTF-8.
# NOTE(review): the method name mentions preserve_whitespace, but the call shown
# passes no preserve_whitespace option -- confirm whether
# `preserve_whitespace: true` was intended here.
+ def test_full_sanitize_preserve_whitespace_ascii_8bit_string
186
+ full_sanitize ( "<a>hello</a>" . encode ( "ASCII-8BIT" ) ) . tap do |sanitized |
187
+ assert_equal "hello" , sanitized
188
+ assert_equal Encoding ::UTF_8 , sanitized . encoding
189
+ end
175
190
end
176
191
177
192
protected
178
193
# Test helper: builds a new FullSanitizer from `module_under_test` and returns
# the result of sanitizing `input`, forwarding `options` (an empty hash by
# default) unchanged. `module_under_test` is defined outside this view.
def full_sanitize ( input , options = { } )
179
194
module_under_test ::FullSanitizer . new . sanitize ( input , options )
180
195
end
196
+
197
# Asserts that sanitizing `input` yields one of `acceptable_results`, once with
# default options and once with preserve_whitespace: true.
# `acceptable_results` may be a single value or an array; Array() wraps it.
# Both modes are checked against the same list, so this does not verify which
# mode produced which entry.
+ def assert_full_sanitized ( acceptable_results , input )
198
+ assert_includes ( Array ( acceptable_results ) , full_sanitize ( input ) )
199
+ assert_includes ( Array ( acceptable_results ) , full_sanitize ( input , preserve_whitespace : true ) )
200
+ end
181
201
end
182
202
183
203
class HTML4FullSanitizerTest < Minitest ::Test
0 commit comments