Fix parsing of invalidly placed closing parentheses

When a closing parenthesis is encountered by #next_token, it is invalid because it must always appear as part of a literal string (and in that form it is parsed by #parse_literal_string, not by #next_token).
gettalong · Sep 16, 2024 · 1250a06 · 1250a06
1 parent 5556991
commit 1250a06
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,7 @@
 * [HexaPDF::DigitalSignature::Signature#signed_data] to work for invalid offsets
 * [HexaPDF::DigitalSignature::Signing::DefaultHandler] to update the document's
   version to 2.0 when using PAdES
+* Parsing of invalid `)` character in PDF objects and content streams
 
 
 ## 0.47.0 - 2024-09-07

diff --git a/lib/hexapdf/content/parser.rb b/lib/hexapdf/content/parser.rb
@@ -112,7 +112,9 @@ def next_token
         elsif byte == 93 # ]
           @ss.pos += 1
           TOKEN_ARRAY_END
-        elsif byte == 123 || byte == 125 # { }
+        elsif byte == 41 # )
+          raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos)
+        elsif byte == 123 || byte == 125 # { }
           Token.new(@ss.get_byte)
         elsif byte == 37 # %
           unless @ss.skip_until(/(?=[\r\n])/)

diff --git a/lib/hexapdf/tokenizer.rb b/lib/hexapdf/tokenizer.rb
@@ -144,6 +144,8 @@ def next_token
       elsif byte == 93 # ]
         @ss.pos += 1
         TOKEN_ARRAY_END
+      elsif byte == 41 # )
+        raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos)
       elsif byte == 123 || byte == 125 # { }
         Token.new(@ss.get_byte)
       elsif byte == 37 # %

diff --git a/test/hexapdf/common_tokenizer_tests.rb b/test/hexapdf/common_tokenizer_tests.rb
@@ -104,6 +104,11 @@ module CommonTokenizerTests
     assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_token }
   end
 
+  it "next_token: fails on a closing parenthesis that is not part of a literal string" do
+    create_tokenizer(" )")
+    assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_token }
+  end
+
   it "next_token: fails on a missing greater than sign in a hex string" do
     create_tokenizer("<ABCD")
     assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_token }