From 448b53feed0e0d8ed3113cc8c76ef75ff0072813 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Mon, 8 Dec 2025 11:49:28 -0800 Subject: [PATCH] Add conformance tests for overlong varints as tags. No wire format should ever contain an overlong varint, so the topic here is only how to react to non-standard and potentially corrupted data. The situation today is that there's 4 main ways that implementations deal when parsing tags: 1) parse up to 10 bytes, cast to uint32 2) parse up to 10 bytes, reject if it is above uint32_max 3) parse up to 5 bytes, cast to uint32 4) parse up to 5 bytes, reject if it is above uint32_max Of our primary supported implementations, these four strategies are used by Java, Go, C++ and upb correspondingly. Based on examining the situation, the decision taken is that: - Coercing down silently ignoring bits in the tag is dangerous to interpretation-confusion / silent misparsing, which means Java approach is dangerous. - Needing to support parsing up to 10 bytes (even when they may just be all 0x80 and no content) would have real performance implications on the upb and C++ parsers. Since it should really never happen taking any performance hit on all parses based on a hypothetical is considered undesirable. For that reason, the conformance test is set to match upb's behavior, which is slight mismatch to C++ and Go behavior today (in different ways), and larger mismatch to the Java behavior today. Because fixing this 'bug' may be disruptive to a customer in theory (though it would probably mean they have some bad data that was accidentally parsing), we may hold back fixing the behavior to a breaking change release; this change to the conformance suite only establishes the decision on preferred behavior. PiperOrigin-RevId: 841856475 --- conformance/binary_json_conformance_suite.cc | 35 ++++++++++++++++++++ conformance/failure_list_cpp.txt | 2 +- conformance/failure_list_csharp.txt | 4 +++ conformance/failure_list_java.txt | 3 ++ conformance/failure_list_java_lite.txt | 3 ++ conformance/failure_list_jruby.txt | 3 ++ conformance/failure_list_objc.txt | 4 +++ conformance/failure_list_php.txt | 4 +++ conformance/failure_list_python.txt | 3 ++ conformance/failure_list_python_cpp.txt | 1 + conformance/failure_list_rust_cc.txt | 1 + 11 files changed, 62 insertions(+), 1 deletion(-) diff --git a/conformance/binary_json_conformance_suite.cc b/conformance/binary_json_conformance_suite.cc index a5d137342b478..ca24002e393fd 100644 --- a/conformance/binary_json_conformance_suite.cc +++ b/conformance/binary_json_conformance_suite.cc @@ -1411,6 +1411,41 @@ void BinaryAndJsonConformanceSuiteImpl::TestIllegalTags() { name.back() += i; ExpectParseFailureForProto(nullfield[i], name, REQUIRED); } + + // Reused for a few cases: this is a single byte tag for a field number 1 + // varint but where the byte has the continuation bit set, so that we can + // easily construct longer tags. + std::string tag_with_continuation_bit = + tag(1, WireFormatLite::WIRETYPE_VARINT); + assert(tag_with_continuation_bit.size() == 1); + tag_with_continuation_bit[0] |= 0b10000000; + + // A varint where field number is far out of range of + // maximum legal field number. The lower 5 bytes of the varint do look like a + // well-formed tag for field number 1. + ExpectParseFailureForProto( + absl::StrCat(tag_with_continuation_bit, "\x80\x80\x80\x80\x80\x0F", + varint(1234)), + "BadTag_FieldNumberTooHigh", REQUIRED); + + // A 5-byte tag varint where the value is above UINT32_MAX (bit 35 is set) + ExpectParseFailureForProto( + absl::StrCat(tag_with_continuation_bit, "\x80\x80\x80\x40", varint(1234)), + "BadTag_FieldNumberSlightlyTooHigh", REQUIRED); + + // A tag where the varint is more than 5 bytes but only because it is an + // overlong varint, so the decoded value is still below UINT32_MAX. + ExpectParseFailureForProto( + absl::StrCat(tag_with_continuation_bit, "\x80\x80\x80\x80\x80\x80\x80", + std::string("\0", 1), varint(1234)), + "BadTag_OverlongVarint", REQUIRED); + + // An overlong varint that is even more than 10 bytes. + ExpectParseFailureForProto( + absl::StrCat(tag_with_continuation_bit, + "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80", + std::string("\0", 1), varint(1234)), + "BadTag_VarintMoreThanTenBytes", REQUIRED); } template diff --git a/conformance/failure_list_cpp.txt b/conformance/failure_list_cpp.txt index b0fc7f364dd8b..c10b405464b11 100644 --- a/conformance/failure_list_cpp.txt +++ b/conformance/failure_list_cpp.txt @@ -34,4 +34,4 @@ Recommended.*.FieldMaskTooManyUnderscore.JsonOutput Recommended.*.JsonInput.FieldMaskInvalidCharacter # Should have failed to parse, but didn't. Required.*.JsonInput.SingleValueForRepeatedFieldInt32 # Should have failed to parse, but didn't. Required.*.JsonInput.SingleValueForRepeatedFieldMessage # Should have failed to parse, but didn't. - +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. diff --git a/conformance/failure_list_csharp.txt b/conformance/failure_list_csharp.txt index e7fb58c42e9f5..19855bbb320d4 100644 --- a/conformance/failure_list_csharp.txt +++ b/conformance/failure_list_csharp.txt @@ -37,3 +37,7 @@ Recommended.Editions_Proto3.ValueRejectNanNumberValue.JsonOutput Required.Editions_Proto2.ProtobufInput.UnknownOrdering.ProtobufOutput Required.Editions_Proto3.ProtobufInput.UnknownOrdering.ProtobufOutput +Required.*.ProtobufInput.BadTag_FieldNumberTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_OverlongVarint # Should have failed to parse, but didn't. + diff --git a/conformance/failure_list_java.txt b/conformance/failure_list_java.txt index 9e2e525568c0d..a258f29f56e63 100644 --- a/conformance/failure_list_java.txt +++ b/conformance/failure_list_java.txt @@ -43,3 +43,6 @@ Required.*.JsonInput.RepeatedFieldWrongElementTypeExpectingStringsGotBool Required.*.JsonInput.RepeatedFieldWrongElementTypeExpectingStringsGotInt # Should have failed to parse, but didn't. Required.*.JsonInput.StringFieldNotAString # Should have failed to parse, but didn't. Required.*.ProtobufInput.UnknownOrdering.ProtobufOutput # Unknown field mismatch +Required.*.ProtobufInput.BadTag_FieldNumberTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_OverlongVarint # Should have failed to parse, but didn't. \ No newline at end of file diff --git a/conformance/failure_list_java_lite.txt b/conformance/failure_list_java_lite.txt index 1494114f0877d..f15fa9206878a 100644 --- a/conformance/failure_list_java_lite.txt +++ b/conformance/failure_list_java_lite.txt @@ -6,3 +6,6 @@ Required.*.ProtobufInput.PrematureEofInDelimitedDataForKnownNonRepeatedValue.MESSAGE # Should have failed to parse, but didn't. Required.*.ProtobufInput.PrematureEofInDelimitedDataForKnownRepeatedValue.MESSAGE # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_OverlongVarint # Should have failed to parse, but didn't. diff --git a/conformance/failure_list_jruby.txt b/conformance/failure_list_jruby.txt index b02f9a5ecf429..cbfaf7f757739 100644 --- a/conformance/failure_list_jruby.txt +++ b/conformance/failure_list_jruby.txt @@ -144,3 +144,6 @@ Required.Editions_Proto2.ProtobufInput.UnknownOrdering.ProtobufOutput Required.Editions_Proto3.ProtobufInput.UnknownOrdering.ProtobufOutput Required.Proto2.ProtobufInput.UnknownOrdering.ProtobufOutput Required.Proto3.ProtobufInput.UnknownOrdering.ProtobufOutput +Required.*.ProtobufInput.BadTag_FieldNumberTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_OverlongVarint # Should have failed to parse, but didn't. diff --git a/conformance/failure_list_objc.txt b/conformance/failure_list_objc.txt index e34501ead7344..f4552f0a5efcd 100644 --- a/conformance/failure_list_objc.txt +++ b/conformance/failure_list_objc.txt @@ -1,2 +1,6 @@ # JSON input or output tests are skipped (in conformance_objc.m) as mobile # platforms don't support JSON wire format to avoid code bloat. + +Required.*.ProtobufInput.BadTag_FieldNumberTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_OverlongVarint # Should have failed to parse, but didn't. diff --git a/conformance/failure_list_php.txt b/conformance/failure_list_php.txt index 3ff79c2c66c6e..0b890e4d7a5aa 100644 --- a/conformance/failure_list_php.txt +++ b/conformance/failure_list_php.txt @@ -45,3 +45,7 @@ Required.*.DurationProtoNanosWrongSignNegativeSecs.JsonOutput # Should have fail Required.*.TimestampProtoNanoTooLarge.JsonOutput # Should have failed to serialize, but didn't. Required.*.TimestampProtoNegativeNanos.JsonOutput # Should have failed to serialize, but didn't. Required.*.JsonInput.SingleValueForRepeatedFieldInt32 # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_OverlongVarint # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_VarintMoreThanTenBytes # Should have failed to parse, but didn't. diff --git a/conformance/failure_list_python.txt b/conformance/failure_list_python.txt index c7b7d93918309..4c9367630a073 100644 --- a/conformance/failure_list_python.txt +++ b/conformance/failure_list_python.txt @@ -1,2 +1,5 @@ Recommended.*.JsonInput.FieldNameDuplicateDifferentCasing1 # Should have failed to parse, but didn't. Recommended.*.JsonInput.FieldNameDuplicateDifferentCasing2 # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_OverlongVarint # Should have failed to parse, but didn't. diff --git a/conformance/failure_list_python_cpp.txt b/conformance/failure_list_python_cpp.txt index f8fe23c940f3e..cb4598cbfb820 100644 --- a/conformance/failure_list_python_cpp.txt +++ b/conformance/failure_list_python_cpp.txt @@ -9,3 +9,4 @@ Recommended.*.JsonInput.FieldNameDuplicateDifferentCasing1 # Should have failed to parse, but didn't. Recommended.*.JsonInput.FieldNameDuplicateDifferentCasing2 # Should have failed to parse, but didn't. +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't. diff --git a/conformance/failure_list_rust_cc.txt b/conformance/failure_list_rust_cc.txt index e69de29bb2d1d..942fd518c5105 100644 --- a/conformance/failure_list_rust_cc.txt +++ b/conformance/failure_list_rust_cc.txt @@ -0,0 +1 @@ +Required.*.ProtobufInput.BadTag_FieldNumberSlightlyTooHigh # Should have failed to parse, but didn't.