Skip to content

Commit f6fade2

Browse files
committed
Fixed bugs in varint encoding / decoding
Changed enum value encoding to unsigned varint
1 parent 8df9b66 commit f6fade2

File tree

3 files changed

+46
-30
lines changed

3 files changed

+46
-30
lines changed

encoding-format.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ The following table describes how schemer encodes different values. Nullable val
3636
| Variable-size Integer | Each byte's most significant bit indicates more bytes follow. Lower 7 bits are concatenated (in big-endian order) to form [ZigZag-encoded integer](https://developers.google.com/protocol-buffers/docs/encoding?csw=1#types). |
3737
| Floating-point Number | Exactly `4 << n` bytes corresponding to the IEEE 754 binary representation of the floating-point number |
3838
| Complex Number | Exactly `4 << n` bytes for each floating-point number `a` and `b` where the complex number is `a + bi`. |
39-
| Enum | Stored as an unsigned fixed-size integer (see above), where `n` is determined by the number of enumerated values |
39+
| Enum | Stored as an unsigned variable-size integer (see above) |
4040
| Boolean | A single boolean value is encoded as 1 byte. 0 indicates `false` and any other value indicates `true`. |
4141
| Fixed-Length String | UTF-8 encoding of the string, padded with spaces to fit within allotted space. |
4242
| Variable-Length String | Length of the string is encoded as an unsigned variable-size integer followed by UTF-8 encoding of the string |
@@ -56,4 +56,4 @@ The following optimizations modify the above rules for encoding values:
5656
* For objects with a fixed number of nullable or boolean fields, a single bit map is placed a the start of the object
5757
* For arrays of a nullable type, each group of 8 elements is preceded by the corresponding null bit map
5858
* Arrays of type boolean are encoded as bit maps. The final byte's least significant bits are padded with zeros.
59-
* Variable-length arrays and objects w/variable fields may be stored in blocks, where each block indicates the number of elements or key-value pairs. A block size of 0 indicates the end of the array or object. A negative block size indicates that the block size is followed by the number of bytes in the block.
59+
* Variable-length arrays and objects w/variable fields may be stored in blocks, where each block indicates the number of elements or key-value pairs. A block size of 0 indicates the end of the array or object. A negative block size indicates that the block size is followed by the number of bytes in the block.

enum.go

+24-21
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,10 @@ func (s *EnumSchema) Encode(w io.Writer, i interface{}) error {
7575

7676
// EncodeValue uses the schema to write the encoded value of v to the output streamtream
7777
func (s *EnumSchema) EncodeValue(w io.Writer, v reflect.Value) error {
78-
varIntSchema := VarIntSchema{Signed: true, SchemaOptions: SchemaOptions{nullable: s.Nullable()}}
78+
varIntSchema := VarIntSchema{
79+
Signed: false,
80+
SchemaOptions: SchemaOptions{nullable: s.Nullable()},
81+
}
7982
return varIntSchema.EncodeValue(w, v)
8083
}
8184

@@ -92,19 +95,22 @@ func (s *EnumSchema) DecodeValue(r io.Reader, v reflect.Value) error {
9295

9396
// first we decode the actual encoded binary value
9497

95-
varIntSchema := VarIntSchema{Signed: true, SchemaOptions: SchemaOptions{nullable: s.Nullable()}}
98+
varIntSchema := VarIntSchema{
99+
Signed: false,
100+
SchemaOptions: SchemaOptions{nullable: s.Nullable()},
101+
}
96102

97-
var decodedVarInt int64
98-
var intPtr *int64 = &decodedVarInt // we pass in a pointer to varIntSchema.Decode so we can potentially
103+
var decodedVal uint64
104+
var valPtr *uint64 = &decodedVal // we pass in a pointer to varIntSchema.Decode so we can potentially
99105
// get back a nil value [in case we are dealing with a nullable type]
100106

101-
err := varIntSchema.Decode(r, &intPtr)
107+
err := varIntSchema.Decode(r, &valPtr)
102108
if err != nil {
103109
return err
104110
}
105111

106112
// now we check to see if varIntSchema.Decode returned us a nil value
107-
if intPtr == nil {
113+
if valPtr == nil {
108114
if v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface {
109115
if v.CanSet() {
110116
v.Set(reflect.Zero(v.Type()))
@@ -147,7 +153,7 @@ func (s *EnumSchema) DecodeValue(r io.Reader, v reflect.Value) error {
147153

148154
// check to see if the decoded value is in our map of enumerated values
149155
if s.Values != nil {
150-
if _, ok := s.Values[int(decodedVarInt)]; !ok {
156+
if _, ok := s.Values[int(decodedVal)]; !ok {
151157
// however, maybe it makes sense to allow this scenario
152158
// when weak decoding is specified??
153159
if !s.WeakDecoding() {
@@ -173,10 +179,11 @@ func (s *EnumSchema) DecodeValue(r io.Reader, v reflect.Value) error {
173179
case reflect.Int32:
174180
fallthrough
175181
case reflect.Int64:
176-
if v.OverflowInt(decodedVarInt) {
177-
return fmt.Errorf("decoded value %d overflows destination %v", decodedVarInt, k)
182+
intVal := int64(decodedVal)
183+
if uint64(intVal) != decodedVal || v.OverflowInt(intVal) {
184+
return fmt.Errorf("decoded value %d overflows destination %v", decodedVal, k)
178185
}
179-
v.SetInt(int64(decodedVarInt))
186+
v.SetInt(intVal)
180187
case reflect.Uint:
181188
fallthrough
182189
case reflect.Uint8:
@@ -186,31 +193,27 @@ func (s *EnumSchema) DecodeValue(r io.Reader, v reflect.Value) error {
186193
case reflect.Uint32:
187194
fallthrough
188195
case reflect.Uint64:
189-
if decodedVarInt < 0 {
190-
return fmt.Errorf("decoded value %d incompatible with %v", decodedVarInt, k)
191-
}
192-
uintVal := uint64(decodedVarInt)
193-
if v.OverflowUint(uintVal) {
194-
return fmt.Errorf("decoded value %d overflows destination %v", uintVal, k)
196+
if v.OverflowUint(decodedVal) {
197+
return fmt.Errorf("decoded value %d overflows destination %v", decodedVal, k)
195198
}
196-
v.SetUint(uintVal)
199+
v.SetUint(decodedVal)
197200
case reflect.String:
198201
if !s.WeakDecoding() {
199202
return fmt.Errorf("cannot decode enum to string without weak decoding enabled")
200203
}
201204

202205
// if we have the map, return the string value of the constant
203206
if s.Values != nil {
204-
if _, ok := s.Values[int(decodedVarInt)]; ok {
205-
v.SetString(s.Values[int(decodedVarInt)])
207+
if _, ok := s.Values[int(decodedVal)]; ok {
208+
v.SetString(s.Values[int(decodedVal)])
206209
return nil
207210
}
208211
}
209212

210213
// otherwise, just return a string version of the decoded integer value
211-
v.SetString(strconv.FormatInt(decodedVarInt, 10))
214+
v.SetString(strconv.FormatUint(decodedVal, 10))
212215
default:
213-
return fmt.Errorf("decoded value %d incompatible with %v", decodedVarInt, k)
216+
return fmt.Errorf("decoded value %d incompatible with %v", decodedVal, k)
214217
}
215218

216219
return nil

varint.go

+20-7
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,19 @@ func (s *VarIntSchema) EncodeValue(w io.Writer, v reflect.Value) error {
8787
case reflect.Int64:
8888
intVal := v.Int()
8989
// Write value
90-
uintVal := uint64(intVal) << 1
91-
if intVal < 0 {
92-
uintVal = ^uintVal
90+
if s.Signed {
91+
uintVal := uint64(intVal) << 1
92+
if intVal < 0 {
93+
uintVal = ^uintVal
94+
}
95+
return WriteUvarint(w, uintVal)
96+
} else {
97+
if intVal < 0 {
98+
return fmt.Errorf("cannot encode negative integer")
99+
}
100+
uintVal := uint64(intVal)
101+
return WriteUvarint(w, uintVal)
93102
}
94-
return WriteUvarint(w, uintVal)
95103
case reflect.Uint:
96104
fallthrough
97105
case reflect.Uint8:
@@ -102,7 +110,11 @@ func (s *VarIntSchema) EncodeValue(w io.Writer, v reflect.Value) error {
102110
fallthrough
103111
case reflect.Uint64:
104112
uintVal := v.Uint()
105-
return WriteUvarint(w, uintVal)
113+
if s.Signed {
114+
return WriteUvarint(w, uintVal<<1)
115+
} else {
116+
return WriteUvarint(w, uintVal)
117+
}
106118
}
107119

108120
return nil
@@ -239,10 +251,11 @@ func (s *VarIntSchema) DecodeValue(r io.Reader, v reflect.Value) error {
239251
case reflect.Int32:
240252
fallthrough
241253
case reflect.Int64:
242-
if v.OverflowUint(uintVal) {
254+
intVal := int64(uintVal)
255+
if uint64(intVal) != uintVal || v.OverflowInt(intVal) {
243256
return fmt.Errorf("decoded value %d overflows destination %v", uintVal, k)
244257
}
245-
v.SetUint(uintVal)
258+
v.SetInt(intVal)
246259
case reflect.Uint:
247260
fallthrough
248261
case reflect.Uint8:

0 commit comments

Comments
 (0)