From 7c4d5359f403deb4159ef360900a816dc7a42236 Mon Sep 17 00:00:00 2001 From: Aaron Barany Date: Mon, 6 Feb 2023 17:41:47 -0800 Subject: [PATCH] Add WRITE_MINIMAL_DOUBLES feature to CBORGenerator Enabling WRITE_MINIMAL_DOUBLES works similarly to WRITE_MINIMAL_INTS, choosing to use a smaller data representation (single-precision float) when the conversion will result in no loss of data. This can provide substantial savings for data that contains many doubles that don't require full precision. Cleaned up some of the writing code for floats and doubles to avoid code duplication, and reserve the proper amount of space for output for both types. --- .../dataformat/cbor/CBORGenerator.java | 101 +++++++++++------- .../cbor/gen/ArrayGenerationTest.java | 58 ++++++++++ .../cbor/mapper/CBORMapperTest.java | 32 ++++-- 3 files changed, 142 insertions(+), 49 deletions(-) diff --git a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java index 022ac0c19..4601705e0 100644 --- a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java +++ b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java @@ -102,6 +102,19 @@ public enum Feature implements FormatFeature { * @since 2.15 */ STRINGREF(false), + + /** + * Feature that determines whether generator should try to write doubles + * as floats: if {@code true}, will write a {@code double} as a 4-byte float if no + * precision loss will occur; if {@code false}, will always write a {@code double} + * as an 8-byte double. + *<p>
+ * Default value is {@code false} meaning that doubles will always be written as + * 8-byte values. + * + * @since 2.15 + */ + WRITE_MINIMAL_DOUBLES(false), ; protected final boolean _defaultState; @@ -177,6 +190,9 @@ public int getMask() { protected boolean _cfgMinimalInts; + // @since 2.15 + protected boolean _cfgMinimalDoubles; + /* /********************************************************** /* Output state @@ -275,6 +291,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures, _streamWriteContext = CBORWriteContext.createRootContext(dups); _formatFeatures = formatFeatures; _cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(formatFeatures); + _cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(formatFeatures); _ioContext = ctxt; _out = out; _bufferRecyclable = true; @@ -311,6 +328,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures, _streamWriteContext = CBORWriteContext.createRootContext(dups); _formatFeatures = formatFeatures; _cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(formatFeatures); + _cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(formatFeatures); _ioContext = ctxt; _out = out; _bufferRecyclable = bufferRecyclable; @@ -413,6 +431,7 @@ public JsonGenerator overrideFormatFeatures(int values, int mask) { if (oldState != newState) { _formatFeatures = newState; _cfgMinimalInts = Feature.WRITE_MINIMAL_INTS.enabledIn(newState); + _cfgMinimalDoubles = Feature.WRITE_MINIMAL_DOUBLES.enabledIn(newState); } return this; } @@ -458,6 +477,8 @@ public CBORGenerator enable(Feature f) { _formatFeatures |= f.getMask(); if (f == Feature.WRITE_MINIMAL_INTS) { _cfgMinimalInts = true; + } else if (f == Feature.WRITE_MINIMAL_DOUBLES) { + _cfgMinimalDoubles = true; } return this; } @@ -466,6 +487,8 @@ public CBORGenerator disable(Feature f) { _formatFeatures &= ~f.getMask(); if (f == Feature.WRITE_MINIMAL_INTS) { _cfgMinimalInts = false; + } else if (f == Feature.WRITE_MINIMAL_DOUBLES) { + 
_cfgMinimalDoubles = false; } return this; } @@ -691,8 +714,14 @@ public void writeArray(double[] array, int offset, int length) throws IOExceptio // short-cut, do not create child array context etc _verifyValueWrite("write int array"); _writeLengthMarker(PREFIX_TYPE_ARRAY, length); - for (int i = offset, end = offset+length; i < end; ++i) { - _writeDoubleNoCheck(array[i]); + if (_cfgMinimalDoubles) { + for (int i = offset, end = offset+length; i < end; ++i) { + _writeDoubleMinimal(array[i]); + } + } else { + for (int i = offset, end = offset+length; i < end; ++i) { + _writeDoubleNoCheck(array[i]); + } } } @@ -786,8 +815,24 @@ private final void _writeLongNoCheck(long l) throws IOException _outputBuffer[_outputTail++] = (byte) i; } + private final void _writeFloatNoCheck(float f) throws IOException { + _ensureRoomForOutput(5); + /* + * 17-Apr-2010, tatu: could also use 'floatToIntBits', but it seems more + * accurate to use exact representation; and possibly faster. However, + * if there are cases where collapsing of NaN was needed (for non-Java + * clients), this can be changed + */ + int i = Float.floatToRawIntBits(f); + _outputBuffer[_outputTail++] = BYTE_FLOAT32; + _outputBuffer[_outputTail++] = (byte) (i >> 24); + _outputBuffer[_outputTail++] = (byte) (i >> 16); + _outputBuffer[_outputTail++] = (byte) (i >> 8); + _outputBuffer[_outputTail++] = (byte) i; + } + private final void _writeDoubleNoCheck(double d) throws IOException { - _ensureRoomForOutput(11); + _ensureRoomForOutput(9); // 17-Apr-2010, tatu: could also use 'doubleToIntBits', but it seems // more accurate to use exact representation; and possibly faster. 
// However, if there are cases where collapsing of NaN was needed (for @@ -807,6 +852,15 @@ private final void _writeDoubleNoCheck(double d) throws IOException { _outputBuffer[_outputTail++] = (byte) i; } + private final void _writeDoubleMinimal(double d) throws IOException { + float f = (float)d; + if (f == d) { + _writeFloatNoCheck(f); + } else { + _writeDoubleNoCheck(d); + } + } + /* /*********************************************************** /* Output method implementations, textual @@ -1178,46 +1232,17 @@ protected void _write(BigInteger v) throws IOException { @Override public void writeNumber(double d) throws IOException { _verifyValueWrite("write number"); - _ensureRoomForOutput(11); - /* - * 17-Apr-2010, tatu: could also use 'doubleToIntBits', but it seems - * more accurate to use exact representation; and possibly faster. - * However, if there are cases where collapsing of NaN was needed (for - * non-Java clients), this can be changed - */ - long l = Double.doubleToRawLongBits(d); - _outputBuffer[_outputTail++] = BYTE_FLOAT64; - - int i = (int) (l >> 32); - _outputBuffer[_outputTail++] = (byte) (i >> 24); - _outputBuffer[_outputTail++] = (byte) (i >> 16); - _outputBuffer[_outputTail++] = (byte) (i >> 8); - _outputBuffer[_outputTail++] = (byte) i; - i = (int) l; - _outputBuffer[_outputTail++] = (byte) (i >> 24); - _outputBuffer[_outputTail++] = (byte) (i >> 16); - _outputBuffer[_outputTail++] = (byte) (i >> 8); - _outputBuffer[_outputTail++] = (byte) i; + if (_cfgMinimalDoubles) { + _writeDoubleMinimal(d); + } else { + _writeDoubleNoCheck(d); + } } @Override public void writeNumber(float f) throws IOException { - // Ok, now, we needed token type byte plus 5 data bytes (7 bits each) - _ensureRoomForOutput(6); _verifyValueWrite("write number"); - - /* - * 17-Apr-2010, tatu: could also use 'floatToIntBits', but it seems more - * accurate to use exact representation; and possibly faster. 
However, - * if there are cases where collapsing of NaN was needed (for non-Java - * clients), this can be changed - */ - int i = Float.floatToRawIntBits(f); - _outputBuffer[_outputTail++] = BYTE_FLOAT32; - _outputBuffer[_outputTail++] = (byte) (i >> 24); - _outputBuffer[_outputTail++] = (byte) (i >> 16); - _outputBuffer[_outputTail++] = (byte) (i >> 8); - _outputBuffer[_outputTail++] = (byte) i; + _writeFloatNoCheck(f); } @Override diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/ArrayGenerationTest.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/ArrayGenerationTest.java index e12957d4e..a098b5606 100644 --- a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/ArrayGenerationTest.java +++ b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/ArrayGenerationTest.java @@ -150,6 +150,64 @@ public void testMinimalIntValuesForLong() throws Exception p.close(); } + public void testMinimalFloatValuesForDouble() throws Exception + { + // Array with 2 values, one that can be represented as a float without losing precision and + // one that cannot. 
+ final double[] input = new double[] { + 1.5, // can be exactly represented as a float + 0.123456789 // must be kept as double + }; + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = FACTORY.createGenerator(bytes); + assertFalse(gen.isEnabled(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES)); + gen.enable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES); + gen.writeArray(input, 0, 2); + gen.close(); + + // With minimal doubles enabled, should get: + byte[] encoded = bytes.toByteArray(); + assertEquals(15, encoded.length); + + // then verify contents + + CBORParser p = FACTORY.createParser(encoded); + assertToken(JsonToken.START_ARRAY, p.nextToken()); + assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken()); + assertEquals(NumberType.FLOAT, p.getNumberType()); + assertEquals(input[0], p.getDoubleValue()); + assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken()); + assertEquals(NumberType.DOUBLE, p.getNumberType()); + assertEquals(input[1], p.getDoubleValue()); + assertToken(JsonToken.END_ARRAY, p.nextToken()); + p.close(); + + // but then also check without minimization + bytes = new ByteArrayOutputStream(); + gen = FACTORY.createGenerator(bytes); + gen.disable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES); + + gen.writeArray(input, 0, 2); + gen.close(); + + // With default settings, should get: + encoded = bytes.toByteArray(); + assertEquals(19, encoded.length); + + // then verify contents + + p = FACTORY.createParser(encoded); + assertToken(JsonToken.START_ARRAY, p.nextToken()); + assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken()); + assertEquals(NumberType.DOUBLE, p.getNumberType()); + assertEquals(input[0], p.getDoubleValue()); + assertToken(JsonToken.VALUE_NUMBER_FLOAT, p.nextToken()); + assertEquals(NumberType.DOUBLE, p.getNumberType()); + assertEquals(input[1], p.getDoubleValue()); + assertToken(JsonToken.END_ARRAY, p.nextToken()); + p.close(); + } + private void _testIntArray() throws Exception { // first special cases of 0, 
1 values _testIntArray(0, 0, 0); diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/CBORMapperTest.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/CBORMapperTest.java index 15dd1b489..a90e9678f 100644 --- a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/CBORMapperTest.java +++ b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/CBORMapperTest.java @@ -4,6 +4,8 @@ import com.fasterxml.jackson.dataformat.cbor.CBORTestBase; import com.fasterxml.jackson.dataformat.cbor.databind.CBORMapper; +import org.junit.Assert; + public class CBORMapperTest extends CBORTestBase { /* @@ -14,23 +16,31 @@ public class CBORMapperTest extends CBORTestBase public void testStreamingFeaturesViaMapper() throws Exception { - final Integer SMALL_INT = Integer.valueOf(3); - CBORMapper mapperWithMinimalInts = CBORMapper.builder() + final int SMALL_INT = 3; + final int BIG_INT = 0x7FFFFFFF; + final double LOW_RPECISION_DOUBLE = 1.5; + final double HIGH_RPECISION_DOUBLE = 0.123456789; + Object[] values = {SMALL_INT, BIG_INT, LOW_RPECISION_DOUBLE, HIGH_RPECISION_DOUBLE}; + Object[] minimalValues = { + SMALL_INT, BIG_INT, (float)LOW_RPECISION_DOUBLE, HIGH_RPECISION_DOUBLE}; + CBORMapper mapperWithMinimal = CBORMapper.builder() .enable(CBORGenerator.Feature.WRITE_MINIMAL_INTS) + .enable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES) .build(); - byte[] encodedMinimal = mapperWithMinimalInts.writeValueAsBytes(SMALL_INT); - assertEquals(1, encodedMinimal.length); + byte[] encodedMinimal = mapperWithMinimal.writeValueAsBytes(values); + assertEquals(21, encodedMinimal.length); - CBORMapper mapperFullInts = CBORMapper.builder() + CBORMapper mapperFull = CBORMapper.builder() .disable(CBORGenerator.Feature.WRITE_MINIMAL_INTS) + .disable(CBORGenerator.Feature.WRITE_MINIMAL_DOUBLES) .build(); - byte[] encodedNotMinimal = mapperFullInts.writeValueAsBytes(SMALL_INT); - assertEquals(5, encodedNotMinimal.length); + byte[] 
encodedNotMinimal = mapperFull.writeValueAsBytes(values); + assertEquals(29, encodedNotMinimal.length); // And then verify we can read it back, either way - assertEquals(SMALL_INT, mapperWithMinimalInts.readValue(encodedMinimal, Object.class)); - assertEquals(SMALL_INT, mapperWithMinimalInts.readValue(encodedNotMinimal, Object.class)); - assertEquals(SMALL_INT, mapperFullInts.readValue(encodedMinimal, Object.class)); - assertEquals(SMALL_INT, mapperFullInts.readValue(encodedNotMinimal, Object.class)); + Assert.assertArrayEquals(minimalValues, mapperWithMinimal.readValue(encodedMinimal, Object[].class)); + Assert.assertArrayEquals(values, mapperWithMinimal.readValue(encodedNotMinimal, Object[].class)); + Assert.assertArrayEquals(minimalValues, mapperFull.readValue(encodedMinimal, Object[].class)); + Assert.assertArrayEquals(values, mapperFull.readValue(encodedNotMinimal, Object[].class)); } }