diff --git a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORConstants.java b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORConstants.java index 77208290b..8fec6eae9 100644 --- a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORConstants.java +++ b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORConstants.java @@ -57,6 +57,18 @@ public final class CBORConstants */ public final static int TAG_ID_SELF_DESCRIBE = 55799; + /** + * Tag denoting a namespace for string references in the following value. + * @since 2.15 + */ + public final static int TAG_ID_STRINGREF_NAMESPACE = 256; + + /** + * Tag denoting the next integer value should be an index for a previous string. + * @since 2.15 + */ + public final static int TAG_ID_STRINGREF = 25; + /* /********************************************************** /* Actual type and marker bytes @@ -141,4 +153,13 @@ public static boolean hasMajorType(int expType, byte encoded) { int actual = (encoded & MASK_MAJOR_TYPE) >> 5; return (actual == expType); } + + public static boolean shouldReferenceString(int index, int stringBytes) { + // See table in specification: http://cbor.schmorp.de/stringref + // Only support 32-bit indices. 
+ return (index >= 0 && index <= 23 && stringBytes >= 3) || + (index >= 24 && index <= 255 && stringBytes >= 4) || + (index >= 256 && index <= 65535 && stringBytes >= 5) || + (index >= 65536 && stringBytes >= 7); + } } diff --git a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORFactory.java b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORFactory.java index d751e4fa1..386f7462f 100644 --- a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORFactory.java +++ b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORFactory.java @@ -453,6 +453,9 @@ private final CBORGenerator _createCBORGenerator(IOContext ctxt, if (CBORGenerator.Feature.WRITE_TYPE_HEADER.enabledIn(formatFeat)) { gen.writeTag(CBORConstants.TAG_ID_SELF_DESCRIBE); } + if (CBORGenerator.Feature.STRINGREF.enabledIn(formatFeat)) { + gen.writeTag(CBORConstants.TAG_ID_STRINGREF_NAMESPACE); + } return gen; } diff --git a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java index 6d72a9d7a..022ac0c19 100644 --- a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java +++ b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORGenerator.java @@ -1,9 +1,12 @@ package com.fasterxml.jackson.dataformat.cbor; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.io.*; import java.math.BigDecimal; import java.math.BigInteger; +import java.util.HashMap; import com.fasterxml.jackson.core.*; import com.fasterxml.jackson.core.base.GeneratorBase; @@ -78,13 +81,27 @@ public enum Feature implements FormatFeature { * incoming String should fail with an exception or silently be output * as the Unicode 'REPLACEMENT CHARACTER' (U+FFFD) or not; if not, * an exception will be thrown to indicate invalid content. - *

+ *

* Default value is {@code false} (for backwards compatibility) meaning that * an invalid surrogate will result in exception ({@link IllegalArgumentException} * * @since 2.12 */ LENIENT_UTF_ENCODING(false), + + /** + * Feature that determines if string references are generated based on the + * <a href="http://cbor.schmorp.de/stringref">stringref</a> extension. This can save + * storage space, parsing time, and pool string memory when parsing. Readers of the output + * must also support the stringref extension to properly decode the data. Extra overhead may + * be added to generation time and memory usage to compute the shared binary and text + * strings. + *

+ * Default value is {@code false} meaning that the stringref extension will not be used. + * + * @since 2.15 + */ + STRINGREF(false), ; protected final boolean _defaultState; @@ -175,7 +192,7 @@ public int getMask() { /********************************************************** */ - /** + /** * Intermediate buffer in which contents are buffered before being written * using {@link #_out}. */ @@ -236,6 +253,12 @@ public int getMask() { */ protected boolean _bufferRecyclable; + /** + * Table mapping previously written text ({@code String}) and binary ({@code ByteBuffer}) strings to their stringref index; {@code null} unless the STRINGREF feature is enabled. + * @since 2.15 + */ + protected HashMap<Object, Integer> _stringRefs; + /* /********************************************************** /* Life-cycle @@ -255,6 +278,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures, _ioContext = ctxt; _out = out; _bufferRecyclable = true; + _stringRefs = Feature.STRINGREF.enabledIn(formatFeatures) ? new HashMap<>() : null; _outputBuffer = ctxt.allocWriteEncodingBuffer(BYTE_BUFFER_FOR_OUTPUT); _outputEnd = _outputBuffer.length; _charBuffer = ctxt.allocConcatBuffer(); @@ -292,6 +316,7 @@ public CBORGenerator(IOContext ctxt, int stdFeatures, int formatFeatures, _bufferRecyclable = bufferRecyclable; _outputTail = offset; _outputBuffer = outputBuffer; + _stringRefs = Feature.STRINGREF.enabledIn(formatFeatures) ? new HashMap<>() : null; _outputEnd = _outputBuffer.length; _charBuffer = ctxt.allocConcatBuffer(); _charBufferLength = _charBuffer.length; @@ -488,6 +513,17 @@ public final void writeFieldName(SerializableString name) if (len == 0) { _writeByte(BYTE_EMPTY_STRING); return; + } else if (_stringRefs != null) { + // Check for a string reference.
+ String str = name.getValue(); + Integer index = _stringRefs.get(str); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + return; + } else if (shouldReferenceString(_stringRefs.size(), len)) { + _stringRefs.put(str, _stringRefs.size()); + } } _writeLengthMarker(PREFIX_TYPE_TEXT, len); _writeBytes(raw, 0, len); @@ -501,42 +537,6 @@ public final void writeFieldId(long id) throws IOException { _writeLongNoCheck(id); } - /* - /********************************************************** - /* Overridden methods, copying with tag-awareness - /********************************************************** - */ - - /** - * Specialize {@link JsonGenerator#copyCurrentEvent} to handle tags. - */ - @Override - public void copyCurrentEvent(JsonParser p) throws IOException { - maybeCopyTag(p); - super.copyCurrentEvent(p); - } - - /** - * Specialize {@link JsonGenerator#copyCurrentStructure} to handle tags. - */ - @Override - public void copyCurrentStructure(JsonParser p) throws IOException { - maybeCopyTag(p); - super.copyCurrentStructure(p); - } - - protected void maybeCopyTag(JsonParser p) throws IOException { - if (p instanceof CBORParser) { - if (p.hasCurrentToken()) { - final int currentTag = ((CBORParser) p).getCurrentTag(); - - if (currentTag != -1) { - writeTag(currentTag); - } - } - } - } - /* /********************************************************** /* Output method implementations, structural @@ -831,6 +831,17 @@ public final void writeString(SerializableString sstr) throws IOException { if (len == 0) { _writeByte(BYTE_EMPTY_STRING); return; + } else if (_stringRefs != null) { + // Check for a string reference. 
+ String str = sstr.getValue(); + Integer index = _stringRefs.get(str); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + return; + } else if (shouldReferenceString(_stringRefs.size(), len)) { + _stringRefs.put(str, _stringRefs.size()); + } } _writeLengthMarker(PREFIX_TYPE_TEXT, len); _writeBytes(raw, 0, len); @@ -840,11 +851,24 @@ public final void writeString(SerializableString sstr) throws IOException { public void writeString(char[] text, int offset, int len) throws IOException { _verifyValueWrite("write String value"); + String str = null; if (len == 0) { _writeByte(BYTE_EMPTY_STRING); return; + } else if (_stringRefs != null && len <= MAX_LONG_STRING_CHARS) { + // Check for a string reference. + str = new String(text, offset, len); + Integer index = _stringRefs.get(str); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + return; + } + } + int actual = _writeString(text, offset, len); + if (str != null && shouldReferenceString(_stringRefs.size(), actual)) { + _stringRefs.put(str, _stringRefs.size()); } - _writeString(text, offset, len); } @Override @@ -855,6 +879,17 @@ public void writeRawUTF8String(byte[] raw, int offset, int len) if (len == 0) { _writeByte(BYTE_EMPTY_STRING); return; + } else if (_stringRefs != null) { + // Check for a string reference. 
+ String str = new String(raw, offset, len, StandardCharsets.UTF_8); + Integer index = _stringRefs.get(str); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + return; + } else if (shouldReferenceString(_stringRefs.size(), len)) { + _stringRefs.put(str, _stringRefs.size()); + } } _writeLengthMarker(PREFIX_TYPE_TEXT, len); _writeBytes(raw, 0, len); @@ -924,8 +959,25 @@ public void writeBinary(Base64Variant b64variant, byte[] data, int offset, return; } _verifyValueWrite("write Binary value"); + ByteBuffer bytesRef = null; + if (_stringRefs != null) { + bytesRef = ByteBuffer.wrap(data, offset, len); + Integer index = _stringRefs.get(bytesRef); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + return; + } + } + _writeLengthMarker(PREFIX_TYPE_BYTES, len); _writeBytes(data, offset, len); + + if (bytesRef != null && shouldReferenceString(_stringRefs.size(), len)) { + // Store a private copy of data[offset, offset + len) so later caller modifications don't corrupt the lookup table. + _stringRefs.put(ByteBuffer.wrap(Arrays.copyOfRange(data, offset, offset + len)), + _stringRefs.size()); + } } @Override @@ -943,8 +995,29 @@ public int writeBinary(InputStream data, int dataLength) throws IOException { _verifyValueWrite("write Binary value"); int missing; - _writeLengthMarker(PREFIX_TYPE_BYTES, dataLength); - missing = _writeBytes(data, dataLength); + if (_stringRefs == null) { + _writeLengthMarker(PREFIX_TYPE_BYTES, dataLength); + missing = _writeBytes(data, dataLength); + } else { + // When computing string references must have the data available ahead of time.
+ byte[] bytes = new byte[dataLength]; + missing = dataLength - data.read(bytes); + if (missing == 0) { + ByteBuffer bytesRef = ByteBuffer.wrap(bytes); + Integer index = _stringRefs.get(bytesRef); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + } else { + _writeLengthMarker(PREFIX_TYPE_BYTES, dataLength); + _writeBytes(bytes, 0, dataLength); + if (shouldReferenceString(_stringRefs.size(), dataLength)) { + _stringRefs.put(bytesRef, _stringRefs.size()); + } + } + } + } + if (missing > 0) { _reportError("Too few bytes available: missing " + missing + " bytes (out of " + dataLength + ")"); @@ -1083,8 +1156,23 @@ protected void _write(BigInteger v) throws IOException { } byte[] data = v.toByteArray(); final int len = data.length; - _writeLengthMarker(PREFIX_TYPE_BYTES, len); - _writeBytes(data, 0, len); + if (_stringRefs == null) { + _writeLengthMarker(PREFIX_TYPE_BYTES, len); + _writeBytes(data, 0, len); + } else { + ByteBuffer bytesRef = ByteBuffer.wrap(data); + Integer index = _stringRefs.get(bytesRef); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + } else { + _writeLengthMarker(PREFIX_TYPE_BYTES, len); + _writeBytes(data, 0, len); + if (shouldReferenceString(_stringRefs.size(), len)) { + _stringRefs.put(bytesRef, _stringRefs.size()); + } + } + } } @Override @@ -1320,11 +1408,26 @@ protected final void _writeString(String name) throws IOException { _writeByte(BYTE_EMPTY_STRING); return; } + + // Check if this is a previously referenced string. This will only be done for strings that + // have a definite length. 
+ if (_stringRefs != null && len <= MAX_LONG_STRING_CHARS) { + Integer index = _stringRefs.get(name); + if (index != null) { + writeTag(TAG_ID_STRINGREF); + _writeIntMinimal(PREFIX_TYPE_INT_POS, index); + return; + } + } + // Actually, let's not bother with copy for shortest strings if (len <= MAX_SHORT_STRING_CHARS) { - _ensureSpace(MAX_SHORT_STRING_BYTES); // can afford approximate - // length + _ensureSpace(MAX_SHORT_STRING_BYTES); // can afford approximate length int actual = _encode(_outputTail + 1, name, len); + // Store reference for later if valid to do so. + if (_stringRefs != null && shouldReferenceString(_stringRefs.size(), actual)) { + _stringRefs.put(name, _stringRefs.size()); + } final byte[] buf = _outputBuffer; int ix = _outputTail; if (actual <= MAX_SHORT_STRING_CHARS) { // fits in prefix byte @@ -1346,7 +1449,13 @@ protected final void _writeString(String name) throws IOException { .max(_charBuffer.length + 32, len)]; } name.getChars(0, len, cbuf, 0); - _writeString(cbuf, 0, len); + int actual = _writeString(cbuf, 0, len); + // Store reference for later if valid to do so. Actual length will be negative if an + // indefinite length string was written. 
+ if (actual >= 0 && _stringRefs != null && + shouldReferenceString(_stringRefs.size(), actual)) { + _stringRefs.put(name, _stringRefs.size()); + } } protected final void _ensureSpace(int needed) throws IOException { @@ -1355,7 +1464,7 @@ protected final void _ensureSpace(int needed) throws IOException { } } - protected final void _writeString(char[] text, int offset, int len) + protected final int _writeString(char[] text, int offset, int len) throws IOException { if (len <= MAX_SHORT_STRING_CHARS) { // possibly short string (not necessarily) @@ -1366,14 +1475,14 @@ protected final void _writeString(char[] text, int offset, int len) if (actual <= MAX_SHORT_STRING_CHARS) { // fits in prefix byte buf[ix++] = (byte) (PREFIX_TYPE_TEXT + actual); _outputTail = ix + actual; - return; + return actual; } // no, have to move. Blah. System.arraycopy(buf, ix + 1, buf, ix + 2, actual); buf[ix++] = BYTE_STRING_1BYTE_LEN; buf[ix++] = (byte) actual; _outputTail = ix + actual; - return; + return actual; } if (len <= MAX_MEDIUM_STRING_CHARS) { _ensureSpace(MAX_MEDIUM_STRING_BYTES); // short enough, can approximate @@ -1384,7 +1493,7 @@ protected final void _writeString(char[] text, int offset, int len) buf[ix++] = BYTE_STRING_1BYTE_LEN; buf[ix++] = (byte) actual; _outputTail = ix + actual; - return; + return actual; } // no, have to move. Blah. 
System.arraycopy(buf, ix + 2, buf, ix + 3, actual); @@ -1392,22 +1501,23 @@ protected final void _writeString(char[] text, int offset, int len) buf[ix++] = (byte) (actual >> 8); buf[ix++] = (byte) actual; _outputTail = ix + actual; - return; + return actual; } if (len <= MAX_LONG_STRING_CHARS) { // no need to chunk yet // otherwise, long but single chunk _ensureSpace(MAX_LONG_STRING_BYTES); // calculate accurate length to // avoid extra flushing int ix = _outputTail; - int actual = _encode(ix + 3, text, offset, offset+len); + int actual = _encode(ix + 3, text, offset, offset + len); final byte[] buf = _outputBuffer; buf[ix++] = BYTE_STRING_2BYTE_LEN; buf[ix++] = (byte) (actual >> 8); buf[ix++] = (byte) actual; _outputTail = ix + actual; - return; + return actual; } _writeChunkedString(text, offset, len); + return -1; } protected final void _writeChunkedString(char[] text, int offset, int len) diff --git a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORParser.java b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORParser.java index 1dbf429c8..e8603b916 100644 --- a/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORParser.java +++ b/cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORParser.java @@ -5,7 +5,9 @@ import java.math.BigInteger; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Stack; import com.fasterxml.jackson.core.*; import com.fasterxml.jackson.core.base.ParserMinimalBase; @@ -57,6 +59,90 @@ private Feature(boolean defaultState) { @Override public boolean enabledIn(int flags) { return (flags & _mask) != 0; } } + /** + * Class for keeping track of tags in an optimized manner. + * + * @since 2.15 + */ + public static final class TagList + { + public TagList() { + _tags = new int[8]; + _tagCount = 0; + } + + /** + * Gets the number of tags available. + * + * @return The number of tags. 
+ */ + public int size() { + return _tagCount; + } + + /** + * Checks whether the tag list is empty. + * + * @return {@code true} if there are no tags, {@code false} if there are tags.. + */ + public boolean isEmpty() { + return _tagCount == 0; + } + + /** + * Clears the tags from the list. + */ + public void clear() { + _tagCount = 0; + } + + /** + * Adds a tag to the list. + * + * @param tag The tag to add. + */ + public void add(int tag) { + if (_tagCount == _tags.length) { + // Linear growth since we expect a small number of tags. + int[] newTags = new int[_tagCount + 8]; + System.arraycopy(_tags, 0, newTags, 0, _tagCount); + _tags = newTags; + } + + _tags[_tagCount++] = tag; + } + + /** + * Checks if a tag is present. + * + * @param tag The tag to check. + * @return {@code true} if the tag is present, {@code false} if it is not. + */ + public boolean contains(int tag) { + for (int i = 0; i < _tagCount; ++i) { + if (_tags[i] == tag) { + return true; + } + } + return false; + } + + /** + * Gets the first tag in the list. This is primarily to support the legacy API. + * + * @return The first tag or -1 if there are no tags. + */ + public int getFirstTag() { + if (_tagCount == 0) { + return -1; + } + return _tags[0]; + } + + private int[] _tags; + private int _tagCount; + } + private final static Charset UTF8 = StandardCharsets.UTF_8; private final static int[] UTF8_UNIT_CODES = CBORConstants.sUtf8UnitLengths; @@ -183,7 +269,7 @@ private Feature(boolean defaultState) { /** * Information about parser context, context in which * the next token is to be parsed (root, array, object). - *

+ *

+ * NOTE: before 2.13 was "_parsingContext" */ protected CBORReadContext _streamReadContext; @@ -229,9 +315,10 @@ private Feature(boolean defaultState) { private int _chunkLeft, _chunkEnd; /** - * We will keep track of tag value for possible future use. + * We will keep track of tag values for possible future use. + * @since 2.15 */ - protected int _tagValue = -1; + protected TagList _tagValues = new TagList(); /** * Flag that indicates that the current token has not yet @@ -245,6 +332,66 @@ private Feature(boolean defaultState) { */ protected int _typeByte; + /** + * Type to keep track of a list of string references. A depth is stored to know when to pop the + * references off the stack for nested namespaces. + * + * @since 2.15 + */ + protected static final class StringRefList + { + public StringRefList(int depth) { + this.depth = depth; + } + + public ArrayList<Object> stringRefs = new ArrayList<>(); + public int depth; + } + + /** + * Type to keep a stack of string refs based on namespaces within the document. + * + * @since 2.15 + */ + protected static final class StringRefListStack { + public void push(boolean hasNamespace) { + if (hasNamespace) { + _stringRefs.push(new StringRefList(_nestedDepth)); + } + ++_nestedDepth; + } + + public void pop() { + --_nestedDepth; + if (!_stringRefs.empty() && _stringRefs.peek().depth == _nestedDepth) { + _stringRefs.pop(); + } + } + + public StringRefList peek() { + return _stringRefs.peek(); + } + + public boolean empty() { + return _stringRefs.empty(); + } + + private Stack<StringRefList> _stringRefs = new Stack<>(); + private int _nestedDepth = 0; + } + + /** + * Stack of text and binary string references. + * @since 2.15 + */ + protected StringRefListStack _stringRefs = new StringRefListStack(); + + /** + * Shared string that should be used in place of _textBuffer when a string reference is used.
+ * @since 2.15 + */ + protected String _sharedString; + /* /********************************************************** /* Input source config, state (from ex StreamBasedParserBase) @@ -268,7 +415,7 @@ private Feature(boolean defaultState) { /** * Flag that indicates whether the input buffer is recycable (and * needs to be returned to recycler once we are done) or not. - *

+ *

* If it is not, it also means that parser can NOT modify underlying * buffer. */ @@ -447,7 +594,18 @@ public JacksonFeatureSet getReadCapabilities() { * @since 2.5 */ public int getCurrentTag() { - return _tagValue; + return _tagValues.getFirstTag(); + } + + /** + * Method that can be used to access all tag ids associated with + * the most recently decoded value (whether completely, for + * scalar values, or partially, for Objects/Arrays), if any. + * + * @since 2.15 + */ + public TagList getCurrentTags() { + return _tagValues; } /* @@ -564,7 +722,7 @@ public boolean hasTextCharacters() { if (_currToken == JsonToken.VALUE_STRING) { // yes; is or can be made available efficiently as char[] - return _textBuffer.hasTextAsCharacters(); + return _sharedString != null || _textBuffer.hasTextAsCharacters(); } if (_currToken == JsonToken.FIELD_NAME) { // not necessarily; possible but: @@ -582,19 +740,19 @@ public boolean hasTextCharacters() */ protected void _releaseBuffers() throws IOException { - if (_bufferRecyclable) { - byte[] buf = _inputBuffer; - if (buf != null) { - _inputBuffer = null; - _ioContext.releaseReadIOBuffer(buf); - } - } - _textBuffer.releaseBuffers(); - char[] buf = _nameCopyBuffer; - if (buf != null) { - _nameCopyBuffer = null; - _ioContext.releaseNameCopyBuffer(buf); - } + if (_bufferRecyclable) { + byte[] buf = _inputBuffer; + if (buf != null) { + _inputBuffer = null; + _ioContext.releaseReadIOBuffer(buf); + } + } + _textBuffer.releaseBuffers(); + char[] buf = _nameCopyBuffer; + if (buf != null) { + _nameCopyBuffer = null; + _ioContext.releaseNameCopyBuffer(buf); + } } /* @@ -620,9 +778,10 @@ public JsonToken nextToken() throws IOException // as well as handle names for Object entries. if (_streamReadContext.inObject()) { if (_currToken != JsonToken.FIELD_NAME) { - _tagValue = -1; + _tagValues.clear(); // completed the whole Object? 
if (!_streamReadContext.expectMoreValues()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); return (_currToken = JsonToken.END_OBJECT); } @@ -630,7 +789,8 @@ public JsonToken nextToken() throws IOException } } else { if (!_streamReadContext.expectMoreValues()) { - _tagValue = -1; + _stringRefs.pop(); + _tagValues.clear(); _streamReadContext = _streamReadContext.getParent(); return (_currToken = JsonToken.END_ARRAY); } @@ -645,8 +805,9 @@ public JsonToken nextToken() throws IOException int lowBits = ch & 0x1F; // One special case: need to consider tag as prefix first: - if (type == 6) { - _tagValue = Integer.valueOf(_decodeTag(lowBits)); + _tagValues.clear(); + while (type == 6) { + _tagValues.add(_decodeTag(lowBits)); if (_inputPtr >= _inputEnd) { if (!loadMore()) { return _eofAsNextToken(); @@ -655,9 +816,10 @@ public JsonToken nextToken() throws IOException ch = _inputBuffer[_inputPtr++] & 0xFF; type = (ch >> 5); lowBits = ch & 0x1F; - } else { - _tagValue = -1; } + + boolean stringrefNamespace = _tagValues.contains(TAG_ID_STRINGREF_NAMESPACE); + switch (type) { case 0: // positive int _numTypesValid = NR_INT; @@ -701,6 +863,9 @@ public JsonToken nextToken() throws IOException _invalidToken(ch); } } + if (!_tagValues.isEmpty()) { + return _handleTaggedInt(_tagValues); + } return (_currToken = JsonToken.VALUE_NUMBER_INT); case 1: // negative int _numTypesValid = NR_INT; @@ -749,8 +914,8 @@ public JsonToken nextToken() throws IOException case 2: // byte[] _typeByte = ch; _tokenIncomplete = true; - if (_tagValue >= 0) { - return _handleTaggedBinary(_tagValue); + if (!_tagValues.isEmpty()) { + return _handleTaggedBinary(_tagValues); } return (_currToken = JsonToken.VALUE_EMBEDDED_OBJECT); @@ -760,16 +925,18 @@ public JsonToken nextToken() throws IOException return (_currToken = JsonToken.VALUE_STRING); case 4: // Array + _stringRefs.push(stringrefNamespace); { int len = _decodeExplicitLength(lowBits); - if (_tagValue >= 0) { - return 
_handleTaggedArray(_tagValue, len); + if (!_tagValues.isEmpty()) { + return _handleTaggedArray(_tagValues, len); } _streamReadContext = _streamReadContext.createChildArrayContext(len); } return (_currToken = JsonToken.START_ARRAY); case 5: // Object + _stringRefs.push(stringrefNamespace); _currToken = JsonToken.START_OBJECT; { int len = _decodeExplicitLength(lowBits); @@ -777,9 +944,6 @@ public JsonToken nextToken() throws IOException } return _currToken; - case 6: // another tag; not allowed - _reportError("Multiple tags not allowed per value (first tag: "+_tagValue+")"); - case 7: default: // misc: tokens, floats switch (lowBits) { @@ -812,6 +976,7 @@ public JsonToken nextToken() throws IOException case 31: // Break if (_streamReadContext.inArray()) { if (!_streamReadContext.hasExpectedLength()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); return (_currToken = JsonToken.END_ARRAY); } @@ -823,8 +988,9 @@ public JsonToken nextToken() throws IOException } } - protected String _numberToName(int ch, boolean neg) throws IOException + protected String _numberToName(int ch, boolean neg, TagList tags) throws IOException { + boolean isStringref = tags.contains(TAG_ID_STRINGREF); final int lowBits = ch & 0x1F; int i; if (lowBits <= 23) { @@ -842,6 +1008,9 @@ protected String _numberToName(int ch, boolean neg) throws IOException // [dataformats-binary#269] (and earlier [dataformats-binary#30]), // got some edge case to consider if (i < 0) { + if (isStringref) { + _reportError("String reference index too large"); + } long l; if (neg) { long unsignedBase = (long) i & 0xFFFFFFFFL; @@ -855,6 +1024,9 @@ protected String _numberToName(int ch, boolean neg) throws IOException break; case 27: { + if (isStringref) { + _reportError("String reference index too large"); + } long l = _decode64Bits(); if (neg) { l = -l - 1L; @@ -869,16 +1041,60 @@ protected String _numberToName(int ch, boolean neg) throws IOException if (neg) { i = -i - 1; } + + if 
(isStringref) { + if (_stringRefs.empty()) { + _reportError("String reference outside of a namespace"); + } + + StringRefList stringRefs = _stringRefs.peek(); + if (i < 0 || i >= stringRefs.stringRefs.size()) { + _reportError("String reference (" + i + ") out of range"); + } + + Object str = stringRefs.stringRefs.get(i); + if (str instanceof String) { + return (String) str; + } + return new String((byte[]) str, UTF8); + } return String.valueOf(i); } - protected JsonToken _handleTaggedBinary(int tag) throws IOException + protected JsonToken _handleTaggedInt(TagList tags) throws IOException { + // For now all we should get is stringref + if (!tags.contains(TAG_ID_STRINGREF)) { + return (_currToken = JsonToken.VALUE_NUMBER_INT); + } + + if (_stringRefs.empty()) { + _reportError("String reference outside of a namespace"); + } else if (_numTypesValid != NR_INT) { + _reportError("String reference index too large"); + } + + StringRefList stringRefs = _stringRefs.peek(); + + if (_numberInt < 0 || _numberInt >= stringRefs.stringRefs.size()) { + _reportError("String reference (" + _numberInt + ") out of range"); + } + + Object str = stringRefs.stringRefs.get(_numberInt); + if (str instanceof String) { + _sharedString = (String) str; + return (_currToken = JsonToken.VALUE_STRING); + } + _binaryValue = (byte[]) str; + return _handleTaggedBinary(tags); + } + + protected JsonToken _handleTaggedBinary(TagList tags) throws IOException { // For now all we should get is BigInteger boolean neg; - if (tag == TAG_BIGNUM_POS) { + if (tags.contains(TAG_BIGNUM_POS)) { neg = false; - } else if (tag == TAG_BIGNUM_NEG) { + } else if (tags.contains(TAG_BIGNUM_NEG)) { neg = true; } else { // 12-May-2016, tatu: Since that's all we know, let's otherwise @@ -887,7 +1103,9 @@ protected JsonToken _handleTaggedBinary(int tag) throws IOException } // First: get the data - _finishToken(); + if (_tokenIncomplete) { + _finishToken(); + } // [dataformats-binar#261]: handle this special case if 
(_binaryValue.length == 0) { @@ -901,11 +1119,11 @@ protected JsonToken _handleTaggedBinary(int tag) throws IOException _numberBigInt = nr; } _numTypesValid = NR_BIGINT; - _tagValue = -1; + _tagValues.clear(); return (_currToken = JsonToken.VALUE_NUMBER_INT); } - protected JsonToken _handleTaggedArray(int tag, int len) throws IOException + protected JsonToken _handleTaggedArray(TagList tags, int len) throws IOException { // For simplicity, let's create matching array context -- in perfect // world that wouldn't be necessarily, but in this one there are @@ -913,7 +1131,7 @@ protected JsonToken _handleTaggedArray(int tag, int len) throws IOException _streamReadContext = _streamReadContext.createChildArrayContext(len); // BigDecimal is the only thing we know for sure - if (tag != CBORConstants.TAG_DECIMAL_FRACTION) { + if (!tags.contains(CBORConstants.TAG_DECIMAL_FRACTION)) { return (_currToken = JsonToken.START_ARRAY); } _currToken = JsonToken.START_ARRAY; @@ -960,7 +1178,7 @@ protected JsonToken _handleTaggedArray(int tag, int len) throws IOException * only (1) determine that we are getting {@code JsonToken.VALUE_NUMBER_INT} (if not, * return with no processing) and (2) if so, prepare state so that number accessor * method will work). - *

+ *

* Note that in particular this method DOES NOT reset state that {@code nextToken()} would do, * but will change current token type to allow access. */ @@ -968,7 +1186,8 @@ protected final boolean _checkNextIsIntInArray(final String typeDesc) throws IOE { // We know we are in array, with length prefix so: if (!_streamReadContext.expectMoreValues()) { - _tagValue = -1; + _tagValues.clear(); + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_ARRAY; return false; @@ -986,9 +1205,12 @@ protected final boolean _checkNextIsIntInArray(final String typeDesc) throws IOE // 01-Nov-2019, tatu: We may actually need tag so decode it, but do not assign // (that'd override tag we already have) - int tagValue = -1; - if (type == 6) { - tagValue = _decodeTag(lowBits); + TagList tagValues = null; + while (type == 6) { + if (tagValues == null) { + tagValues = new TagList(); + } + tagValues.add(_decodeTag(lowBits)); if ((_inputPtr >= _inputEnd) && !loadMore()) { _eofAsNextToken(); return false; @@ -1039,7 +1261,11 @@ protected final boolean _checkNextIsIntInArray(final String typeDesc) throws IOE _invalidToken(ch); } } - _currToken = JsonToken.VALUE_NUMBER_INT; + if (tagValues == null) { + _currToken = JsonToken.VALUE_NUMBER_INT; + } else { + _handleTaggedInt(tagValues); + } return true; case 1: // negative int _numTypesValid = NR_INT; @@ -1088,16 +1314,13 @@ protected final boolean _checkNextIsIntInArray(final String typeDesc) throws IOE case 2: // byte[] // ... but we only really care about very specific case of `BigInteger` - if (tagValue < 0) { + if (tagValues == null) { break; } _typeByte = ch; _tokenIncomplete = true; - _currToken = _handleTaggedBinary(tagValue); + _currToken = _handleTaggedBinary(tagValues); return (_currToken == JsonToken.VALUE_NUMBER_INT); - - case 6: // another tag; not allowed - _reportError("Multiple tags not allowed per value (first tag: "+tagValue+")"); } // Important! 
Need to push back the last byte read (but not consumed) @@ -1111,7 +1334,8 @@ protected final boolean _checkNextIsEndArray() throws IOException { // We know we are in array, with length prefix, and this is where we should be: if (!_streamReadContext.expectMoreValues()) { - _tagValue = -1; + _tagValues.clear(); + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_ARRAY; return true; @@ -1124,19 +1348,13 @@ protected final boolean _checkNextIsEndArray() throws IOException int type = (ch >> 5) & 0x7; // No use for tag but removing it is necessary - int tagValue = -1; - if (type == 6) { - tagValue = _decodeTag(ch & 0x1F); + while (type == 6) { if ((_inputPtr >= _inputEnd) && !loadMore()) { _eofAsNextToken(); return false; } ch = _inputBuffer[_inputPtr++]; type = (ch >> 5) & 0x7; - // including but not limited to nested tags (which we do not allow) - if (type == 6) { - _reportError("Multiple tags not allowed per value (first tag: "+tagValue+")"); - } } // and that's what we need to do for safety; now can drop to generic handling: @@ -1177,9 +1395,10 @@ public boolean nextFieldName(SerializableString str) throws IOException } _tokenInputTotal = _currInputProcessed + _inputPtr; _binaryValue = null; - _tagValue = -1; + _tagValues.clear(); // completed the whole Object? 
if (!_streamReadContext.expectMoreValues()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_OBJECT; return false; @@ -1189,34 +1408,39 @@ public boolean nextFieldName(SerializableString str) throws IOException // fine; require room for up to 2-byte marker, data itself int ptr = _inputPtr; if ((ptr + byteLen + 1) < _inputEnd) { - final int ch = _inputBuffer[ptr++]; - // only handle usual textual type - if (((ch >> 5) & 0x7) == CBORConstants.MAJOR_TYPE_TEXT) { - int lenMarker = ch & 0x1F; - if (lenMarker <= 24) { - if (lenMarker == 23) { - lenMarker = _inputBuffer[ptr++] & 0xFF; - } - if (lenMarker == byteLen) { - int i = 0; - while (true) { - if (i == lenMarker) { - _inputPtr = ptr+i; - _streamReadContext.setCurrentName(str.getValue()); - _currToken = JsonToken.FIELD_NAME; - return true; - } - if (nameBytes[i] != _inputBuffer[ptr+i]) { - break; - } - ++i; - } - } - } + final int ch = _inputBuffer[ptr++]; + // only handle usual textual type + if (((ch >> 5) & 0x7) == MAJOR_TYPE_TEXT) { + int lenMarker = ch & 0x1F; + if (lenMarker <= 24) { + if (lenMarker == 23) { + lenMarker = _inputBuffer[ptr++] & 0xFF; + } + if (lenMarker == byteLen) { + int i = 0; + while (true) { + if (i == lenMarker) { + _inputPtr = ptr + i; + String strValue = str.getValue(); + if (!_stringRefs.empty() && + shouldReferenceString(_stringRefs.peek().stringRefs.size(), + byteLen)) { + _stringRefs.peek().stringRefs.add(strValue); + } + _streamReadContext.setCurrentName(strValue); + _currToken = JsonToken.FIELD_NAME; + return true; + } + if (nameBytes[i] != _inputBuffer[ptr + i]) { + break; + } + ++i; + } + } + } + } } } - - } // otherwise just fall back to default handling; should occur rarely return (nextToken() == JsonToken.FIELD_NAME) && str.getValue().equals(getCurrentName()); } @@ -1231,9 +1455,10 @@ public String nextFieldName() throws IOException } _tokenInputTotal = _currInputProcessed + _inputPtr; _binaryValue = null; - _tagValue = -1; 
+ _tagValues.clear(); // completed the whole Object? if (!_streamReadContext.expectMoreValues()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_OBJECT; return null; @@ -1245,25 +1470,43 @@ public String nextFieldName() throws IOException _eofAsNextToken(); } } - final int ch = _inputBuffer[_inputPtr++]; - final int type = ((ch >> 5) & 0x7); + int ch = _inputBuffer[_inputPtr++] & 0xFF; + int type = (ch >> 5); + int lowBits = ch & 0x1F; + + // One special case: need to consider tag as prefix first: + while (type == 6) { + _tagValues.add(_decodeTag(lowBits)); + if (_inputPtr >= _inputEnd) { + if (!loadMore()) { + _eofAsNextToken(); + return null; + } + } + ch = _inputBuffer[_inputPtr++] & 0xFF; + type = (ch >> 5); + lowBits = ch & 0x1F; + } // offline non-String cases, as they are expected to be rare if (type != CBORConstants.MAJOR_TYPE_TEXT) { - if (ch == -1) { // end-of-object, common + if (ch == 0xFF) { // end-of-object, common if (!_streamReadContext.hasExpectedLength()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_OBJECT; return null; } _reportUnexpectedBreak(); } - _decodeNonStringName(ch); + _decodeNonStringName(ch, _tagValues); _currToken = JsonToken.FIELD_NAME; return getText(); } final int lenMarker = ch & 0x1F; + _sharedString = null; String name; + boolean chunked = false; if (lenMarker <= 23) { if (lenMarker == 0) { name = ""; @@ -1286,11 +1529,17 @@ public String nextFieldName() throws IOException } else { final int actualLen = _decodeExplicitLength(lenMarker); if (actualLen < 0) { + chunked = true; name = _decodeChunkedName(); } else { name = _decodeLongerName(actualLen); } } + if (!chunked && !_stringRefs.empty() && + shouldReferenceString(_stringRefs.peek().stringRefs.size(), lenMarker)) { + _stringRefs.peek().stringRefs.add(name); + _sharedString = name; + } _streamReadContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; 
return name; @@ -1308,13 +1557,13 @@ public String nextTextValue() throws IOException } _tokenInputTotal = _currInputProcessed + _inputPtr; _binaryValue = null; - _tagValue = -1; + _tagValues.clear(); if (_streamReadContext.inObject()) { if (_currToken != JsonToken.FIELD_NAME) { - _tagValue = -1; // completed the whole Object? if (!_streamReadContext.expectMoreValues()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_OBJECT; return null; @@ -1324,7 +1573,7 @@ public String nextTextValue() throws IOException } } else { if (!_streamReadContext.expectMoreValues()) { - _tagValue = -1; + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_ARRAY; return null; @@ -1341,8 +1590,8 @@ public String nextTextValue() throws IOException int lowBits = ch & 0x1F; // One special case: need to consider tag as prefix first: - if (type == 6) { - _tagValue = Integer.valueOf(_decodeTag(lowBits)); + while (type == 6) { + _tagValues.add(_decodeTag(lowBits)); if (_inputPtr >= _inputEnd) { if (!loadMore()) { _eofAsNextToken(); @@ -1352,8 +1601,6 @@ public String nextTextValue() throws IOException ch = _inputBuffer[_inputPtr++] & 0xFF; type = (ch >> 5); lowBits = ch & 0x1F; - } else { - _tagValue = -1; } switch (type) { @@ -1399,7 +1646,9 @@ public String nextTextValue() throws IOException _invalidToken(ch); } } - _currToken = JsonToken.VALUE_NUMBER_INT; + if (_handleTaggedInt(_tagValues) == JsonToken.VALUE_STRING) { + return getText(); + } return null; case 1: // negative int _numTypesValid = NR_INT; @@ -1474,9 +1723,6 @@ public String nextTextValue() throws IOException } return null; - case 6: // another tag; not allowed - _reportError("Multiple tags not allowed per value (first tag: "+_tagValue+")"); - case 7: default: // misc: tokens, floats switch (lowBits) { @@ -1516,6 +1762,7 @@ public String nextTextValue() throws IOException case 31: // Break if (_streamReadContext.inArray()) { if 
(!_streamReadContext.hasExpectedLength()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); _currToken = JsonToken.END_ARRAY; return null; @@ -1582,7 +1829,7 @@ public String getText() throws IOException } } if (t == JsonToken.VALUE_STRING) { - return _textBuffer.contentsAsString(); + return _sharedString == null ? _textBuffer.contentsAsString() : _sharedString; } if (t == null) { // null only before/after document return null; @@ -1604,7 +1851,7 @@ public char[] getTextCharacters() throws IOException _finishToken(); } if (_currToken == JsonToken.VALUE_STRING) { - return _textBuffer.getTextBuffer(); + return _sharedString == null ? _textBuffer.getTextBuffer() : _sharedString.toCharArray(); } if (_currToken == JsonToken.FIELD_NAME) { return _streamReadContext.getCurrentName().toCharArray(); @@ -1626,7 +1873,7 @@ public int getTextLength() throws IOException _finishToken(); } if (_currToken == JsonToken.VALUE_STRING) { - return _textBuffer.size(); + return _sharedString == null ? _textBuffer.size() : _sharedString.length(); } if (_currToken == JsonToken.FIELD_NAME) { return _streamReadContext.getCurrentName().length(); @@ -1655,7 +1902,7 @@ public String getValueAsString() throws IOException } } if (_currToken == JsonToken.VALUE_STRING) { - return _textBuffer.contentsAsString(); + return _sharedString == null ? 
_textBuffer.contentsAsString() : _sharedString; } if (_currToken == null || _currToken == JsonToken.VALUE_NULL || !_currToken.isScalarValue()) { return null; @@ -1682,7 +1929,12 @@ public int getText(Writer writer) throws IOException } JsonToken t = _currToken; if (t == JsonToken.VALUE_STRING) { - return _textBuffer.contentsToWriter(writer); + if (_sharedString == null) { + return _textBuffer.contentsToWriter(writer); + } else { + writer.write(_sharedString); + return _sharedString.length(); + } } if (t == JsonToken.FIELD_NAME) { String n = _streamReadContext.getCurrentName(); @@ -1691,7 +1943,12 @@ public int getText(Writer writer) throws IOException } if (t != null) { if (t.isNumeric()) { - return _textBuffer.contentsToWriter(writer); + if (_sharedString == null) { + return _textBuffer.contentsToWriter(writer); + } else { + writer.write(_sharedString); + return _sharedString.length(); + } } char[] ch = t.asCharArray(); writer.write(ch); @@ -1762,6 +2019,11 @@ public int readBinaryValue(Base64Variant b64variant, OutputStream out) throws IO _tokenIncomplete = false; int len = _decodeExplicitLength(_typeByte & 0x1F); + if (!_stringRefs.empty()) { + out.write(_finishBytes(len)); + return len; + } + if (len >= 0) { // non-chunked return _readAndWriteBytes(out, len); } @@ -2175,6 +2437,7 @@ protected void convertNumberToBigDecimal() throws IOException protected void _finishToken() throws IOException { _tokenIncomplete = false; + _sharedString = null; int ch = _typeByte; final int type = ((ch >> 5) & 0x7); ch &= 0x1F; @@ -2223,6 +2486,7 @@ && _tryToLoadToHaveAtLeast(needed))) { protected String _finishTextToken(int ch) throws IOException { _tokenIncomplete = false; + _sharedString = null; final int type = ((ch >> 5) & 0x7); ch &= 0x1F; @@ -2261,8 +2525,7 @@ && _tryToLoadToHaveAtLeast(needed))) { return _finishShortText(len); } // If not enough space, need handling similar to chunked - _finishLongText(len); - return _textBuffer.contentsAsString(); + return 
_finishLongText(len); } private final String _finishShortText(int len) throws IOException @@ -2272,6 +2535,12 @@ private final String _finishShortText(int len) throws IOException outBuf = _textBuffer.expandCurrentSegment(len); } + StringRefList stringRefs = null; + if (!_stringRefs.empty() && + shouldReferenceString(_stringRefs.peek().stringRefs.size(), len)) { + stringRefs = _stringRefs.peek(); + } + int outPtr = 0; int inPtr = _inputPtr; _inputPtr += len; @@ -2284,7 +2553,12 @@ private final String _finishShortText(int len) throws IOException while ((i = inputBuf[inPtr]) >= 0) { outBuf[outPtr++] = (char) i; if (++inPtr == end) { - return _textBuffer.setCurrentAndReturn(outPtr); + String str = _textBuffer.setCurrentAndReturn(outPtr); + if (stringRefs != null) { + stringRefs.stringRefs.add(str); + _sharedString = str; + } + return str; } } final int[] codes = UTF8_UNIT_CODES; @@ -2331,10 +2605,15 @@ private final String _finishShortText(int len) throws IOException } outBuf[outPtr++] = (char) i; } while (inPtr < end); - return _textBuffer.setCurrentAndReturn(outPtr); + String str = _textBuffer.setCurrentAndReturn(outPtr); + if (stringRefs != null) { + stringRefs.stringRefs.add(str); + _sharedString = str; + } + return str; } - private final void _finishLongText(int len) throws IOException + private final String _finishLongText(int len) throws IOException { char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int outPtr = 0; @@ -2392,7 +2671,13 @@ private final void _finishLongText(int len) throws IOException // Ok, let's add char to output: outBuf[outPtr++] = (char) c; } - _textBuffer.setCurrentLength(outPtr); + String str = _textBuffer.setCurrentAndReturn(outPtr); + if (!_stringRefs.empty() && + shouldReferenceString(_stringRefs.peek().stringRefs.size(), len)) { + _stringRefs.peek().stringRefs.add(str); + _sharedString = str; + } + return str; } private final void _finishChunkedText() throws IOException @@ -2578,10 +2863,21 @@ protected byte[] _finishBytes(int 
len) throws IOException } return _finishChunkedBytes(); } + + StringRefList stringRefs = null; + if (!_stringRefs.empty() && + shouldReferenceString(_stringRefs.peek().stringRefs.size(), len)) { + stringRefs = _stringRefs.peek(); + } + // Non-chunked, contiguous if (len > LONGEST_NON_CHUNKED_BINARY) { // [dataformats-binary#186]: avoid immediate allocation for longest - return _finishLongContiguousBytes(len); + byte[] b = _finishLongContiguousBytes(len); + if (stringRefs != null) { + stringRefs.stringRefs.add(b); + } + return b; } final byte[] b = new byte[len]; @@ -2600,6 +2896,9 @@ protected byte[] _finishBytes(int len) throws IOException ptr += toAdd; len -= toAdd; if (len <= 0) { + if (stringRefs != null) { + stringRefs.stringRefs.add(b); + } return b; } if (!loadMore()) { @@ -2686,23 +2985,40 @@ protected final JsonToken _decodePropertyName() throws IOException _eofAsNextToken(); } } - final int ch = _inputBuffer[_inputPtr++]; - final int type = ((ch >> 5) & 0x7); + int ch = _inputBuffer[_inputPtr++] & 0xFF; + int type = (ch >> 5); + int lowBits = ch & 0x1F; + + // One special case: need to consider tag as prefix first: + while (type == 6) { + _tagValues.add(_decodeTag(lowBits)); + if (_inputPtr >= _inputEnd) { + if (!loadMore()) { + _eofAsNextToken(); + return null; + } + } + ch = _inputBuffer[_inputPtr++] & 0xFF; + type = (ch >> 5); + lowBits = ch & 0x1F; + } // Expecting a String, but may need to allow other types too if (type != CBORConstants.MAJOR_TYPE_TEXT) { // the usual case - if (ch == -1) { + if (ch == 0xFF) { if (!_streamReadContext.hasExpectedLength()) { + _stringRefs.pop(); _streamReadContext = _streamReadContext.getParent(); return JsonToken.END_OBJECT; } _reportUnexpectedBreak(); } // offline non-String cases, as they are expected to be rare - _decodeNonStringName(ch); + _decodeNonStringName(ch, _tagValues); return JsonToken.FIELD_NAME; } final int lenMarker = ch & 0x1F; + boolean chunked = false; String name; if (lenMarker <= 23) { if 
(lenMarker == 0) { @@ -2726,11 +3042,17 @@ protected final JsonToken _decodePropertyName() throws IOException } else { final int actualLen = _decodeExplicitLength(lenMarker); if (actualLen < 0) { + chunked = true; name = _decodeChunkedName(); } else { name = _decodeLongerName(actualLen); } } + if (!chunked && !_stringRefs.empty() && + shouldReferenceString(_stringRefs.peek().stringRefs.size(), lenMarker)) { + _stringRefs.peek().stringRefs.add(name); + _sharedString = name; + } _streamReadContext.setCurrentName(name); return JsonToken.FIELD_NAME; } @@ -2830,8 +3152,7 @@ private final String _decodeLongerName(int len) throws IOException // or if not, could we read? if (len >= _inputBuffer.length) { // If not enough space, need handling similar to chunked - _finishLongText(len); - return _textBuffer.contentsAsString(); + return _finishLongText(len); } _loadToHaveAtLeast(len); } @@ -2857,14 +3178,14 @@ private final String _decodeChunkedName() throws IOException * Method that handles initial token type recognition for token * that has to be either FIELD_NAME or END_OBJECT. */ - protected final void _decodeNonStringName(int ch) throws IOException + protected final void _decodeNonStringName(int ch, TagList tags) throws IOException { final int type = ((ch >> 5) & 0x7); String name; if (type == CBORConstants.MAJOR_TYPE_INT_POS) { - name = _numberToName(ch, false); + name = _numberToName(ch, false, tags); } else if (type == CBORConstants.MAJOR_TYPE_INT_NEG) { - name = _numberToName(ch, true); + name = _numberToName(ch, true, tags); } else if (type == CBORConstants.MAJOR_TYPE_BYTES) { // 08-Sep-2014, tatu: As per [Issue#5], there are codecs // (f.ex. Perl module "CBOR::XS") that use Binary data... @@ -2886,7 +3207,7 @@ protected final void _decodeNonStringName(int ch) throws IOException /** * Helper method for trying to find specified encoded UTF-8 byte sequence * from symbol table; if successful avoids actual decoding to String. - *

 + * <p>
* NOTE: caller MUST ensure input buffer has enough content. */ private final String _findDecodedFromSymbols(final int len) throws IOException @@ -3356,7 +3677,7 @@ protected JsonToken _decodeUndefinedValue() throws IOException { /** * Helper method that deals with details of decoding unallocated "simple values" * and exposing them as expected token. - *

 + * <p>
* As of Jackson 2.12, simple values are exposed as * {@link JsonToken#VALUE_NUMBER_INT}s, * but in later versions this is planned to be changed to separate value type. @@ -3638,7 +3959,7 @@ protected void _handleEOF() throws JsonParseException { protected JsonToken _eofAsNextToken() throws IOException { // NOTE: here we can and should close input, release buffers, since // this is "hard" EOF, not a boundary imposed by header token. - _tagValue = -1; + _tagValues.clear(); close(); // 30-Jan-2021, tatu: But also MUST verify that end-of-content is actually // allowed (see [dataformats-binary#240] for example) diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/CBORTestBase.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/CBORTestBase.java index 8948ac556..36e635703 100644 --- a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/CBORTestBase.java +++ b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/CBORTestBase.java @@ -95,6 +95,16 @@ protected CBORGenerator lenientUnicodeCborGenerator(ByteArrayOutputStream result .createGenerator(result); } + // @since 2.15 + protected CBORGenerator stringrefCborGenerator(ByteArrayOutputStream result) + throws IOException + { + return cborFactoryBuilder() + .enable(CBORGenerator.Feature.STRINGREF) + .build() + .createGenerator(result); + } + /* /********************************************************** /* Additional assertion methods diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/ParserWithJsonOrgSampleTest.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/ParserWithJsonOrgSampleTest.java index 342c0564b..35b66fc07 100644 --- a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/ParserWithJsonOrgSampleTest.java +++ b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/ParserWithJsonOrgSampleTest.java @@ -47,6 +47,17 @@ public void testJsonSampleDoc() throws IOException verifyJsonSpecSampleDoc(cborParser(data), false, true); } + public void 
testJsonSampleDocStringref() throws IOException + { + byte[] data = cborDoc( + cborFactoryBuilder().enable(CBORGenerator.Feature.STRINGREF).build(), + SAMPLE_DOC_JSON_SPEC); + verifyJsonSpecSampleDoc(cborParser(data), true, true); + verifyJsonSpecSampleDoc(cborParser(data), true, false); + verifyJsonSpecSampleDoc(cborParser(data), false, false); + verifyJsonSpecSampleDoc(cborParser(data), false, true); + } + protected void verifyJsonSpecSampleDoc(JsonParser p, boolean verifyContents, boolean requireNumbers) throws IOException { diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/StringrefTest.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/StringrefTest.java new file mode 100644 index 000000000..5a305d8e2 --- /dev/null +++ b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/StringrefTest.java @@ -0,0 +1,816 @@ +package com.fasterxml.jackson.dataformat.cbor; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; + +import com.fasterxml.jackson.core.*; +import com.fasterxml.jackson.core.io.SerializedString; + +import static org.junit.Assert.assertArrayEquals; + +/** + * Basic testing for string reference generation added in 2.15. 
+ */ +public class StringrefTest extends CBORTestBase +{ + public void testSimpleObject() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + gen.writeStartObject(); + gen.writeNumberField("rank", 4); + gen.writeNumberField("count", 417); + gen.writeStringField("name", "Cocktail"); + gen.writeEndObject(); + gen.writeStartObject(); + gen.writeStringField("name", "Bath"); + gen.writeNumberField("count", 312); + gen.writeNumberField("rank", 4); + gen.writeEndObject(); + gen.writeStartObject(); + gen.writeStringField("name", "Food"); + gen.writeNumberField("count", 691); + gen.writeNumberField("rank", 4); + gen.writeEndObject(); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + assertArrayEquals(_simpleObjectBytes, encoded); + + CBORParser parser = cborParser(encoded); + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + + assertToken(JsonToken.START_OBJECT, parser.nextToken()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + String rankStr = parser.getCurrentName(); + assertEquals("rank", rankStr); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(4, parser.getValueAsInt()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + String countStr = parser.getCurrentName(); + assertEquals("count", countStr); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(417, parser.getValueAsInt()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + String nameStr = parser.getCurrentName(); + assertEquals("name", nameStr); + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals("Cocktail", parser.getText()); + assertToken(JsonToken.END_OBJECT, parser.nextToken()); + + assertToken(JsonToken.START_OBJECT, parser.nextToken()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + 
assertSame(nameStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals("Bath", parser.getText()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + assertSame(countStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(312, parser.getValueAsInt()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + assertSame(rankStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(4, parser.getValueAsInt()); + assertToken(JsonToken.END_OBJECT, parser.nextToken()); + + assertToken(JsonToken.START_OBJECT, parser.nextToken()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + assertSame(nameStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals("Food", parser.getText()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + assertSame(countStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(691, parser.getValueAsInt()); + assertToken(JsonToken.FIELD_NAME, parser.nextToken()); + assertSame(rankStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(4, parser.getValueAsInt()); + assertToken(JsonToken.END_OBJECT, parser.nextToken()); + + assertToken(JsonToken.END_ARRAY, parser.nextToken()); + } + + public void testSimpleObjectSerializedStrings() throws Exception { + // SerializableString interface takes different code paths. 
+ ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + gen.writeStartObject(); + gen.writeFieldName(new SerializedString("rank")); + gen.writeNumber(4); + gen.writeFieldName(new SerializedString("count")); + gen.writeNumber(417); + gen.writeFieldName(new SerializedString("name")); + gen.writeString(new SerializedString("Cocktail")); + gen.writeEndObject(); + gen.writeStartObject(); + gen.writeFieldName(new SerializedString("name")); + gen.writeString(new SerializedString("Bath")); + gen.writeFieldName(new SerializedString("count")); + gen.writeNumber(312); + gen.writeFieldName(new SerializedString("rank")); + gen.writeNumber(4); + gen.writeEndObject(); + gen.writeStartObject(); + gen.writeStringField("name", "Food"); + gen.writeFieldName(new SerializedString("count")); + gen.writeNumber(691); + gen.writeFieldName(new SerializedString("rank")); + gen.writeNumber(4); + gen.writeEndObject(); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + assertArrayEquals(_simpleObjectBytes, encoded); + + CBORParser parser = cborParser(encoded); + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + + assertToken(JsonToken.START_OBJECT, parser.nextToken()); + assertTrue(parser.nextFieldName(new SerializedString("rank"))); + String rankStr = parser.getCurrentName(); + assertEquals("rank", rankStr); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(4, parser.getValueAsInt()); + assertTrue(parser.nextFieldName(new SerializedString("count"))); + String countStr = parser.getCurrentName(); + assertEquals("count", countStr); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(417, parser.getValueAsInt()); + assertTrue(parser.nextFieldName(new SerializedString("name"))); + String nameStr = parser.getCurrentName(); + assertEquals("name", nameStr); + 
assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals("Cocktail", parser.getText()); + assertToken(JsonToken.END_OBJECT, parser.nextToken()); + + assertToken(JsonToken.START_OBJECT, parser.nextToken()); + assertTrue(parser.nextFieldName(new SerializedString("name"))); + assertSame(nameStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals("Bath", parser.getText()); + assertTrue(parser.nextFieldName(new SerializedString("count"))); + assertSame(countStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(312, parser.getValueAsInt()); + assertTrue(parser.nextFieldName(new SerializedString("rank"))); + assertSame(rankStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(4, parser.getValueAsInt()); + assertToken(JsonToken.END_OBJECT, parser.nextToken()); + + assertToken(JsonToken.START_OBJECT, parser.nextToken()); + assertTrue(parser.nextFieldName(new SerializedString("name"))); + assertSame(nameStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals("Food", parser.getText()); + assertTrue(parser.nextFieldName(new SerializedString("count"))); + assertSame(countStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(691, parser.getValueAsInt()); + assertTrue(parser.nextFieldName(new SerializedString("rank"))); + assertSame(rankStr, parser.getCurrentName()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(4, parser.getValueAsInt()); + assertToken(JsonToken.END_OBJECT, parser.nextToken()); + + assertToken(JsonToken.END_ARRAY, parser.nextToken()); + } + + public void testStringArray() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + 
assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + gen.writeString("1"); + gen.writeString("222"); + gen.writeString("333"); + gen.writeString("4"); + gen.writeString("555"); + gen.writeString("666"); + gen.writeString("777"); + gen.writeString("888"); + gen.writeString("999"); + gen.writeString("aaa"); + gen.writeString("bbb"); + gen.writeString("ccc"); + gen.writeString("ddd"); + gen.writeString("eee"); + gen.writeString("fff"); + gen.writeString("ggg"); + gen.writeString("hhh"); + gen.writeString("iii"); + gen.writeString("jjj"); + gen.writeString("kkk"); + gen.writeString("lll"); + gen.writeString("mmm"); + gen.writeString("nnn"); + gen.writeString("ooo"); + gen.writeString("ppp"); + gen.writeString("qqq"); + gen.writeString("rrr"); + gen.writeString("333"); + gen.writeString("ssss"); + gen.writeString("qqq"); + gen.writeString("rrr"); + gen.writeString("ssss"); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + verifyStringArray(encoded); + } + + public void testStringArrayFromChars() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + writeStringAsCharArray(gen, "1"); + writeStringAsCharArray(gen, "222"); + writeStringAsCharArray(gen, "333"); + writeStringAsCharArray(gen, "4"); + writeStringAsCharArray(gen, "555"); + writeStringAsCharArray(gen, "666"); + writeStringAsCharArray(gen, "777"); + writeStringAsCharArray(gen, "888"); + writeStringAsCharArray(gen, "999"); + writeStringAsCharArray(gen, "aaa"); + writeStringAsCharArray(gen, "bbb"); + writeStringAsCharArray(gen, "ccc"); + writeStringAsCharArray(gen, "ddd"); + writeStringAsCharArray(gen, "eee"); + writeStringAsCharArray(gen, "fff"); + writeStringAsCharArray(gen, "ggg"); + writeStringAsCharArray(gen, "hhh"); + writeStringAsCharArray(gen, "iii"); + 
writeStringAsCharArray(gen, "jjj"); + writeStringAsCharArray(gen, "kkk"); + writeStringAsCharArray(gen, "lll"); + writeStringAsCharArray(gen, "mmm"); + writeStringAsCharArray(gen, "nnn"); + writeStringAsCharArray(gen, "ooo"); + writeStringAsCharArray(gen, "ppp"); + writeStringAsCharArray(gen, "qqq"); + writeStringAsCharArray(gen, "rrr"); + writeStringAsCharArray(gen, "333"); + writeStringAsCharArray(gen, "ssss"); + writeStringAsCharArray(gen, "qqq"); + writeStringAsCharArray(gen, "rrr"); + writeStringAsCharArray(gen, "ssss"); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + verifyStringArray(encoded); + } + + public void testStringArraySerializedString() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + gen.writeString(new SerializedString("1")); + gen.writeString(new SerializedString("222")); + gen.writeString(new SerializedString("333")); + gen.writeString(new SerializedString("4")); + gen.writeString(new SerializedString("555")); + gen.writeString(new SerializedString("666")); + gen.writeString(new SerializedString("777")); + gen.writeString(new SerializedString("888")); + gen.writeString(new SerializedString("999")); + gen.writeString(new SerializedString("aaa")); + gen.writeString(new SerializedString("bbb")); + gen.writeString(new SerializedString("ccc")); + gen.writeString(new SerializedString("ddd")); + gen.writeString(new SerializedString("eee")); + gen.writeString(new SerializedString("fff")); + gen.writeString(new SerializedString("ggg")); + gen.writeString(new SerializedString("hhh")); + gen.writeString(new SerializedString("iii")); + gen.writeString(new SerializedString("jjj")); + gen.writeString(new SerializedString("kkk")); + gen.writeString(new SerializedString("lll")); + gen.writeString(new SerializedString("mmm")); + gen.writeString(new 
SerializedString("nnn")); + gen.writeString(new SerializedString("ooo")); + gen.writeString(new SerializedString("ppp")); + gen.writeString(new SerializedString("qqq")); + gen.writeString(new SerializedString("rrr")); + gen.writeString(new SerializedString("333")); + gen.writeString(new SerializedString("ssss")); + gen.writeString(new SerializedString("qqq")); + gen.writeString(new SerializedString("rrr")); + gen.writeString(new SerializedString("ssss")); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + verifyStringArray(encoded); + } + + public void testStringArrayUTF8() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + writeStringAsUTF8(gen, "1"); + writeStringAsUTF8(gen, "222"); + writeStringAsUTF8(gen, "333"); + writeStringAsUTF8(gen, "4"); + writeStringAsUTF8(gen, "555"); + writeStringAsUTF8(gen, "666"); + writeStringAsUTF8(gen, "777"); + writeStringAsUTF8(gen, "888"); + writeStringAsUTF8(gen, "999"); + writeStringAsUTF8(gen, "aaa"); + writeStringAsUTF8(gen, "bbb"); + writeStringAsUTF8(gen, "ccc"); + writeStringAsUTF8(gen, "ddd"); + writeStringAsUTF8(gen, "eee"); + writeStringAsUTF8(gen, "fff"); + writeStringAsUTF8(gen, "ggg"); + writeStringAsUTF8(gen, "hhh"); + writeStringAsUTF8(gen, "iii"); + writeStringAsUTF8(gen, "jjj"); + writeStringAsUTF8(gen, "kkk"); + writeStringAsUTF8(gen, "lll"); + writeStringAsUTF8(gen, "mmm"); + writeStringAsUTF8(gen, "nnn"); + writeStringAsUTF8(gen, "ooo"); + writeStringAsUTF8(gen, "ppp"); + writeStringAsUTF8(gen, "qqq"); + writeStringAsUTF8(gen, "rrr"); + writeStringAsUTF8(gen, "333"); + writeStringAsUTF8(gen, "ssss"); + writeStringAsUTF8(gen, "qqq"); + writeStringAsUTF8(gen, "rrr"); + writeStringAsUTF8(gen, "ssss"); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + verifyStringArray(encoded); + 
} + + public void testStringArrayNextTextValue() throws Exception { + // nextTextValue() takes a separate code path. Use the expected encoded bytes since there's + // no special overload we want to test for encoding. + CBORParser parser = cborParser(_stringArrayBytes); + + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + verifyNextTextValue("1", parser); + verifyNextTextValue("222", parser); + String str333 = verifyNextTextValue("333", parser); + verifyNextTextValue("4", parser); + verifyNextTextValue("555", parser); + verifyNextTextValue("666", parser); + verifyNextTextValue("777", parser); + verifyNextTextValue("888", parser); + verifyNextTextValue("999", parser); + verifyNextTextValue("aaa", parser); + verifyNextTextValue("bbb", parser); + verifyNextTextValue("ccc", parser); + verifyNextTextValue("ddd", parser); + verifyNextTextValue("eee", parser); + verifyNextTextValue("fff", parser); + verifyNextTextValue("ggg", parser); + verifyNextTextValue("hhh", parser); + verifyNextTextValue("iii", parser); + verifyNextTextValue("jjj", parser); + verifyNextTextValue("kkk", parser); + verifyNextTextValue("lll", parser); + verifyNextTextValue("mmm", parser); + verifyNextTextValue("nnn", parser); + verifyNextTextValue("ooo", parser); + verifyNextTextValue("ppp", parser); + String qqqStr = verifyNextTextValue("qqq", parser); + String rrrStr = verifyNextTextValue("rrr", parser); + verifyNextTextValueRef(str333, parser); + String ssssStr = verifyNextTextValue("ssss", parser); + verifyNextTextValueRef(qqqStr, parser); + verifyNextTextValueNotRef(rrrStr, parser); + verifyNextTextValueRef(ssssStr, parser); + } + + public void testBinaryStringArray() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + writeStringAsBinary(gen, "1"); + writeStringAsBinary(gen, "222"); + writeStringAsBinary(gen, "333"); + 
writeStringAsBinary(gen, "4"); + writeStringAsBinary(gen, "555"); + writeStringAsBinary(gen, "666"); + writeStringAsBinary(gen, "777"); + writeStringAsBinary(gen, "888"); + writeStringAsBinary(gen, "999"); + writeStringAsBinary(gen, "aaa"); + writeStringAsBinary(gen, "bbb"); + writeStringAsBinary(gen, "ccc"); + writeStringAsBinary(gen, "ddd"); + writeStringAsBinary(gen, "eee"); + writeStringAsBinary(gen, "fff"); + writeStringAsBinary(gen, "ggg"); + writeStringAsBinary(gen, "hhh"); + writeStringAsBinary(gen, "iii"); + writeStringAsBinary(gen, "jjj"); + writeStringAsBinary(gen, "kkk"); + writeStringAsBinary(gen, "lll"); + writeStringAsBinary(gen, "mmm"); + writeStringAsBinary(gen, "nnn"); + writeStringAsBinary(gen, "ooo"); + writeStringAsBinary(gen, "ppp"); + writeStringAsBinary(gen, "qqq"); + writeStringAsBinary(gen, "rrr"); + writeStringAsBinary(gen, "333"); + writeStringAsBinary(gen, "ssss"); + writeStringAsBinary(gen, "qqq"); + writeStringAsBinary(gen, "rrr"); + writeStringAsBinary(gen, "ssss"); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + assertArrayEquals(_binaryStringArrayBytes, encoded); + + CBORParser parser = cborParser(encoded); + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + verifyNextTokenBinary("1", parser); + verifyNextTokenBinary("222", parser); + byte[] str333 = verifyNextTokenBinary("333", parser); + verifyNextTokenBinary("4", parser); + verifyNextTokenBinary("555", parser); + verifyNextTokenBinary("666", parser); + verifyNextTokenBinary("777", parser); + verifyNextTokenBinary("888", parser); + verifyNextTokenBinary("999", parser); + verifyNextTokenBinary("aaa", parser); + verifyNextTokenBinary("bbb", parser); + verifyNextTokenBinary("ccc", parser); + verifyNextTokenBinary("ddd", parser); + verifyNextTokenBinary("eee", parser); + verifyNextTokenBinary("fff", parser); + verifyNextTokenBinary("ggg", parser); + verifyNextTokenBinary("hhh", parser); + verifyNextTokenBinary("iii", parser); + 
verifyNextTokenBinary("jjj", parser); + verifyNextTokenBinary("kkk", parser); + verifyNextTokenBinary("lll", parser); + verifyNextTokenBinary("mmm", parser); + verifyNextTokenBinary("nnn", parser); + verifyNextTokenBinary("ooo", parser); + verifyNextTokenBinary("ppp", parser); + byte[] qqqStr = verifyNextTokenBinary("qqq", parser); + byte[] rrrStr = verifyNextTokenBinary("rrr", parser); + verifyNextTokenBinaryRef(str333, parser); + byte[] ssssStr = verifyNextTokenBinary("ssss", parser); + verifyNextTokenBinaryRef(qqqStr, parser); + verifyNextTokenBinaryNotRef(rrrStr, parser); + verifyNextTokenBinaryRef(ssssStr, parser); + } + + public void testBinaryStringArrayStream() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = stringrefCborGenerator(bytes); + assertTrue(gen.isEnabled(CBORGenerator.Feature.STRINGREF)); + + gen.writeStartArray(); + writeStringAsBinaryStream(gen, "1"); + writeStringAsBinaryStream(gen, "222"); + writeStringAsBinaryStream(gen, "333"); + writeStringAsBinaryStream(gen, "4"); + writeStringAsBinaryStream(gen, "555"); + writeStringAsBinaryStream(gen, "666"); + writeStringAsBinaryStream(gen, "777"); + writeStringAsBinaryStream(gen, "888"); + writeStringAsBinaryStream(gen, "999"); + writeStringAsBinaryStream(gen, "aaa"); + writeStringAsBinaryStream(gen, "bbb"); + writeStringAsBinaryStream(gen, "ccc"); + writeStringAsBinaryStream(gen, "ddd"); + writeStringAsBinaryStream(gen, "eee"); + writeStringAsBinaryStream(gen, "fff"); + writeStringAsBinaryStream(gen, "ggg"); + writeStringAsBinaryStream(gen, "hhh"); + writeStringAsBinaryStream(gen, "iii"); + writeStringAsBinaryStream(gen, "jjj"); + writeStringAsBinaryStream(gen, "kkk"); + writeStringAsBinaryStream(gen, "lll"); + writeStringAsBinaryStream(gen, "mmm"); + writeStringAsBinaryStream(gen, "nnn"); + writeStringAsBinaryStream(gen, "ooo"); + writeStringAsBinaryStream(gen, "ppp"); + writeStringAsBinaryStream(gen, "qqq"); + writeStringAsBinaryStream(gen, 
"rrr"); + writeStringAsBinaryStream(gen, "333"); + writeStringAsBinaryStream(gen, "ssss"); + writeStringAsBinaryStream(gen, "qqq"); + writeStringAsBinaryStream(gen, "rrr"); + writeStringAsBinaryStream(gen, "ssss"); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + assertArrayEquals(_binaryStringArrayBytes, encoded); + + CBORParser parser = cborParser(encoded); + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + verifyNextTokenBinaryStream("1", parser); + verifyNextTokenBinaryStream("222", parser); + verifyNextTokenBinaryStream("333", parser); + verifyNextTokenBinaryStream("4", parser); + verifyNextTokenBinaryStream("555", parser); + verifyNextTokenBinaryStream("666", parser); + verifyNextTokenBinaryStream("777", parser); + verifyNextTokenBinaryStream("888", parser); + verifyNextTokenBinaryStream("999", parser); + verifyNextTokenBinaryStream("aaa", parser); + verifyNextTokenBinaryStream("bbb", parser); + verifyNextTokenBinaryStream("ccc", parser); + verifyNextTokenBinaryStream("ddd", parser); + verifyNextTokenBinaryStream("eee", parser); + verifyNextTokenBinaryStream("fff", parser); + verifyNextTokenBinaryStream("ggg", parser); + verifyNextTokenBinaryStream("hhh", parser); + verifyNextTokenBinaryStream("iii", parser); + verifyNextTokenBinaryStream("jjj", parser); + verifyNextTokenBinaryStream("kkk", parser); + verifyNextTokenBinaryStream("lll", parser); + verifyNextTokenBinaryStream("mmm", parser); + verifyNextTokenBinaryStream("nnn", parser); + verifyNextTokenBinaryStream("ooo", parser); + verifyNextTokenBinaryStream("ppp", parser); + verifyNextTokenBinaryStream("qqq", parser); + verifyNextTokenBinaryStream("rrr", parser); + verifyNextTokenBinaryStream("333", parser); + verifyNextTokenBinaryStream("ssss", parser); + verifyNextTokenBinaryStream("qqq", parser); + verifyNextTokenBinaryStream("rrr", parser); + verifyNextTokenBinaryStream("ssss", parser); + } + + public void testNestedNamespaces() throws Exception { + byte[] 
nestedNamespaceBytes = new byte[]{ + (byte) 0xD9, 0x01, 0x00, (byte) 0x85, 0x63, 0x61, 0x61, 0x61, (byte) 0xD8, 0x19, + 0x00, (byte) 0xD9, 0x01, 0x00, (byte) 0x83, 0x63, 0x62, 0x62, 0x62, 0x63, 0x61, + 0x61, 0x61, (byte) 0xD8, 0x19, 0x01, (byte) 0xD9, 0x01, 0x00, (byte) 0x82, 0x63, + 0x63, 0x63, 0x63, (byte) 0xD8, 0x19, 0x00, (byte) 0xD8, 0x19, 0x00 + }; + CBORParser parser = cborParser(nestedNamespaceBytes); + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + String aaaStrOuter = verifyNextTokenString("aaa", parser); + verifyNextTokenStringRef(aaaStrOuter, parser); + + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + verifyNextTokenString("bbb", parser); + String aaaStrInner = verifyNextTokenString("aaa", parser); + assertNotSame(aaaStrOuter, aaaStrInner); + verifyNextTokenStringRef(aaaStrInner, parser); + assertToken(JsonToken.END_ARRAY, parser.nextToken()); + + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + String cccStrInner = verifyNextTokenString("ccc", parser); + verifyNextTokenStringRef(cccStrInner, parser); + assertToken(JsonToken.END_ARRAY, parser.nextToken()); + + verifyNextTokenStringRef(aaaStrOuter, parser); + assertToken(JsonToken.END_ARRAY, parser.nextToken()); + } + + public void testNestedTags() throws Exception { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = new CBORFactory() + .enable(CBORGenerator.Feature.WRITE_TYPE_HEADER) + .enable(CBORGenerator.Feature.STRINGREF) + .createGenerator(bytes); + + gen.writeStartArray(); + gen.writeNumber(new BigInteger("1234567890", 16)); + gen.writeNumber(new BigInteger("9876543210", 16)); + gen.writeNumber(new BigInteger("1234567890", 16)); + gen.writeEndArray(); + gen.close(); + + byte[] encoded = bytes.toByteArray(); + + assertArrayEquals(_nestedTagBytes, encoded); + + CBORParser parser = cborParser(encoded); + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + 
assertTrue(parser.getCurrentTags().contains(CBORConstants.TAG_ID_SELF_DESCRIBE)); + assertTrue(parser.getCurrentTags().contains(CBORConstants.TAG_ID_STRINGREF_NAMESPACE)); + assertEquals(CBORConstants.TAG_ID_SELF_DESCRIBE, parser.getCurrentTag()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(new BigInteger("1234567890", 16), parser.getBigIntegerValue()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(new BigInteger("9876543210", 16), parser.getBigIntegerValue()); + assertToken(JsonToken.VALUE_NUMBER_INT, parser.nextToken()); + assertEquals(new BigInteger("1234567890", 16), parser.getBigIntegerValue()); + assertToken(JsonToken.END_ARRAY, parser.nextToken()); + } + + public void testNestedTagsRounddTrip() throws Exception { + CBORParser parser = cborParser(_nestedTagBytes); + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + CBORGenerator gen = cborGenerator(bytes); + parser.nextToken(); + gen.copyCurrentStructure(parser); + gen.close(); + + byte[] expectedExpandedBytes = new byte[]{ + (byte) 0x9F, (byte) 0xC2, 0x45, 0x12, 0x34, 0x56, 0x78, (byte) 0x90, (byte) 0xC2, + 0x46, 0x00, (byte) 0x98, 0x76, 0x54, 0x32, 0x10, (byte) 0xC2, 0x45, 0x12, 0x34, + 0x56, 0x78, (byte) 0x90, (byte) 0xFF + }; + byte[] encoded = bytes.toByteArray(); + assertArrayEquals(expectedExpandedBytes, encoded); + + bytes.reset(); + parser = cborParser(encoded); + gen = new CBORFactory() + .enable(CBORGenerator.Feature.WRITE_TYPE_HEADER) + .enable(CBORGenerator.Feature.STRINGREF) + .createGenerator(bytes); + parser.nextToken(); + gen.copyCurrentStructure(parser); + gen.close(); + + assertArrayEquals(_nestedTagBytes, bytes.toByteArray()); + } + + private void verifyStringArray(byte[] encoded) throws IOException { + assertArrayEquals(_stringArrayBytes, encoded); + + CBORParser parser = cborParser(encoded); + assertToken(JsonToken.START_ARRAY, parser.nextToken()); + verifyNextTokenString("1", parser); + 
verifyNextTokenString("222", parser); + String str333 = verifyNextTokenString("333", parser); + verifyNextTokenString("4", parser); + verifyNextTokenString("555", parser); + verifyNextTokenString("666", parser); + verifyNextTokenString("777", parser); + verifyNextTokenString("888", parser); + verifyNextTokenString("999", parser); + verifyNextTokenString("aaa", parser); + verifyNextTokenString("bbb", parser); + verifyNextTokenString("ccc", parser); + verifyNextTokenString("ddd", parser); + verifyNextTokenString("eee", parser); + verifyNextTokenString("fff", parser); + verifyNextTokenString("ggg", parser); + verifyNextTokenString("hhh", parser); + verifyNextTokenString("iii", parser); + verifyNextTokenString("jjj", parser); + verifyNextTokenString("kkk", parser); + verifyNextTokenString("lll", parser); + verifyNextTokenString("mmm", parser); + verifyNextTokenString("nnn", parser); + verifyNextTokenString("ooo", parser); + verifyNextTokenString("ppp", parser); + String qqqStr = verifyNextTokenString("qqq", parser); + String rrrStr = verifyNextTokenString("rrr", parser); + verifyNextTokenStringRef(str333, parser); + String ssssStr = verifyNextTokenString("ssss", parser); + verifyNextTokenStringRef(qqqStr, parser); + verifyNextTokenStringNotRef(rrrStr, parser); + verifyNextTokenStringRef(ssssStr, parser); + } + + private void writeStringAsCharArray(CBORGenerator gen, String str) throws IOException { + char[] chars = str.toCharArray(); + gen.writeString(chars, 0, chars.length); + } + + private void writeStringAsUTF8(CBORGenerator gen, String str) throws IOException { + byte[] encoded = str.getBytes(StandardCharsets.UTF_8); + gen.writeUTF8String(encoded, 0, encoded.length); + } + + private void writeStringAsBinary(CBORGenerator gen, String str) throws IOException { + gen.writeBinary(str.getBytes(StandardCharsets.UTF_8)); + } + + private void writeStringAsBinaryStream(CBORGenerator gen, String str) throws IOException { + byte[] encoded = 
str.getBytes(StandardCharsets.UTF_8); + gen.writeBinary(new ByteArrayInputStream(encoded), encoded.length); + } + + private String verifyNextTokenString(String expected, CBORParser parser) throws IOException { + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals(expected, parser.getText()); + return parser.getText(); + } + + private void verifyNextTokenStringRef(String expected, CBORParser parser) throws IOException { + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertSame(expected, parser.getText()); + } + + private void verifyNextTokenStringNotRef(String expected, CBORParser parser) throws IOException { + assertToken(JsonToken.VALUE_STRING, parser.nextToken()); + assertEquals(expected, parser.getText()); + assertNotSame(expected, parser.getText()); + } + + private String verifyNextTextValue(String expected, CBORParser parser) throws IOException { + assertEquals(expected, parser.nextTextValue()); + return parser.getText(); + } + + private void verifyNextTextValueRef(String expected, CBORParser parser) throws IOException { + assertSame(expected, parser.nextTextValue()); + } + + private void verifyNextTextValueNotRef(String expected, CBORParser parser) throws IOException { + assertEquals(expected, parser.nextTextValue()); + assertNotSame(expected, parser.getText()); + } + + private byte[] verifyNextTokenBinary(String expected, CBORParser parser) throws IOException { + assertToken(JsonToken.VALUE_EMBEDDED_OBJECT, parser.nextToken()); + byte[] binary = parser.getBinaryValue(); + assertArrayEquals(expected.getBytes(StandardCharsets.UTF_8), binary); + return binary; + } + + private void verifyNextTokenBinaryRef(byte[] expected, CBORParser parser) throws IOException { + assertToken(JsonToken.VALUE_EMBEDDED_OBJECT, parser.nextToken()); + assertSame(expected, parser.getBinaryValue()); + } + + private void verifyNextTokenBinaryNotRef(byte[] expected, CBORParser parser) throws IOException { + assertToken(JsonToken.VALUE_EMBEDDED_OBJECT, 
parser.nextToken()); + assertArrayEquals(expected, parser.getBinaryValue()); + assertNotSame(expected, parser.getBinaryValue()); + } + + private void verifyNextTokenBinaryStream(String expected, CBORParser parser) throws IOException { + assertToken(JsonToken.VALUE_EMBEDDED_OBJECT, parser.nextToken()); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + parser.readBinaryValue(stream); + assertArrayEquals(expected.getBytes(StandardCharsets.UTF_8), stream.toByteArray()); + } + + private static final byte[] _simpleObjectBytes = new byte[]{ + (byte) 0xD9, 0x01, 0x00, (byte) 0x9F, (byte) 0xBF, 0x64, 0x72, 0x61, 0x6E, 0x6B, + 0x04, 0x65, 0x63, 0x6F, 0x75, 0x6E, 0x74, 0x19, 0x01, (byte) 0xA1, 0x64, 0x6E, 0x61, + 0x6D, 0x65, 0x68, 0x43, 0x6F, 0x63, 0x6B, 0x74, 0x61, 0x69, 0x6C, (byte) 0xFF, + (byte) 0xBF, (byte) 0xD8, 0x19, 0x02, 0x64, 0x42, 0x61, 0x74, 0x68, (byte) 0xD8, + 0x19, 0x01, 0x19, 0x01, 0x38, (byte) 0xD8, 0x19, 0x00, 0x04, (byte) 0xFF, + (byte) 0xBF, (byte) 0xD8, 0x19, 0x02, 0x64, 0x46, 0x6F, 0x6F, 0x64, (byte) 0xD8, + 0x19, 0x01, 0x19, 0x02, (byte) 0xB3, (byte) 0xD8, 0x19, 0x00, 0x04, (byte) 0xFF, + (byte) 0xFF + }; + + private static final byte[] _stringArrayBytes = new byte[]{ + (byte) 0xD9, 0x01, 0x00, (byte) 0x9F, 0x61, 0x31, 0x63, 0x32, 0x32, 0x32, 0x63, 0x33, + 0x33, 0x33, 0x61, 0x34, 0x63, 0x35, 0x35, 0x35, 0x63, 0x36, 0x36, 0x36, 0x63, 0x37, + 0x37, 0x37, 0x63, 0x38, 0x38, 0x38, 0x63, 0x39, 0x39, 0x39, 0x63, 0x61, 0x61, 0x61, + 0x63, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63, 0x63, 0x63, 0x64, 0x64, 0x64, 0x63, 0x65, + 0x65, 0x65, 0x63, 0x66, 0x66, 0x66, 0x63, 0x67, 0x67, 0x67, 0x63, 0x68, 0x68, 0x68, + 0x63, 0x69, 0x69, 0x69, 0x63, 0x6A, 0x6A, 0x6A, 0x63, 0x6B, 0x6B, 0x6B, 0x63, 0x6C, + 0x6C, 0x6C, 0x63, 0x6D, 0x6D, 0x6D, 0x63, 0x6E, 0x6E, 0x6E, 0x63, 0x6F, 0x6F, 0x6F, + 0x63, 0x70, 0x70, 0x70, 0x63, 0x71, 0x71, 0x71, 0x63, 0x72, 0x72, 0x72, (byte) 0xD8, + 0x19, 0x01, 0x64, 0x73, 0x73, 0x73, 0x73, (byte) 0xD8, 0x19, 0x17, 0x63, 0x72, 0x72, + 0x72, 
(byte) 0xD8, 0x19, 0x18, 0x18, (byte) 0xFF + }; + + private static final byte[] _binaryStringArrayBytes = new byte[]{ + (byte) 0xD9, 0x01, 0x00, (byte) 0x9F, 0x41, 0x31, 0x43, 0x32, 0x32, 0x32, 0x43, 0x33, + 0x33, 0x33, 0x41, 0x34, 0x43, 0x35, 0x35, 0x35, 0x43, 0x36, 0x36, 0x36, 0x43, 0x37, + 0x37, 0x37, 0x43, 0x38, 0x38, 0x38, 0x43, 0x39, 0x39, 0x39, 0x43, 0x61, 0x61, 0x61, + 0x43, 0x62, 0x62, 0x62, 0x43, 0x63, 0x63, 0x63, 0x43, 0x64, 0x64, 0x64, 0x43, 0x65, + 0x65, 0x65, 0x43, 0x66, 0x66, 0x66, 0x43, 0x67, 0x67, 0x67, 0x43, 0x68, 0x68, 0x68, + 0x43, 0x69, 0x69, 0x69, 0x43, 0x6A, 0x6A, 0x6A, 0x43, 0x6B, 0x6B, 0x6B, 0x43, 0x6C, + 0x6C, 0x6C, 0x43, 0x6D, 0x6D, 0x6D, 0x43, 0x6E, 0x6E, 0x6E, 0x43, 0x6F, 0x6F, 0x6F, + 0x43, 0x70, 0x70, 0x70, 0x43, 0x71, 0x71, 0x71, 0x43, 0x72, 0x72, 0x72, (byte) 0xD8, + 0x19, 0x01, 0x44, 0x73, 0x73, 0x73, 0x73, (byte) 0xD8, 0x19, 0x17, 0x43, 0x72, 0x72, + 0x72, (byte) 0xD8, 0x19, 0x18, 0x18, (byte) 0xFF + }; + + private static final byte[] _nestedTagBytes = new byte[]{ + (byte) 0xD9, (byte) 0xD9, (byte) 0xF7, (byte) 0xD9, 0x01, 0x00, (byte) 0x9F, + (byte) 0xC2, 0x45, 0x12, 0x34, 0x56, 0x78, (byte) 0x90, (byte) 0xC2, 0x46, 0x00, + (byte) 0x98, 0x76, 0x54, 0x32, 0x10, (byte) 0xC2, (byte) 0xD8, 0x19, 0x00, + (byte) 0xFF + }; +} \ No newline at end of file diff --git a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/BiggerDataTest.java b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/BiggerDataTest.java index 8a8377448..ed2dbcf22 100644 --- a/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/BiggerDataTest.java +++ b/cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/mapper/BiggerDataTest.java @@ -3,7 +3,9 @@ import java.util.*; import com.fasterxml.jackson.databind.*; +import com.fasterxml.jackson.dataformat.cbor.CBORGenerator; import com.fasterxml.jackson.dataformat.cbor.CBORTestBase; +import com.fasterxml.jackson.dataformat.cbor.databind.CBORMapper; /** * Bigger test to try to do 
smoke-testing of overall functionality, @@ -137,4 +139,33 @@ public void testRoundTrip() throws Exception assertEquals(citm.topicSubTopics.size(), citm2.topicSubTopics.size()); assertEquals(citm.venueNames.size(), citm2.venueNames.size()); } + + public void testRoundTripStringref() throws Exception + { + Citm citm0 = MAPPER.readValue(getClass().getResourceAsStream("/data/citm_catalog.json"), + Citm.class); + ObjectMapper mapper = new CBORMapper( + cborFactoryBuilder().enable(CBORGenerator.Feature.STRINGREF).build()); + byte[] cbor = mapper.writeValueAsBytes(citm0); + + Citm citm = mapper.readValue(cbor, Citm.class); + + byte[] smile1 = mapper.writeValueAsBytes(citm); + Citm citm2 = mapper.readValue(smile1, Citm.class); + byte[] smile2 = mapper.writeValueAsBytes(citm2); + + assertEquals(smile1.length, smile2.length); + + assertNotNull(citm.areaNames); + assertEquals(17, citm.areaNames.size()); + assertNotNull(citm.events); + assertEquals(184, citm.events.size()); + + assertEquals(citm.seatCategoryNames.size(), citm2.seatCategoryNames.size()); + assertEquals(citm.subTopicNames.size(), citm2.subTopicNames.size()); + assertEquals(citm.subjectNames.size(), citm2.subjectNames.size()); + assertEquals(citm.topicNames.size(), citm2.topicNames.size()); + assertEquals(citm.topicSubTopics.size(), citm2.topicSubTopics.size()); + assertEquals(citm.venueNames.size(), citm2.venueNames.size()); + } }