Skip to content

Commit 95a4300

Browse files
Add an optional extended parser subclass (YAMLAnchorReplayingFactory) able to inline anchors (#502)
1 parent 2c26e1e commit 95a4300

File tree

6 files changed

+647
-3
lines changed

6 files changed

+647
-3
lines changed

release-notes/CREDITS-2.x

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ Heiko Boettger (@HeikoBoettger)
276276
277277
* Contributed #482: (yaml) Allow passing `ParserImpl` by a subclass or overwrite the events
278278
(2.18.0)
279+
* Contributed #502: (yaml) Add an optional extended parser subclass (`YAMLAnchorReplayingFactory`)
280+
able to inline anchors
281+
(2.19.0)
279282
280283
Burdyug Pavel (@Pavel38l)
281284

release-notes/VERSION-2.x

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ Active Maintainers:
1616

1717
2.19.0 (not yet released)
1818

19-
-
19+
#502: Add an optional extended parser subclass (`YAMLAnchorReplayingFactory`)
20+
able to inline anchors
21+
(contributed by Heiko B)
2022

2123
2.18.2 (27-Nov-2024)
2224

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package com.fasterxml.jackson.dataformat.yaml;
2+
3+
import java.io.CharArrayReader;
4+
import java.io.InputStream;
5+
import java.io.IOException;
6+
import java.io.Reader;
7+
8+
import com.fasterxml.jackson.core.JsonEncoding;
9+
import com.fasterxml.jackson.core.ObjectCodec;
10+
import com.fasterxml.jackson.core.io.IOContext;
11+
12+
/**
13+
* A subclass of YAMLFactory with the only purpose to replace the YAMLParser by
14+
* the YAMLAnchorReplayingParser subclass.
15+
*
16+
* @since 2.19
17+
*/
18+
public class YAMLAnchorReplayingFactory extends YAMLFactory {
19+
private static final long serialVersionUID = 1L;
20+
21+
public YAMLAnchorReplayingFactory() {
22+
super();
23+
}
24+
25+
public YAMLAnchorReplayingFactory(ObjectCodec oc) {
26+
super(oc);
27+
}
28+
29+
public YAMLAnchorReplayingFactory(YAMLFactory src, ObjectCodec oc) {
30+
super(src, oc);
31+
}
32+
33+
protected YAMLAnchorReplayingFactory(YAMLFactoryBuilder b) {
34+
super(b);
35+
}
36+
37+
@Override
38+
public YAMLAnchorReplayingFactory copy() {
39+
_checkInvalidCopy(YAMLAnchorReplayingFactory.class);
40+
return new YAMLAnchorReplayingFactory(this, (ObjectCodec) null);
41+
}
42+
43+
@Override
44+
protected Object readResolve() {
45+
return new YAMLAnchorReplayingFactory(this, _objectCodec);
46+
}
47+
48+
@Override
49+
protected YAMLParser _createParser(InputStream input, IOContext ctxt) throws IOException {
50+
return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
51+
_loaderOptions, _objectCodec,
52+
_createReader(input, (JsonEncoding) null, ctxt));
53+
}
54+
55+
@Override
56+
protected YAMLParser _createParser(Reader r, IOContext ctxt) throws IOException {
57+
return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
58+
_loaderOptions, _objectCodec, r);
59+
}
60+
61+
@Override
62+
protected YAMLParser _createParser(char[] data, int offset, int len, IOContext ctxt, boolean recyclable) throws IOException {
63+
return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
64+
_loaderOptions, _objectCodec, new CharArrayReader(data, offset, len));
65+
}
66+
67+
@Override
68+
protected YAMLParser _createParser(byte[] data, int offset, int len, IOContext ctxt) throws IOException {
69+
return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
70+
_loaderOptions, _objectCodec, _createReader(data, offset, len, (JsonEncoding) null, ctxt));
71+
}
72+
}
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
package com.fasterxml.jackson.dataformat.yaml;
2+
3+
import java.io.Reader;
4+
import java.io.IOException;
5+
6+
import java.util.*;
7+
8+
import org.yaml.snakeyaml.LoaderOptions;
9+
import org.yaml.snakeyaml.events.*;
10+
11+
import com.fasterxml.jackson.core.JsonParseException;
12+
import com.fasterxml.jackson.core.ObjectCodec;
13+
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
14+
import com.fasterxml.jackson.core.io.IOContext;
15+
16+
/**
17+
* A parser that remembers the events of anchored parts in yaml and repeats them
18+
* to inline these parts when an alias if found instead of only returning an alias.
19+
*<p>
20+
* Note: this overwrites the getEvent() since the base `super.nextToken()` manages to much state and
21+
* it seems to be much simpler to re-emit the events.
22+
*
23+
* @since 2.19
24+
*/
25+
public class YAMLAnchorReplayingParser extends YAMLParser
26+
{
27+
private static class AnchorContext {
28+
public final String anchor;
29+
public final List<Event> events = new ArrayList<>();
30+
public int depth = 1;
31+
32+
public AnchorContext(String anchor) {
33+
this.anchor = anchor;
34+
}
35+
}
36+
37+
/**
38+
* the maximum number of events that can be replayed
39+
*/
40+
public static final int MAX_EVENTS = 9999;
41+
42+
/**
43+
* the maximum limit of anchors to remember
44+
*/
45+
public static final int MAX_ANCHORS = 9999;
46+
47+
/**
48+
* the maximum limit of merges to follow
49+
*/
50+
public static final int MAX_MERGES = 9999;
51+
52+
/**
53+
* the maximum limit of references to remember
54+
*/
55+
public static final int MAX_REFS = 9999;
56+
57+
/**
58+
* Remembers when a merge has been started in order to skip the corresponding
59+
* sequence end which needs to be excluded
60+
*/
61+
private final ArrayDeque<Integer> mergeStack = new ArrayDeque<>();
62+
63+
/**
64+
* Collects nested anchor definitions
65+
*/
66+
private final ArrayDeque<AnchorContext> tokenStack = new ArrayDeque<>();
67+
68+
/**
69+
* Keeps track of the last sequentially found definition of each anchor
70+
*/
71+
private final Map<String, List<Event>> referencedObjects = new HashMap<>();
72+
73+
/**
74+
* Keeps track of events that have been insert when processing alias
75+
*/
76+
private final ArrayDeque<Event> refEvents = new ArrayDeque<>();
77+
78+
/**
79+
* keeps track of the global depth of nested collections
80+
*/
81+
private int globalDepth = 0;
82+
83+
public YAMLAnchorReplayingParser(IOContext ctxt, int parserFeatures, int formatFeatures, LoaderOptions loaderOptions, ObjectCodec codec, Reader reader) {
84+
super(ctxt, parserFeatures, formatFeatures, loaderOptions, codec, reader);
85+
}
86+
87+
private void finishContext(AnchorContext context) throws StreamConstraintsException {
88+
if (referencedObjects.size() + 1 > MAX_REFS) throw new StreamConstraintsException("too many references in the document");
89+
referencedObjects.put(context.anchor, context.events);
90+
if (!tokenStack.isEmpty()) {
91+
List<Event> events = tokenStack.peek().events;
92+
if (events.size() + context.events.size() > MAX_EVENTS) throw new StreamConstraintsException("too many events to replay");
93+
events.addAll(context.events);
94+
}
95+
}
96+
97+
protected Event trackDepth(Event event) {
98+
if (event instanceof CollectionStartEvent) {
99+
++globalDepth;
100+
} else if (event instanceof CollectionEndEvent) {
101+
--globalDepth;
102+
}
103+
return event;
104+
}
105+
106+
protected Event filterEvent(Event event) {
107+
if (event instanceof MappingEndEvent) {
108+
if (!mergeStack.isEmpty()) {
109+
if (mergeStack.peek() > globalDepth) {
110+
mergeStack.pop();
111+
return null;
112+
}
113+
}
114+
}
115+
return event;
116+
}
117+
118+
@Override
119+
protected Event getEvent() throws IOException {
120+
while(!refEvents.isEmpty()) {
121+
Event event = filterEvent(trackDepth(refEvents.removeFirst()));
122+
if (event != null) return event;
123+
}
124+
125+
Event event = null;
126+
while (event == null) {
127+
event = trackDepth(super.getEvent());
128+
if (event == null) return null;
129+
event = filterEvent(event);
130+
}
131+
132+
if (event instanceof AliasEvent) {
133+
AliasEvent alias = (AliasEvent) event;
134+
List<Event> events = referencedObjects.get(alias.getAnchor());
135+
if (events != null) {
136+
if (refEvents.size() + events.size() > MAX_EVENTS) throw new StreamConstraintsException("too many events to replay");
137+
refEvents.addAll(events);
138+
return refEvents.removeFirst();
139+
}
140+
throw new JsonParseException("invalid alias " + alias.getAnchor());
141+
}
142+
143+
if (event instanceof NodeEvent) {
144+
String anchor = ((NodeEvent) event).getAnchor();
145+
if (anchor != null) {
146+
AnchorContext context = new AnchorContext(anchor);
147+
context.events.add(event);
148+
if (event instanceof CollectionStartEvent) {
149+
if (tokenStack.size() + 1 > MAX_ANCHORS) throw new StreamConstraintsException("too many anchors in the document");
150+
tokenStack.push(context);
151+
} else {
152+
// directly store it
153+
finishContext(context);
154+
}
155+
return event;
156+
}
157+
}
158+
159+
if (event instanceof ScalarEvent) {
160+
ScalarEvent scalarEvent = (ScalarEvent) event;
161+
if (scalarEvent.getValue().equals( "<<")) {
162+
// expect next node to be a map
163+
Event next = getEvent();
164+
if (next instanceof MappingStartEvent) {
165+
if (mergeStack.size() + 1 > MAX_MERGES) throw new StreamConstraintsException("too many merges in the document");
166+
mergeStack.push(globalDepth);
167+
return getEvent();
168+
}
169+
throw new JsonParseException("found field '<<' but value isn't a map");
170+
}
171+
}
172+
173+
if (!tokenStack.isEmpty()) {
174+
AnchorContext context = tokenStack.peek();
175+
if (context.events.size() + 1 > MAX_EVENTS) throw new StreamConstraintsException("too many events to replay");
176+
context.events.add(event);
177+
if (event instanceof CollectionStartEvent) {
178+
++context.depth;
179+
} else if (event instanceof CollectionEndEvent) {
180+
--context.depth;
181+
if (context.depth == 0) {
182+
tokenStack.pop();
183+
finishContext(context);
184+
}
185+
}
186+
}
187+
return event;
188+
}
189+
}

yaml/src/main/java/com/fasterxml/jackson/dataformat/yaml/YAMLParser.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,13 +580,16 @@ public JsonToken nextToken() throws IOException
580580
/**
581581
* Since the parserImpl cannot be replaced allow subclasses to at least be able to
582582
* influence the events being consumed.
583-
*
583+
*<p>
584584
* A particular use case is working around the lack of anchor and alias support to
585585
* emit additional events.
586+
*<p>
587+
* NOTE: since 2.19, declared to throw {@link IOException} to allow sub-classes
588+
* to do so.
586589
*
587590
* @since 2.18
588591
*/
589-
protected Event getEvent() {
592+
protected Event getEvent() throws IOException {
590593
return _yamlParser.getEvent();
591594
}
592595

0 commit comments

Comments
 (0)