|  | 
|  | 1 | +package org.simdjson; | 
|  | 2 | + | 
|  | 3 | +import java.util.HashMap; | 
|  | 4 | +import java.util.Map; | 
|  | 5 | + | 
|  | 6 | +import lombok.Data; | 
|  | 7 | +import lombok.RequiredArgsConstructor; | 
|  | 8 | + | 
|  | 9 | +public class SimdJsonParserWithFixPath { | 
|  | 10 | + | 
|  | 11 | +    @Data | 
|  | 12 | +    @RequiredArgsConstructor | 
|  | 13 | +    static class JsonNode { | 
|  | 14 | +        private long version = 0; | 
|  | 15 | +        private boolean isLeaf = false; | 
|  | 16 | +        private final String name; | 
|  | 17 | +        private String value = null; | 
|  | 18 | +        private JsonNode parent = null; | 
|  | 19 | +        private Map<String, JsonNode> children = new HashMap<>(); | 
|  | 20 | +        private int start = -1; | 
|  | 21 | +        private int end = -1; | 
|  | 22 | +    } | 
|  | 23 | + | 
|  | 24 | +    private final SimdJsonParser parser; | 
|  | 25 | +    private BitIndexes bitIndexes; | 
|  | 26 | +    private final JsonNode root = new JsonNode(null); | 
|  | 27 | +    private final JsonNode[] row; | 
|  | 28 | +    private final String[] result; | 
|  | 29 | +    private final String[] emptyResult; | 
|  | 30 | +    private JsonNode ptr; | 
|  | 31 | +    private byte[] buffer; | 
|  | 32 | +    private final int expectParseCols; | 
|  | 33 | +    // every time json string is processed, currentVersion will be incremented by 1 | 
|  | 34 | +    private long currentVersion = 0; | 
|  | 35 | +    // pruning, when alreadyProcessedCols == NUM | 
|  | 36 | + | 
|  | 37 | +    public SimdJsonParserWithFixPath(String... args) { | 
|  | 38 | +        parser = new SimdJsonParser(); | 
|  | 39 | +        expectParseCols = args.length; | 
|  | 40 | +        row = new JsonNode[expectParseCols]; | 
|  | 41 | +        result = new String[expectParseCols]; | 
|  | 42 | +        emptyResult = new String[expectParseCols]; | 
|  | 43 | +        for (int i = 0; i < args.length; i++) { | 
|  | 44 | +            emptyResult[i] = null; | 
|  | 45 | +        } | 
|  | 46 | +        for (int i = 0; i < expectParseCols; i++) { | 
|  | 47 | +            JsonNode cur = root; | 
|  | 48 | +            String[] paths = args[i].split("\\."); | 
|  | 49 | +            for (int j = 0; j < paths.length; j++) { | 
|  | 50 | +                if (!cur.getChildren().containsKey(paths[j])) { | 
|  | 51 | +                    JsonNode child = new JsonNode(paths[j]); | 
|  | 52 | +                    cur.getChildren().put(paths[j], child); | 
|  | 53 | +                    child.setParent(cur); | 
|  | 54 | +                } | 
|  | 55 | +                cur = cur.getChildren().get(paths[j]); | 
|  | 56 | +            } | 
|  | 57 | +            cur.setLeaf(true); | 
|  | 58 | +            row[i] = cur; | 
|  | 59 | +        } | 
|  | 60 | + | 
|  | 61 | +    } | 
|  | 62 | + | 
|  | 63 | +    public String[] parse(byte[] buffer, int len) { | 
|  | 64 | +        this.bitIndexes = parser.buildBitIndex(buffer, len); | 
|  | 65 | +        if (buffer == null || buffer.length == 0) { | 
|  | 66 | +            return emptyResult; | 
|  | 67 | +        } | 
|  | 68 | +        this.currentVersion++; | 
|  | 69 | +        this.ptr = root; | 
|  | 70 | +        this.buffer = buffer; | 
|  | 71 | + | 
|  | 72 | +        switch (buffer[bitIndexes.peek()]) { | 
|  | 73 | +            case '{' -> { | 
|  | 74 | +                parseMap(); | 
|  | 75 | +            } | 
|  | 76 | +            case '[' -> { | 
|  | 77 | +                parseList(); | 
|  | 78 | +            } | 
|  | 79 | +            default -> { | 
|  | 80 | +                throw new RuntimeException("invalid json format"); | 
|  | 81 | +            } | 
|  | 82 | +        } | 
|  | 83 | +        return getResult(); | 
|  | 84 | +    } | 
|  | 85 | + | 
|  | 86 | +    private String parseValue() { | 
|  | 87 | +        int start = bitIndexes.advance(); | 
|  | 88 | +        int next = bitIndexes.peek(); | 
|  | 89 | +        String field = new String(buffer, start, next - start).trim(); | 
|  | 90 | +        if ("null".equalsIgnoreCase(field)) { | 
|  | 91 | +            return null; | 
|  | 92 | +        } | 
|  | 93 | +        // field type is string or type is decimal | 
|  | 94 | +        if (field.startsWith("\"")) { | 
|  | 95 | +            field = field.substring(1, field.length() - 1); | 
|  | 96 | +        } | 
|  | 97 | +        return field; | 
|  | 98 | +    } | 
|  | 99 | + | 
|  | 100 | +    private void parseElement(String expectFieldName) { | 
|  | 101 | +        // if expectFieldName is null, parent is map, else is list | 
|  | 102 | +        if (expectFieldName == null) { | 
|  | 103 | +            expectFieldName = parseValue(); | 
|  | 104 | +            bitIndexes.advance(); // skip : | 
|  | 105 | +        } | 
|  | 106 | +        if (!ptr.getChildren().containsKey(expectFieldName)) { | 
|  | 107 | +            skip(false); | 
|  | 108 | +            return; | 
|  | 109 | +        } | 
|  | 110 | +        ptr = ptr.getChildren().get(expectFieldName); | 
|  | 111 | +        switch (buffer[bitIndexes.peek()]) { | 
|  | 112 | +            case '{' -> { | 
|  | 113 | +                parseMap(); | 
|  | 114 | +            } | 
|  | 115 | +            case '[' -> { | 
|  | 116 | +                parseList(); | 
|  | 117 | +            } | 
|  | 118 | +            default -> { | 
|  | 119 | +                ptr.setValue(skip(true)); | 
|  | 120 | +                ptr.setVersion(currentVersion); | 
|  | 121 | +            } | 
|  | 122 | +        } | 
|  | 123 | +        ptr = ptr.getParent(); | 
|  | 124 | +    } | 
|  | 125 | + | 
|  | 126 | +    private void parseMap() { | 
|  | 127 | +        if (ptr.getChildren() == null) { | 
|  | 128 | +            ptr.setValue(skip(true)); | 
|  | 129 | +            ptr.setVersion(currentVersion); | 
|  | 130 | +            return; | 
|  | 131 | +        } | 
|  | 132 | +        ptr.setStart(bitIndexes.peek()); | 
|  | 133 | +        bitIndexes.advance(); | 
|  | 134 | +        while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}') { | 
|  | 135 | +            parseElement(null); | 
|  | 136 | +            if (buffer[bitIndexes.peek()] == ',') { | 
|  | 137 | +                bitIndexes.advance(); | 
|  | 138 | +            } | 
|  | 139 | +        } | 
|  | 140 | +        ptr.setEnd(bitIndexes.peek()); | 
|  | 141 | +        if (ptr.isLeaf()) { | 
|  | 142 | +            ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | 
|  | 143 | +            ptr.setVersion(currentVersion); | 
|  | 144 | +        } | 
|  | 145 | +        bitIndexes.advance(); | 
|  | 146 | +    } | 
|  | 147 | + | 
|  | 148 | +    private void parseList() { | 
|  | 149 | +        if (ptr.getChildren() == null) { | 
|  | 150 | +            ptr.setValue(skip(true)); | 
|  | 151 | +            ptr.setVersion(currentVersion); | 
|  | 152 | +            return; | 
|  | 153 | +        } | 
|  | 154 | +        ptr.setStart(bitIndexes.peek()); | 
|  | 155 | +        bitIndexes.advance(); | 
|  | 156 | +        int i = 0; | 
|  | 157 | +        while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']') { | 
|  | 158 | +            parseElement("" + i); | 
|  | 159 | +            if (buffer[bitIndexes.peek()] == ',') { | 
|  | 160 | +                bitIndexes.advance(); | 
|  | 161 | +            } | 
|  | 162 | +            i++; | 
|  | 163 | +        } | 
|  | 164 | +        ptr.setEnd(bitIndexes.peek()); | 
|  | 165 | +        if (ptr.isLeaf()) { | 
|  | 166 | +            ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | 
|  | 167 | +            ptr.setVersion(currentVersion); | 
|  | 168 | +        } | 
|  | 169 | +        bitIndexes.advance(); | 
|  | 170 | +    } | 
|  | 171 | + | 
|  | 172 | +    private String skip(boolean retainValue) { | 
|  | 173 | +        int i = 0; | 
|  | 174 | +        int start = retainValue ? bitIndexes.peek() : 0; | 
|  | 175 | +        switch (buffer[bitIndexes.peek()]) { | 
|  | 176 | +            case '{' -> { | 
|  | 177 | +                i++; | 
|  | 178 | +                while (i > 0) { | 
|  | 179 | +                    bitIndexes.advance(); | 
|  | 180 | +                    if (buffer[bitIndexes.peek()] == '{') { | 
|  | 181 | +                        i++; | 
|  | 182 | +                    } else if (buffer[bitIndexes.peek()] == '}') { | 
|  | 183 | +                        i--; | 
|  | 184 | +                    } | 
|  | 185 | +                } | 
|  | 186 | +                int end = bitIndexes.peek(); | 
|  | 187 | +                bitIndexes.advance(); | 
|  | 188 | +                return retainValue ? new String(buffer, start, end - start + 1) : null; | 
|  | 189 | +            } | 
|  | 190 | +            case '[' -> { | 
|  | 191 | +                i++; | 
|  | 192 | +                while (i > 0) { | 
|  | 193 | +                    bitIndexes.advance(); | 
|  | 194 | +                    if (buffer[bitIndexes.peek()] == '[') { | 
|  | 195 | +                        i++; | 
|  | 196 | +                    } else if (buffer[bitIndexes.peek()] == ']') { | 
|  | 197 | +                        i--; | 
|  | 198 | +                    } | 
|  | 199 | +                } | 
|  | 200 | +                int end = bitIndexes.peek(); | 
|  | 201 | +                bitIndexes.advance(); | 
|  | 202 | +                return retainValue ? new String(buffer, start, end - start + 1) : null; | 
|  | 203 | +            } | 
|  | 204 | +            default -> { | 
|  | 205 | +                return parseValue(); | 
|  | 206 | +            } | 
|  | 207 | +        } | 
|  | 208 | +    } | 
|  | 209 | + | 
|  | 210 | +    private String[] getResult() { | 
|  | 211 | +        for (int i = 0; i < expectParseCols; i++) { | 
|  | 212 | +            if (row[i].getVersion() < currentVersion) { | 
|  | 213 | +                result[i] = null; | 
|  | 214 | +                continue; | 
|  | 215 | +            } | 
|  | 216 | +            result[i] = row[i].getValue(); | 
|  | 217 | +        } | 
|  | 218 | +        return result; | 
|  | 219 | +    } | 
|  | 220 | +} | 
0 commit comments