29
29
#include <fluent-bit/flb_time.h>
30
30
#include <fluent-bit/flb_pack.h>
31
31
#include <fluent-bit/flb_unescape.h>
32
+ #include <fluent-bit/flb_simd.h>
32
33
33
34
#include <fluent-bit/flb_log_event_encoder.h>
34
35
#include <fluent-bit/flb_log_event_decoder.h>
@@ -53,6 +54,59 @@ static int flb_pack_set_null_as_nan(int b) {
53
54
return convert_nan_to_null ;
54
55
}
55
56
57
/* -----------------------------------------------------------------------------
 * SIMD helpers
 * -----------------------------------------------------------------------------
 *
 * json_find_escapable_simd:
 *   Scan s[0..n) for the first byte that needs JSON string escaping: a double
 *   quote ('"'), a backslash ('\'), or a control character (< 0x20).
 *
 *   s: start of the byte range to scan (not required to be NUL-terminated).
 *   n: number of bytes to scan; 0 yields NULL.
 *
 *   Returns a pointer to the first such byte, or NULL when the range holds
 *   none.
 *
 *   When FLB_HAVE_SIMD is defined, the input is consumed in blocks of
 *   FLB_SIMD_VEC8_INST_LEN bytes using the vector helpers from flb_simd.h.
 *   A block is skipped only when it has no quote, no backslash AND no control
 *   byte; otherwise a short scalar pass inside the block pins down the exact
 *   position. Bytes left over after the last full block (or the whole input
 *   when SIMD is not available) go through the scalar loop.
 */
static inline const char *json_find_escapable_simd(const char *s, size_t n)
{
    const char *p = s;
    uint8_t c;
#ifdef FLB_HAVE_SIMD
    const size_t vlen = FLB_SIMD_VEC8_INST_LEN;
    flb_vector8 v;
    size_t i;

    while (n >= vlen) {
        flb_vector8_load(&v, (const uint8_t *) p);

        /*
         * The control-character test must be part of the block filter: a
         * block that holds e.g. a raw '\n' but no quote or backslash would
         * otherwise be skipped and the caller would treat the string as
         * safe to pack verbatim.
         */
        if (flb_vector8_has_le(v, (uint8_t) 0x1f) ||
            flb_vector8_has(v, (uint8_t) '"') ||
            flb_vector8_has(v, (uint8_t) '\\')) {
            /* Narrow down to the exact position inside this block */
            for (i = 0; i < vlen; i++) {
                c = (uint8_t) p[i];
                if (c == '"' || c == '\\' || c < 0x20) {
                    return p + i;
                }
            }
        }

        p += vlen;
        n -= vlen;
    }
#endif

    /* Scalar tail / generic fallback */
    while (n > 0) {
        c = (uint8_t) *p;
        if (c == '"' || c == '\\' || c < 0x20) {
            return p;
        }
        p++;
        n--;
    }

    return NULL;
}
109
+
56
110
int flb_json_tokenise (const char * js , size_t len ,
57
111
struct flb_pack_state * state )
58
112
{
@@ -105,11 +159,58 @@ static inline int is_float(const char *buf, int len)
105
159
const char * end = buf + len ;
106
160
const char * p = buf ;
107
161
108
- while (p <= end ) {
109
- if ((* p == 'e' || * p == 'E' ) && p < end && (* (p + 1 ) == '-' || * (p + 1 ) == '+' )) {
162
+ #ifdef FLB_HAVE_SIMD
163
+ {
164
+ const size_t vlen = FLB_SIMD_VEC8_INST_LEN ;
165
+ flb_vector8 vdot = flb_vector8_broadcast ((uint8_t )'.' );
166
+ flb_vector8 ve = flb_vector8_broadcast ((uint8_t )'e' );
167
+ flb_vector8 vE = flb_vector8_broadcast ((uint8_t )'E' );
168
+ flb_vector8 v ;
169
+ char c ;
170
+ char * q ;
171
+
172
+ while ((size_t )(end - p ) >= vlen ) {
173
+ flb_vector8_load (& v , (const uint8_t * )p );
174
+
175
+ /* If the block contains '.', it's definitely a float */
176
+ if (flb_vector8_is_highbit_set (flb_vector8_eq (v , vdot ))) {
177
+ return 1 ;
178
+ }
179
+
180
+ /* If the block contains 'e' or 'E', check the immediate next char. */
181
+ if (flb_vector8_is_highbit_set (flb_vector8_eq (v , ve )) ||
182
+ flb_vector8_is_highbit_set (flb_vector8_eq (v , vE ))) {
183
+ /* Narrow inside this vector to the first e/E and verify next char */
184
+ for (size_t i = 0 ; i < vlen ; i ++ ) {
185
+ c = p [i ];
186
+ if (c == 'e' || c == 'E' ) {
187
+ q = p + i + 1 ;
188
+ if (q < end && (* q == '+' || * q == '-' )) {
189
+ return 1 ; /* e- / e+ / E- / E+ */
190
+ }
191
+ /* Not signed exponent here; fall through to precise check below.
192
+ Set p at the e/E position so the scalar loop sees it. */
193
+ p += i ;
194
+ goto scalar_check ;
195
+ }
196
+ }
197
+ /* Should not reach (we had a mask), but continue safely */
198
+ }
199
+
200
+ /* No candidates in this block; skip it entirely */
201
+ p += vlen ;
202
+ }
203
+ }
204
+ #endif
205
+
206
+ scalar_check :
207
+ /* Precise scalar check for the remaining tail (and for cases we broke early). */
208
+ while (p < end ) {
209
+ if (* p == '.' ) {
110
210
return 1 ;
111
211
}
112
- else if (* p == '.' ) {
212
+ if ((* p == 'e' || * p == 'E' ) &&
213
+ (p + 1 ) < end && (p [1 ] == '-' || p [1 ] == '+' )) {
113
214
return 1 ;
114
215
}
115
216
p ++ ;
@@ -165,24 +266,32 @@ static inline int pack_string_token(struct flb_pack_state *state,
165
266
char * tmp ;
166
267
char * out_buf ;
167
268
269
+ /* Fast path: if the JSON string does not contain '"' or '\' or any control
270
+ * chars (<0x20), we can pack it as-is without unescaping. */
271
+ const char * bad = json_find_escapable_simd (str , (size_t )len );
272
+ if (bad == NULL ) {
273
+ msgpack_pack_str (pck , len );
274
+ msgpack_pack_str_body (pck , str , len );
275
+ return len ;
276
+ }
277
+
278
+ /* Slow path: unescape into a temporary buffer as before. */
168
279
if (state -> buf_size < len + 1 ) {
169
280
s = len + 1 ;
170
281
tmp = flb_realloc (state -> buf_data , s );
171
282
if (!tmp ) {
172
283
flb_errno ();
173
284
return -1 ;
174
285
}
175
- else {
176
- state -> buf_data = tmp ;
177
- state -> buf_size = s ;
178
- }
286
+ state -> buf_data = tmp ;
287
+ state -> buf_size = s ;
179
288
}
180
289
out_buf = state -> buf_data ;
181
290
182
- /* Always decode any UTF-8 or special characters */
291
+ /* Always decode UTF-8 escape sequences and specials when needed */
183
292
out_len = flb_unescape_string_utf8 (str , len , out_buf );
184
293
185
- /* Pack decoded text */
294
+ /* Pack the decoded text */
186
295
msgpack_pack_str (pck , out_len );
187
296
msgpack_pack_str_body (pck , out_buf , out_len );
188
297
0 commit comments