From 72f2ad09bb1531062475548ad149f5cd5012742f Mon Sep 17 00:00:00 2001 From: wenshao Date: Mon, 20 Jan 2025 13:58:32 +0800 Subject: [PATCH 01/11] isASCII --- .../fastjson2/benchmark/BytesAsciiCheck.java | 86 +++++++++++++------ .../benchmark/BytesAsciiCheckTest.java | 22 ++++- .../com/alibaba/fastjson2/util/IOUtils.java | 32 +++++-- .../com/alibaba/fastjson2/util/JDKUtils.java | 17 +--- .../alibaba/fastjson2/util/JDKUtilsTest.java | 11 ++- 5 files changed, 115 insertions(+), 53 deletions(-) diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java index 1136e727e8..1698039ea3 100644 --- a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java @@ -1,5 +1,6 @@ package com.alibaba.fastjson2.benchmark; +import com.alibaba.fastjson2.JSONException; import com.alibaba.fastjson2.benchmark.eishay.EishayParseBinaryArrayMapping; import com.alibaba.fastjson2.util.JDKUtils; import org.apache.commons.io.IOUtils; @@ -11,68 +12,100 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; import java.io.InputStream; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; import java.util.concurrent.TimeUnit; -import static com.alibaba.fastjson2.util.JDKUtils.ARRAY_BYTE_BASE_OFFSET; -import static com.alibaba.fastjson2.util.JDKUtils.UNSAFE; - public class BytesAsciiCheck { static byte[] bytes; + static char[] chars; + static String str; + static final MethodHandle INDEX_OF_CHAR; static { + MethodHandle indexOfChar = null; + try { + try { + Class cStringLatin1 = Class.forName("java.lang.StringLatin1"); + MethodHandles.Lookup lookup = JDKUtils.trustedLookup(cStringLatin1); + indexOfChar = lookup.findStatic( + cStringLatin1, + "indexOfChar", + MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class)); + } catch (Throwable ignored) { + // ignore + } + } catch (Exception e) { + e.printStackTrace(); + } + + INDEX_OF_CHAR = indexOfChar; try { InputStream is = EishayParseBinaryArrayMapping.class.getClassLoader().getResourceAsStream("data/eishay.json"); - String str = IOUtils.toString(is, "UTF-8"); + str = IOUtils.toString(is, "UTF-8"); bytes = str.getBytes(); + chars = str.toCharArray(); } catch (Exception e) { e.printStackTrace(); } } - @Benchmark +// @Benchmark public void handler(Blackhole bh) throws Throwable { bh.consume( JDKUtils.METHOD_HANDLE_HAS_NEGATIVE.invoke(bytes, 0, bytes.length) ); } - @Benchmark +// @Benchmark public void lambda(Blackhole bh) throws Throwable { bh.consume( JDKUtils.PREDICATE_IS_ASCII.test(bytes) ); } - @Benchmark + // @Benchmark public void direct(Blackhole bh) throws Throwable { bh.consume(hasNegatives(bytes, 0, bytes.length)); } @Benchmark - public void direct8(Blackhole bh) throws Throwable { - bh.consume(hasNegatives_8(bytes, 0, bytes.length)); + public void isASCII(Blackhole bh) throws Throwable { + bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length)); } - public static boolean hasNegatives(byte[] ba, int off, int len) { - for (int i = off; i < off + len; i++) { - if (ba[i] < 0) { - return true; - } - } - return false; + @Benchmark + public void indexOfSlash(Blackhole bh) throws Throwable { + bh.consume(com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, bytes.length)); } - public static boolean hasNegatives_8(byte[] bytes, int off, int len) { - int i = off; - while (i + 8 <= off + len) { - if ((UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) { - return true; - } - i += 8; + @Benchmark + public void indexOfChar(Blackhole bh) throws Throwable { + bh.consume(indexOfChar(bytes, '\'', 0, bytes.length)); + } + + @Benchmark + public void indexOfString(Blackhole bh) throws Throwable { + bh.consume(str.indexOf('\\')); + } + + private static int indexOfChar(byte[] bytes, int ch, int fromIndex, int toIndex) { + try { + return (int) INDEX_OF_CHAR.invokeExact(bytes, ch, fromIndex, toIndex); + } catch (Throwable ignored) { + throw new JSONException(""); } + } + +// @Benchmark + public void isASCII_chars(Blackhole bh) throws Throwable { + bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(chars, 0, chars.length)); + } - for (; i < off + len; i++) { - if (bytes[i] < 0) { + public static boolean hasNegatives(byte[] ba, int off, int len) { + for (int i = off; i < off + len; i++) { + if (ba[i] < 0) { return true; } } @@ -85,7 +118,8 @@ public static void main(String[] args) throws Exception { .mode(Mode.Throughput) .timeUnit(TimeUnit.MILLISECONDS) .warmupIterations(3) - .forks(1) + .threads(1) + .forks(3) .build(); new Runner(options).run(); } diff --git a/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java b/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java index cbfe501104..297fb31abf 100644 --- a/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java +++ b/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java @@ -34,6 +34,19 @@ public static void lambda() throws Throwable { } } + public static void isASCII() throws Throwable { + for (int j = 0; j < 5; j++) { + long start = System.currentTimeMillis(); + for (int i = 0; i < LOOP_COUNT; ++i) { + benchmark.isASCII(BH); + } + long millis = System.currentTimeMillis() - start; + System.out.println("BytesAsciiCheck-isASCII : " + millis); + + // zulu17.40.19 : 118 + } + } + public static void direct() throws Throwable { for (int j = 0; j < 5; j++) { long start = System.currentTimeMillis(); @@ -47,14 +60,14 @@ public static void direct() throws Throwable { } } - public static void direct8() throws Throwable { + public static void isASCII_chars() throws Throwable { for (int j = 0; j < 5; j++) { long start = System.currentTimeMillis(); for (int i = 0; i < LOOP_COUNT; ++i) { - benchmark.direct8(BH); + benchmark.isASCII_chars(BH); } long millis = System.currentTimeMillis() - start; - System.out.println("BytesAsciiCheck-direct8 : " + millis); + System.out.println("BytesAsciiCheck-isASCII_chars : " + millis); // zulu17.40.19 : 478 } @@ -75,6 +88,7 @@ public static void main(String[] args) throws Throwable { // handler(); // lambda(); // direct(); -// direct8(); + isASCII_chars(); +// isASCII(); } } diff --git a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java index 63f7b3ae66..46d6ff1828 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java @@ -1833,16 +1833,36 @@ static short convEndian(boolean big, short n) { return big == BIG_ENDIAN ? n : Short.reverseBytes(n); } - public static boolean isASCII(char[] chars, int coff, int strlen) { - int i = coff; - for (int upperBound = coff + (strlen & ~3); i < upperBound; i += 4) { - if ((getLongLE(chars, i) & 0xFF00FF00FF00FF00L) != 0) { + public static boolean isASCII(char[] chars, int off, int len) { + int upperBound = off + (len & ~7); + int end = off + len; + long address = ARRAY_BYTE_BASE_OFFSET + ((long) off << 1); + while (off < upperBound + && (convEndian(false, UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8)) & 0xFF80FF80FF80FF80L) == 0 + ) { + address += 16; + off += 8; + } + + while (off < end) { + if (chars[off++] >= 0x7F) { return false; } } + return true; + } + + public static boolean isASCII(byte[] bytes, int off, int len) { + int upperBound = off + (len & ~7); + int end = off + len; + long address = ARRAY_BYTE_BASE_OFFSET + off; + while (off < upperBound && (UNSAFE.getLong(bytes, address) & 0x8080808080808080L) == 0) { + address += 8; + off += 8; + } - for (; i < strlen; ++i) { - if (chars[i] > 0x00FF) { + while (off < end) { + if ((bytes[off++] & 0x80) != 0) { return false; } } diff --git a/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java index c600799116..818f2a0471 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java @@ -510,20 +510,7 @@ public static String latin1StringJDK8(byte[] bytes, int offset, int strlen) { return STRING_CREATOR_JDK8.apply(chars, Boolean.TRUE); } - public static boolean isASCII(byte[] chars) { - int i = 0; - int strlen = chars.length; - for (int upperBound = (strlen & ~7); i < upperBound; i += 8) { - if ((UNSAFE.getLong(chars, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) { - return false; - } - } - - for (; i < strlen; ++i) { - if (UNSAFE.getByte(chars, ARRAY_BYTE_BASE_OFFSET + i) < 0) { - return false; - } - } - return true; + static boolean isASCII(byte[] chars) { + return IOUtils.isASCII(chars, 0, chars.length); } } diff --git a/core/src/test/java/com/alibaba/fastjson2/util/JDKUtilsTest.java b/core/src/test/java/com/alibaba/fastjson2/util/JDKUtilsTest.java index 8acac875ac..735520c3d2 100644 --- a/core/src/test/java/com/alibaba/fastjson2/util/JDKUtilsTest.java +++ b/core/src/test/java/com/alibaba/fastjson2/util/JDKUtilsTest.java @@ -4,11 +4,11 @@ import org.junit.jupiter.api.Test; import java.lang.invoke.*; +import java.util.Arrays; import java.util.function.ToIntFunction; import static com.alibaba.fastjson2.util.JDKUtils.*; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.*; public class JDKUtilsTest { @Test @@ -135,6 +135,13 @@ public void lookup_int() throws Throwable { assertNotNull(func); } + @Test + public void test_isASCII() { + byte[] bytes = new byte[127]; + Arrays.fill(bytes, (byte) 'a'); + assertTrue(isASCII(bytes)); + } + private static class PrivateBeanInt { private byte coder; From 8bdd208d1e1f621455afe78bd3d6c6a5b865b893 Mon Sep 17 00:00:00 2001 From: wenshao Date: Tue, 21 Jan 2025 00:57:39 +0800 Subject: [PATCH 02/11] swar --- .../fastjson2/benchmark/BytesAsciiCheck.java | 12 +++++- .../com/alibaba/fastjson2/util/IOUtils.java | 40 ++++++++++++++++++- .../com/alibaba/fastjson2/util/JDKUtils.java | 16 ++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java index 1698039ea3..ba5f7cd538 100644 --- a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java @@ -75,11 +75,21 @@ public void isASCII(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length)); } + @Benchmark + public void isASCIIJDK(Blackhole bh) throws Throwable { + bh.consume(com.alibaba.fastjson2.util.JDKUtils.PREDICATE_IS_ASCII.test(bytes)); + } + @Benchmark public void indexOfSlash(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, bytes.length)); } + @Benchmark + public void indexOfSlashV(Blackhole bh) throws Throwable { + bh.consume(com.alibaba.fastjson2.util.IOUtils.indexOfSlashV(bytes, 0, bytes.length)); + } + @Benchmark public void indexOfChar(Blackhole bh) throws Throwable { bh.consume(indexOfChar(bytes, '\'', 0, bytes.length)); @@ -119,7 +129,7 @@ public static void main(String[] args) throws Exception { .timeUnit(TimeUnit.MILLISECONDS) .warmupIterations(3) .threads(1) - .forks(3) + .forks(1) .build(); new Runner(options).run(); } diff --git a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java index 46d6ff1828..0705890804 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java @@ -1622,24 +1622,60 @@ public static int digit1(byte[] bytes, int off) { } public static int indexOfQuote(byte[] value, int quote, int fromIndex, int max) { + if (INDEX_OF_CHAR_LATIN1 == null) { + return indexOfQuote0(value, quote, fromIndex, max); + } + try { + return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, quote, fromIndex, max); + } catch (Throwable e) { + throw new JSONException(e.getMessage()); + } + } + static int indexOfQuote0(byte[] value, int quote, int fromIndex, int max) { int i = fromIndex; + long address = ARRAY_BYTE_BASE_OFFSET + fromIndex; int upperBound = fromIndex + ((max - fromIndex) & ~7); long vectorQuote = quote == '\'' ? 0x2727_2727_2727_2727L : 0x2222_2222_2222_2222L; - while (i < upperBound && notContains(getLongLE(value, i), vectorQuote)) { + while (i < upperBound && notContains(UNSAFE.getLong(value, address), vectorQuote)) { i += 8; + address += 8; } return indexOfChar0(value, quote, i, max); } public static int indexOfSlash(byte[] value, int fromIndex, int max) { + if (INDEX_OF_CHAR_LATIN1 == null) { + return indexOfSlashV(value, fromIndex, max); + } + try { + return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, (int) '\\', fromIndex, max); + } catch (Throwable e) { + throw new JSONException(e.getMessage()); + } + } + + public static int indexOfSlashV(byte[] value, int fromIndex, int max) { int i = fromIndex; + long address = ARRAY_BYTE_BASE_OFFSET + fromIndex; int upperBound = fromIndex + ((max - fromIndex) & ~7); - while (i < upperBound && notContains(getLongLE(value, i), 0x5C5C5C5C5C5C5C5CL)) { + while (i < upperBound && notContains(UNSAFE.getLong(value, address), 0x5C5C5C5C5C5C5C5CL)) { i += 8; + address += 8; } return indexOfChar0(value, '\\', i, max); } + public static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { + if (INDEX_OF_CHAR_LATIN1 == null) { + return indexOfChar0(value, ch, fromIndex, max); + } + try { + return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, ch, fromIndex, max); + } catch (Throwable e) { + throw new JSONException(e.getMessage()); + } + } + private static int indexOfChar0(byte[] value, int ch, int fromIndex, int max) { for (int i = fromIndex; i < max; i++) { if (value[i] == ch) { diff --git a/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java index 818f2a0471..a8703043db 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java @@ -59,6 +59,7 @@ public class JDKUtils { public static final MethodHandle METHOD_HANDLE_HAS_NEGATIVE; public static final Predicate PREDICATE_IS_ASCII; + public static final MethodHandle INDEX_OF_CHAR_LATIN1; static final MethodHandles.Lookup IMPL_LOOKUP; static volatile MethodHandle CONSTRUCTOR_LOOKUP; @@ -340,6 +341,21 @@ public class JDKUtils { METHOD_HANDLE_HAS_NEGATIVE = handle; } + MethodHandle indexOfCharLatin1 = null; + if (JVM_VERSION > 9) { + try { + Class cStringLatin1 = Class.forName("java.lang.StringLatin1"); + MethodHandles.Lookup lookup = trustedLookup(cStringLatin1); + indexOfCharLatin1 = lookup.findStatic( + cStringLatin1, + "indexOfChar", + MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class)); + } catch (Throwable ignored) { + // ignore + } + } + INDEX_OF_CHAR_LATIN1 = indexOfCharLatin1; + Boolean compact_strings = null; try { if (JVM_VERSION == 8) { From 4a988dbdd65978256497d82aa9ea17a0967514ce Mon Sep 17 00:00:00 2001 From: wenshao Date: Tue, 21 Jan 2025 17:59:28 +0800 Subject: [PATCH 03/11] isASCIIChar --- .../com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java index ba5f7cd538..d50bcc6dc7 100644 --- a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java @@ -70,6 +70,12 @@ public void direct(Blackhole bh) throws Throwable { bh.consume(hasNegatives(bytes, 0, bytes.length)); } + @Benchmark + public void isASCIIChar(Blackhole bh) { + bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(chars, 0, chars.length)); + } + + @Benchmark public void isASCII(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length)); From 686e67e6c67d7065483976a4285c77ff7a4c9dcf Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 01:06:40 +0800 Subject: [PATCH 04/11] codestyle --- .../java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java index d50bcc6dc7..c54b8f7859 100644 --- a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java @@ -75,7 +75,6 @@ public void isASCIIChar(Blackhole bh) { bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(chars, 0, chars.length)); } - @Benchmark public void isASCII(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length)); From bd3e4e633ee1ad2243e1144f70e9a02cd05c10fd Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 01:23:50 +0800 Subject: [PATCH 05/11] fix build error --- benchmark/pom.xml | 5 ---- .../fastjson2/benchmark/BytesAsciiCheck.java | 13 ++++------ .../alibaba/fastjson2/JSONWriterJSONB.java | 2 +- .../com/alibaba/fastjson2/util/IOUtils.java | 6 ++--- .../alibaba/fastjson2/util/IOUtilsTest.java | 4 ++-- pom.xml | 24 ++++++++----------- 6 files changed, 20 insertions(+), 34 deletions(-) diff --git a/benchmark/pom.xml b/benchmark/pom.xml index 91edfe022f..e653a1eef1 100644 --- a/benchmark/pom.xml +++ b/benchmark/pom.xml @@ -22,11 +22,6 @@ - - com.alibaba.fastjson2 - fastjson2-codegen - ${project.version} - com.alibaba.fastjson2 fastjson2-extension diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java index c54b8f7859..84e9801c69 100644 --- a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java @@ -71,13 +71,13 @@ public void direct(Blackhole bh) throws Throwable { } @Benchmark - public void isASCIIChar(Blackhole bh) { - bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(chars, 0, chars.length)); + public void isASCII(Blackhole bh) throws Throwable { + bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length)); } @Benchmark - public void isASCII(Blackhole bh) throws Throwable { - bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length)); + public void isLatin1(Blackhole bh) throws Throwable { + bh.consume(com.alibaba.fastjson2.util.IOUtils.isLatin1(chars, 0, chars.length)); } @Benchmark @@ -113,11 +113,6 @@ private static int indexOfChar(byte[] bytes, int ch, int fromIndex, int toIndex) } } -// @Benchmark - public void isASCII_chars(Blackhole bh) throws Throwable { - bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(chars, 0, chars.length)); - } - public static boolean hasNegatives(byte[] ba, int off, int len) { for (int i = off; i < off + len; i++) { if (ba[i] < 0) { diff --git a/core/src/main/java/com/alibaba/fastjson2/JSONWriterJSONB.java b/core/src/main/java/com/alibaba/fastjson2/JSONWriterJSONB.java index 1c400651ca..07b59d4e7f 100644 --- a/core/src/main/java/com/alibaba/fastjson2/JSONWriterJSONB.java +++ b/core/src/main/java/com/alibaba/fastjson2/JSONWriterJSONB.java @@ -477,7 +477,7 @@ private void writeString0(char[] chars, int coff, int strlen) { off = this.off; } else { - ascii = isASCII(chars, coff, strlen); + ascii = isLatin1(chars, coff, strlen); } int minCapacity = (ascii ? strlen : strlen * 3) + off + 6; diff --git a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java index 0705890804..89efa7437c 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java @@ -1869,19 +1869,19 @@ static short convEndian(boolean big, short n) { return big == BIG_ENDIAN ? n : Short.reverseBytes(n); } - public static boolean isASCII(char[] chars, int off, int len) { + public static boolean isLatin1(char[] chars, int off, int len) { int upperBound = off + (len & ~7); int end = off + len; long address = ARRAY_BYTE_BASE_OFFSET + ((long) off << 1); while (off < upperBound - && (convEndian(false, UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8)) & 0xFF80FF80FF80FF80L) == 0 + && (convEndian(false, UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8)) & 0xFF00FF00FF00FF00L) == 0 ) { address += 16; off += 8; } while (off < end) { - if (chars[off++] >= 0x7F) { + if (chars[off++] > 0xFF) { return false; } } diff --git a/core/src/test/java/com/alibaba/fastjson2/util/IOUtilsTest.java b/core/src/test/java/com/alibaba/fastjson2/util/IOUtilsTest.java index 2196bd494a..1098602d81 100644 --- a/core/src/test/java/com/alibaba/fastjson2/util/IOUtilsTest.java +++ b/core/src/test/java/com/alibaba/fastjson2/util/IOUtilsTest.java @@ -427,7 +427,7 @@ public void convEndian() throws Throwable { public void test_isASCII() { char[] chars = new char[] {'0', '1', '2', '3', '4', '5', '6', 0x80}; long v = UNSAFE.getLong(chars, ARRAY_CHAR_BASE_OFFSET); - assertTrue(IOUtils.isASCII(chars, 0, 4)); - assertTrue(IOUtils.isASCII(chars, 4, 4)); + assertTrue(IOUtils.isLatin1(chars, 0, 4)); + assertTrue(IOUtils.isLatin1(chars, 4, 4)); } } diff --git a/pom.xml b/pom.xml index d23b335e73..7d9c577259 100644 --- a/pom.xml +++ b/pom.xml @@ -62,28 +62,14 @@ - benchmark - codegen - codegen-test core - example-solon-test example-spring-test - extension extension-jaxrs extension-solon extension-spring5 - fastjson1-compatible kotlin safemode-test @@ -1033,6 +1019,16 @@ test-jdk17 + + enable-codegen + + (,21] + + + codegen + codegen-test + + deploy-settings From 036a3681c41fb12af28e40b7ca016785fe4bdff5 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 01:56:55 +0800 Subject: [PATCH 06/11] fix build error --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7d9c577259..58a5bbd7f9 100644 --- a/pom.xml +++ b/pom.xml @@ -1022,7 +1022,7 @@ enable-codegen - (,21] + (,22] codegen From e4454ef5c6f608adc808276115c859e1b7808bc0 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 14:36:20 +0800 Subject: [PATCH 07/11] add IOUtilsBench --- .../benchmark/wast/IOUtilsBench.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 benchmark/src/main/java/com/alibaba/fastjson2/benchmark/wast/IOUtilsBench.java diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/wast/IOUtilsBench.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/wast/IOUtilsBench.java new file mode 100644 index 0000000000..863c10a7d4 --- /dev/null +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/wast/IOUtilsBench.java @@ -0,0 +1,45 @@ +package com.alibaba.fastjson2.benchmark.wast; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.concurrent.TimeUnit; + +public class IOUtilsBench { + static byte[] bytes; + static char[] chars; + static String str; + + static { + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + buf.append(12345678); + } + str = buf.toString(); + bytes = str.getBytes(); + chars = str.toCharArray(); + } + + @Benchmark + public void digit4(Blackhole bh) throws Throwable { + for (int i = 0; i < 1000; i += 8) { + bh.consume(com.alibaba.fastjson2.util.IOUtils.digit4(bytes, 0)); + } + } + + public static void main(String[] args) throws Exception { + Options options = new OptionsBuilder() + .include(IOUtilsBench.class.getName()) + .mode(Mode.Throughput) + .timeUnit(TimeUnit.MILLISECONDS) + .warmupIterations(3) + .threads(1) + .forks(1) + .build(); + new Runner(options).run(); + } +} From 0451d5c1c0409fa6fde799654391b098d1fe2729 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 14:44:58 +0800 Subject: [PATCH 08/11] optimize isASCII --- .../fastjson2/benchmark/BytesAsciiCheck.java | 12 ++++---- .../benchmark/BytesAsciiCheckTest.java | 6 ++-- .../com/alibaba/fastjson2/util/IOUtils.java | 29 +++++++++---------- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java index 84e9801c69..47dfc547aa 100644 --- a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java @@ -70,7 +70,7 @@ public void direct(Blackhole bh) throws Throwable { bh.consume(hasNegatives(bytes, 0, bytes.length)); } - @Benchmark +// @Benchmark public void isASCII(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length)); } @@ -80,27 +80,27 @@ public void isLatin1(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.isLatin1(chars, 0, chars.length)); } - @Benchmark +// @Benchmark public void isASCIIJDK(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.JDKUtils.PREDICATE_IS_ASCII.test(bytes)); } - @Benchmark +// @Benchmark public void indexOfSlash(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, bytes.length)); } - @Benchmark +// @Benchmark public void indexOfSlashV(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.indexOfSlashV(bytes, 0, bytes.length)); } - @Benchmark +// @Benchmark public void indexOfChar(Blackhole bh) throws Throwable { bh.consume(indexOfChar(bytes, '\'', 0, bytes.length)); } - @Benchmark +// @Benchmark public void indexOfString(Blackhole bh) throws Throwable { bh.consume(str.indexOf('\\')); } diff --git a/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java b/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java index 297fb31abf..dbe22b821e 100644 --- a/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java +++ b/benchmark/src/test/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheckTest.java @@ -60,11 +60,11 @@ public static void direct() throws Throwable { } } - public static void isASCII_chars() throws Throwable { + public static void isLatin1() throws Throwable { for (int j = 0; j < 5; j++) { long start = System.currentTimeMillis(); for (int i = 0; i < LOOP_COUNT; ++i) { - benchmark.isASCII_chars(BH); + benchmark.isLatin1(BH); } long millis = System.currentTimeMillis() - start; System.out.println("BytesAsciiCheck-isASCII_chars : " + millis); @@ -88,7 +88,7 @@ public static void main(String[] args) throws Throwable { // handler(); // lambda(); // direct(); - isASCII_chars(); + isLatin1(); // isASCII(); } } diff --git a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java index 89efa7437c..441802923d 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java @@ -1872,36 +1872,33 @@ static short convEndian(boolean big, short n) { public static boolean isLatin1(char[] chars, int off, int len) { int upperBound = off + (len & ~7); int end = off + len; - long address = ARRAY_BYTE_BASE_OFFSET + ((long) off << 1); - while (off < upperBound - && (convEndian(false, UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8)) & 0xFF00FF00FF00FF00L) == 0 - ) { + long address = ARRAY_CHAR_BASE_OFFSET + off; + long value = 0; + while (off < upperBound) { + value |= UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8); address += 16; off += 8; } - - while (off < end) { - if (chars[off++] > 0xFF) { - return false; - } + while (off++ < end) { + value |= UNSAFE.getShort(chars, address); + address += 2; } - return true; + return (convEndian(false, value) & 0xFF00FF00FF00FF00L) == 0; } public static boolean isASCII(byte[] bytes, int off, int len) { int upperBound = off + (len & ~7); int end = off + len; long address = ARRAY_BYTE_BASE_OFFSET + off; - while (off < upperBound && (UNSAFE.getLong(bytes, address) & 0x8080808080808080L) == 0) { + long value = 0; + while (off < upperBound) { + value |= UNSAFE.getLong(bytes, address); address += 8; off += 8; } - while (off < end) { - if ((bytes[off++] & 0x80) != 0) { - return false; - } + value |= bytes[off++]; } - return true; + return (value & 0x8080808080808080L) == 0; } } From 9f9a01760efa8d25066bebb68e7bb31577d6a7e9 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 19:30:36 +0800 Subject: [PATCH 09/11] remove unused code --- .../main/java/com/alibaba/fastjson2/util/IOUtils.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java index 441802923d..e5c49ffdbb 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java @@ -1665,17 +1665,6 @@ public static int indexOfSlashV(byte[] value, int fromIndex, int max) { return indexOfChar0(value, '\\', i, max); } - public static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { - if (INDEX_OF_CHAR_LATIN1 == null) { - return indexOfChar0(value, ch, fromIndex, max); - } - try { - return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, ch, fromIndex, max); - } catch (Throwable e) { - throw new JSONException(e.getMessage()); - } - } - private static int indexOfChar0(byte[] value, int ch, int fromIndex, int max) { for (int i = fromIndex; i < max; i++) { if (value[i] == ch) { From 0051da7ce4c5624442580b47d32c055fd6ccece4 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 19:45:33 +0800 Subject: [PATCH 10/11] add benchmark --- .../alibaba/fastjson2/benchmark/BytesAsciiCheck.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java index 47dfc547aa..5c990aacf9 100644 --- a/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java +++ b/benchmark/src/main/java/com/alibaba/fastjson2/benchmark/BytesAsciiCheck.java @@ -80,27 +80,27 @@ public void isLatin1(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.isLatin1(chars, 0, chars.length)); } -// @Benchmark + @Benchmark public void isASCIIJDK(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.JDKUtils.PREDICATE_IS_ASCII.test(bytes)); } -// @Benchmark + @Benchmark public void indexOfSlash(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, bytes.length)); } -// @Benchmark + @Benchmark public void indexOfSlashV(Blackhole bh) throws Throwable { bh.consume(com.alibaba.fastjson2.util.IOUtils.indexOfSlashV(bytes, 0, bytes.length)); } -// @Benchmark + @Benchmark public void indexOfChar(Blackhole bh) throws Throwable { bh.consume(indexOfChar(bytes, '\'', 0, bytes.length)); } -// @Benchmark + @Benchmark public void indexOfString(Blackhole bh) throws Throwable { bh.consume(str.indexOf('\\')); } From 1d2b7e0b50e6337857f7d43956a6f5f62953e21f Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 22 Jan 2025 19:46:55 +0800 Subject: [PATCH 11/11] bug fix --- .../src/main/java/com/alibaba/fastjson2/util/IOUtils.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java index e5c49ffdbb..5b9b316faa 100644 --- a/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java +++ b/core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java @@ -118,7 +118,7 @@ public class IOUtils { DIGITS_K_64[i] = c0 + v; } ZERO_DOT_LATIN1 = UNSAFE.getShort(new byte[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET); - ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET); + ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_CHAR_BASE_OFFSET); } public static void writeDigitPair(byte[] buf, int charPos, int value) { @@ -1735,7 +1735,7 @@ public static int getIntUnaligned(byte[] bytes, int offset) { } public static int getIntUnaligned(char[] bytes, int offset) { - return UNSAFE.getInt(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1)); + return UNSAFE.getInt(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1)); } public static long getLongBE(byte[] bytes, int offset) { @@ -1748,7 +1748,7 @@ public static long getLongUnaligned(byte[] bytes, int offset) { } public static long getLongUnaligned(char[] bytes, int offset) { - return UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1)); + return UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1)); } public static long getLongLE(byte[] bytes, int offset) { @@ -1758,7 +1758,7 @@ public static long getLongLE(byte[] bytes, int offset) { public static long getLongLE(char[] bytes, int offset) { return convEndian(false, - UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1))); + UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1))); } public static short hex2(int i) {