Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Swar 20250120 #3305

Merged
merged 12 commits into from
Jan 22, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions benchmark/pom.xml
Original file line number Diff line number Diff line change
@@ -22,11 +22,6 @@
</properties>

<dependencies>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-codegen</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2-extension</artifactId>
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.alibaba.fastjson2.benchmark;

import com.alibaba.fastjson2.JSONException;
import com.alibaba.fastjson2.benchmark.eishay.EishayParseBinaryArrayMapping;
import com.alibaba.fastjson2.util.JDKUtils;
import org.apache.commons.io.IOUtils;
@@ -11,68 +12,110 @@
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.io.InputStream;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.concurrent.TimeUnit;

import static com.alibaba.fastjson2.util.JDKUtils.ARRAY_BYTE_BASE_OFFSET;
import static com.alibaba.fastjson2.util.JDKUtils.UNSAFE;

public class BytesAsciiCheck {
static byte[] bytes;
static char[] chars;
static String str;
static final MethodHandle INDEX_OF_CHAR;

static {
MethodHandle indexOfChar = null;
try {
try {
Class<?> cStringLatin1 = Class.forName("java.lang.StringLatin1");
MethodHandles.Lookup lookup = JDKUtils.trustedLookup(cStringLatin1);
indexOfChar = lookup.findStatic(
cStringLatin1,
"indexOfChar",
MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class));
} catch (Throwable ignored) {
// ignore
}
} catch (Exception e) {
e.printStackTrace();
}

INDEX_OF_CHAR = indexOfChar;
try {
InputStream is = EishayParseBinaryArrayMapping.class.getClassLoader().getResourceAsStream("data/eishay.json");
String str = IOUtils.toString(is, "UTF-8");
str = IOUtils.toString(is, "UTF-8");
bytes = str.getBytes();
chars = str.toCharArray();
} catch (Exception e) {
e.printStackTrace();
}
}

@Benchmark
// @Benchmark
/**
 * Calls {@code JDKUtils.METHOD_HANDLE_HAS_NEGATIVE} over the whole {@code bytes} array
 * (offset 0, length {@code bytes.length}) and hands the result to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 * @throws Throwable anything propagated by the method-handle invocation
 */
public void handler(Blackhole bh) throws Throwable {
bh.consume(
// Generic invoke (not invokeExact): argument and return types are adapted at the call site.
JDKUtils.METHOD_HANDLE_HAS_NEGATIVE.invoke(bytes, 0, bytes.length)
);
}

@Benchmark
// @Benchmark
/**
 * Applies the {@code JDKUtils.PREDICATE_IS_ASCII} predicate to {@code bytes}
 * and feeds the boolean result to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
public void lambda(Blackhole bh) throws Throwable {
    final boolean result = JDKUtils.PREDICATE_IS_ASCII.test(bytes);
    bh.consume(result);
}

@Benchmark
// @Benchmark
/**
 * Runs the plain-loop {@code hasNegatives} scan over the whole {@code bytes}
 * array and feeds the boolean result to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
public void direct(Blackhole bh) throws Throwable {
    final boolean anyNegative = hasNegatives(bytes, 0, bytes.length);
    bh.consume(anyNegative);
}

// @Benchmark
/**
 * Runs fastjson2's {@code IOUtils.isASCII(byte[], int, int)} over the whole
 * {@code bytes} array and feeds the boolean result to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
public void isASCII(Blackhole bh) throws Throwable {
    // Fully qualified: the simple name IOUtils is already taken by the commons-io import.
    final boolean ascii = com.alibaba.fastjson2.util.IOUtils.isASCII(bytes, 0, bytes.length);
    bh.consume(ascii);
}

@Benchmark
public void direct8(Blackhole bh) throws Throwable {
bh.consume(hasNegatives_8(bytes, 0, bytes.length));
public void isLatin1(Blackhole bh) throws Throwable {
bh.consume(com.alibaba.fastjson2.util.IOUtils.isLatin1(chars, 0, chars.length));
}

public static boolean hasNegatives(byte[] ba, int off, int len) {
for (int i = off; i < off + len; i++) {
if (ba[i] < 0) {
return true;
}
}
return false;
/**
 * Applies the {@code JDKUtils.PREDICATE_IS_ASCII} predicate to {@code bytes}
 * and feeds the boolean result to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
@Benchmark
public void isASCIIJDK(Blackhole bh) throws Throwable {
    final boolean ascii = JDKUtils.PREDICATE_IS_ASCII.test(bytes);
    bh.consume(ascii);
}

public static boolean hasNegatives_8(byte[] bytes, int off, int len) {
int i = off;
while (i + 8 <= off + len) {
if ((UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) {
return true;
}
i += 8;
/**
 * Runs fastjson2's {@code IOUtils.indexOfSlash} over the whole {@code bytes}
 * array and feeds the returned index to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
@Benchmark
public void indexOfSlash(Blackhole bh) throws Throwable {
    final int index = com.alibaba.fastjson2.util.IOUtils.indexOfSlash(bytes, 0, bytes.length);
    bh.consume(index);
}

/**
 * Runs fastjson2's {@code IOUtils.indexOfSlashV} over the whole {@code bytes}
 * array and feeds the returned index to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
@Benchmark
public void indexOfSlashV(Blackhole bh) throws Throwable {
    final int index = com.alibaba.fastjson2.util.IOUtils.indexOfSlashV(bytes, 0, bytes.length);
    bh.consume(index);
}

/**
 * Searches the whole {@code bytes} array for a single quote through the private
 * {@code indexOfChar(byte[], int, int, int)} helper and feeds the returned index
 * to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
@Benchmark
public void indexOfChar(Blackhole bh) throws Throwable {
    final int index = indexOfChar(bytes, '\'', 0, bytes.length);
    bh.consume(index);
}

/**
 * Calls {@code String.indexOf} for a backslash on {@code str} and feeds the
 * returned index to the blackhole.
 *
 * @param bh JMH blackhole that consumes the result
 */
@Benchmark
public void indexOfString(Blackhole bh) throws Throwable {
    final int index = str.indexOf('\\');
    bh.consume(index);
}

/**
 * Invokes the {@code INDEX_OF_CHAR} method handle, which the static initializer
 * binds to {@code java.lang.StringLatin1.indexOfChar(byte[], int, int, int)}.
 *
 * @param bytes     array to search
 * @param ch        value to look for
 * @param fromIndex third argument passed to the handle
 * @param toIndex   fourth argument passed to the handle
 * @return whatever the underlying JDK method returns
 * @throws JSONException if the handle could not be resolved on this JDK, or if
 *                       the invocation fails (the original failure is kept as the cause)
 */
private static int indexOfChar(byte[] bytes, int ch, int fromIndex, int toIndex) {
    if (INDEX_OF_CHAR == null) {
        // The lookup in the static initializer is best-effort and leaves the handle null on failure.
        throw new JSONException("java.lang.StringLatin1.indexOfChar is not available");
    }
    try {
        // invokeExact requires the call-site types to match (byte[], int, int, int)int exactly.
        return (int) INDEX_OF_CHAR.invokeExact(bytes, ch, fromIndex, toIndex);
    } catch (Throwable e) {
        throw new JSONException("StringLatin1.indexOfChar invocation failed", e);
    }
}

for (; i < off + len; i++) {
if (bytes[i] < 0) {
public static boolean hasNegatives(byte[] ba, int off, int len) {
for (int i = off; i < off + len; i++) {
if (ba[i] < 0) {
return true;
}
}
@@ -85,6 +128,7 @@ public static void main(String[] args) throws Exception {
.mode(Mode.Throughput)
.timeUnit(TimeUnit.MILLISECONDS)
.warmupIterations(3)
.threads(1)
.forks(1)
.build();
new Runner(options).run();
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.alibaba.fastjson2.benchmark.wast;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.util.concurrent.TimeUnit;

/**
 * JMH micro-benchmark for {@code com.alibaba.fastjson2.util.IOUtils.digit4}.
 *
 * <p>The shared input is the text {@code 12345678} repeated 1000 times, held as a
 * {@code String}, a {@code byte[]} and a {@code char[]}.
 */
public class IOUtilsBench {
    static byte[] bytes;
    static char[] chars;
    static String str;

    static {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < 1000; i++) {
            buf.append(12345678);
        }
        str = buf.toString();
        bytes = str.getBytes();
        chars = str.toCharArray();
    }

    /**
     * Calls {@code digit4} at offsets 0, 8, 16, ... below 1000, so each call reads
     * the start of a different {@code 12345678} group.
     *
     * @param bh JMH blackhole that consumes each result
     */
    @Benchmark
    public void digit4(Blackhole bh) throws Throwable {
        for (int i = 0; i < 1000; i += 8) {
            // Use the loop index as the offset; a constant offset made every iteration identical.
            bh.consume(com.alibaba.fastjson2.util.IOUtils.digit4(bytes, i));
        }
    }

    /**
     * Runs this benchmark class through the JMH runner: throughput mode,
     * millisecond time unit, 3 warmup iterations, 1 thread, 1 fork.
     */
    public static void main(String[] args) throws Exception {
        Options options = new OptionsBuilder()
                .include(IOUtilsBench.class.getName())
                .mode(Mode.Throughput)
                .timeUnit(TimeUnit.MILLISECONDS)
                .warmupIterations(3)
                .threads(1)
                .forks(1)
                .build();
        new Runner(options).run();
    }
}
Original file line number Diff line number Diff line change
@@ -34,6 +34,19 @@ public static void lambda() throws Throwable {
}
}

/**
 * Runs five timing rounds; each round calls {@code benchmark.isASCII(BH)}
 * {@code LOOP_COUNT} times and prints the elapsed wall-clock milliseconds.
 */
public static void isASCII() throws Throwable {
    for (int round = 0; round < 5; round++) {
        final long begin = System.currentTimeMillis();
        for (int n = 0; n < LOOP_COUNT; n++) {
            benchmark.isASCII(BH);
        }
        final long elapsed = System.currentTimeMillis() - begin;
        System.out.println("BytesAsciiCheck-isASCII : " + elapsed);

        // zulu17.40.19 : 118
    }
}

public static void direct() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
@@ -47,14 +60,14 @@ public static void direct() throws Throwable {
}
}

public static void direct8() throws Throwable {
public static void isLatin1() throws Throwable {
for (int j = 0; j < 5; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < LOOP_COUNT; ++i) {
benchmark.direct8(BH);
benchmark.isLatin1(BH);
}
long millis = System.currentTimeMillis() - start;
System.out.println("BytesAsciiCheck-direct8 : " + millis);
System.out.println("BytesAsciiCheck-isASCII_chars : " + millis);

// zulu17.40.19 : 478
}
@@ -75,6 +88,7 @@ public static void main(String[] args) throws Throwable {
// handler();
// lambda();
// direct();
// direct8();
isLatin1();
// isASCII();
}
}
Original file line number Diff line number Diff line change
@@ -477,7 +477,7 @@ private void writeString0(char[] chars, int coff, int strlen) {

off = this.off;
} else {
ascii = isASCII(chars, coff, strlen);
ascii = isLatin1(chars, coff, strlen);
}

int minCapacity = (ascii ? strlen : strlen * 3) + off + 6;
76 changes: 59 additions & 17 deletions core/src/main/java/com/alibaba/fastjson2/util/IOUtils.java
Original file line number Diff line number Diff line change
@@ -118,7 +118,7 @@ public class IOUtils {
DIGITS_K_64[i] = c0 + v;
}
ZERO_DOT_LATIN1 = UNSAFE.getShort(new byte[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_BYTE_BASE_OFFSET);
ZERO_DOT_UTF16 = UNSAFE.getInt(new char[] {'0', '.'}, ARRAY_CHAR_BASE_OFFSET);
}

public static void writeDigitPair(byte[] buf, int charPos, int value) {
@@ -1622,20 +1622,45 @@ public static int digit1(byte[] bytes, int off) {
}

/**
 * Locates {@code quote} in {@code value}, starting at {@code fromIndex} and bounded by {@code max}.
 *
 * <p>Uses the {@code INDEX_OF_CHAR_LATIN1} method handle when it was resolved, and
 * falls back to {@code indexOfQuote0} otherwise.
 *
 * @param value     bytes to search
 * @param quote     quote character to find
 * @param fromIndex index at which the search starts
 * @param max       upper bound of the search
 * @return the index reported by the selected implementation
 *         (presumably -1 when absent, as with {@code String.indexOf} -- TODO confirm)
 * @throws JSONException if the method-handle invocation fails; the original failure is attached as the cause
 */
public static int indexOfQuote(byte[] value, int quote, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfQuote0(value, quote, fromIndex, max);
    }
    try {
        // invokeExact requires the call-site types to match (byte[], int, int, int)int exactly.
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, quote, fromIndex, max);
    } catch (Throwable e) {
        // Keep the cause so the stack trace of the real failure is not lost.
        throw new JSONException(e.getMessage(), e);
    }
}
static int indexOfQuote0(byte[] value, int quote, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
long vectorQuote = quote == '\'' ? 0x2727_2727_2727_2727L : 0x2222_2222_2222_2222L;
while (i < upperBound && notContains(getLongLE(value, i), vectorQuote)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), vectorQuote)) {
i += 8;
address += 8;
}
return indexOfChar0(value, quote, i, max);
}

/**
 * Locates a backslash in {@code value}, starting at {@code fromIndex} and bounded by {@code max}.
 *
 * <p>Uses the {@code INDEX_OF_CHAR_LATIN1} method handle when it was resolved, and
 * falls back to {@code indexOfSlashV} otherwise.
 *
 * @param value     bytes to search
 * @param fromIndex index at which the search starts
 * @param max       upper bound of the search
 * @return the index reported by the selected implementation
 *         (presumably -1 when absent, as with {@code String.indexOf} -- TODO confirm)
 * @throws JSONException if the method-handle invocation fails; the original failure is attached as the cause
 */
public static int indexOfSlash(byte[] value, int fromIndex, int max) {
    if (INDEX_OF_CHAR_LATIN1 == null) {
        return indexOfSlashV(value, fromIndex, max);
    }
    try {
        // The explicit (int) cast on the char literal is required: invokeExact does no argument conversion.
        return (int) INDEX_OF_CHAR_LATIN1.invokeExact(value, (int) '\\', fromIndex, max);
    } catch (Throwable e) {
        // Keep the cause so the stack trace of the real failure is not lost.
        throw new JSONException(e.getMessage(), e);
    }
}

public static int indexOfSlashV(byte[] value, int fromIndex, int max) {
int i = fromIndex;
long address = ARRAY_BYTE_BASE_OFFSET + fromIndex;
int upperBound = fromIndex + ((max - fromIndex) & ~7);
while (i < upperBound && notContains(getLongLE(value, i), 0x5C5C5C5C5C5C5C5CL)) {
while (i < upperBound && notContains(UNSAFE.getLong(value, address), 0x5C5C5C5C5C5C5C5CL)) {
i += 8;
address += 8;
}
return indexOfChar0(value, '\\', i, max);
}
@@ -1710,7 +1735,7 @@ public static int getIntUnaligned(byte[] bytes, int offset) {
}

public static int getIntUnaligned(char[] bytes, int offset) {
return UNSAFE.getInt(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getInt(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}

public static long getLongBE(byte[] bytes, int offset) {
@@ -1723,7 +1748,7 @@ public static long getLongUnaligned(byte[] bytes, int offset) {
}

public static long getLongUnaligned(char[] bytes, int offset) {
return UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1));
return UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1));
}

public static long getLongLE(byte[] bytes, int offset) {
@@ -1733,7 +1758,7 @@ public static long getLongLE(byte[] bytes, int offset) {

public static long getLongLE(char[] bytes, int offset) {
return convEndian(false,
UNSAFE.getLong(bytes, ARRAY_BYTE_BASE_OFFSET + ((long) offset << 1)));
UNSAFE.getLong(bytes, ARRAY_CHAR_BASE_OFFSET + ((long) offset << 1)));
}

public static short hex2(int i) {
@@ -1833,19 +1858,36 @@ static short convEndian(boolean big, short n) {
return big == BIG_ENDIAN ? n : Short.reverseBytes(n);
}

public static boolean isASCII(char[] chars, int coff, int strlen) {
int i = coff;
for (int upperBound = coff + (strlen & ~3); i < upperBound; i += 4) {
if ((getLongLE(chars, i) & 0xFF00FF00FF00FF00L) != 0) {
return false;
}
/**
 * Checks whether every char in {@code chars[off, off + len)} has a zero high byte,
 * i.e. is at most {@code 0xFF}.
 *
 * <p>All scanned chars are OR-ed into one accumulator and the high byte of each
 * 16-bit lane is tested once at the end, so the loops contain no branches on the data.
 * No bounds checks are performed; the caller must pass a valid range.
 *
 * @param chars array to inspect
 * @param off   index of the first char to inspect
 * @param len   number of chars to inspect
 * @return {@code true} if no inspected char exceeds {@code 0xFF}
 */
public static boolean isLatin1(char[] chars, int off, int len) {
    int upperBound = off + (len & ~7);
    int end = off + len;
    // Unsafe offsets are in bytes and a char is two bytes wide, so the char index is
    // scaled by 2. Adding the raw index started the scan at the wrong position
    // whenever off != 0.
    long address = ARRAY_CHAR_BASE_OFFSET + ((long) off << 1);
    long value = 0;
    // Main loop: two 8-byte reads cover 8 chars per iteration.
    while (off < upperBound) {
        value |= UNSAFE.getLong(chars, address) | UNSAFE.getLong(chars, address + 8);
        address += 16;
        off += 8;
    }
    // Tail: the remaining (len & 7) chars, one at a time. A char >= 0x8000 sign-extends
    // when widened from short, which still leaves high-byte bits set in the accumulator.
    while (off++ < end) {
        value |= UNSAFE.getShort(chars, address);
        address += 2;
    }
    // NOTE(review): the lanes were read in native byte order; confirm that the convEndian
    // call is still correct on big-endian hosts.
    return (convEndian(false, value) & 0xFF00FF00FF00FF00L) == 0;
}

for (; i < strlen; ++i) {
if (chars[i] > 0x00FF) {
return false;
}
public static boolean isASCII(byte[] bytes, int off, int len) {
int upperBound = off + (len & ~7);
int end = off + len;
long address = ARRAY_BYTE_BASE_OFFSET + off;
long value = 0;
while (off < upperBound) {
value |= UNSAFE.getLong(bytes, address);
address += 8;
off += 8;
}
return true;
while (off < end) {
value |= bytes[off++];
}
return (value & 0x8080808080808080L) == 0;
}
}
33 changes: 18 additions & 15 deletions core/src/main/java/com/alibaba/fastjson2/util/JDKUtils.java
Original file line number Diff line number Diff line change
@@ -59,6 +59,7 @@ public class JDKUtils {

public static final MethodHandle METHOD_HANDLE_HAS_NEGATIVE;
public static final Predicate<byte[]> PREDICATE_IS_ASCII;
public static final MethodHandle INDEX_OF_CHAR_LATIN1;

static final MethodHandles.Lookup IMPL_LOOKUP;
static volatile MethodHandle CONSTRUCTOR_LOOKUP;
@@ -340,6 +341,21 @@ public class JDKUtils {
METHOD_HANDLE_HAS_NEGATIVE = handle;
}

MethodHandle indexOfCharLatin1 = null;
if (JVM_VERSION > 9) {
try {
Class<?> cStringLatin1 = Class.forName("java.lang.StringLatin1");
MethodHandles.Lookup lookup = trustedLookup(cStringLatin1);
indexOfCharLatin1 = lookup.findStatic(
cStringLatin1,
"indexOfChar",
MethodType.methodType(int.class, byte[].class, int.class, int.class, int.class));
} catch (Throwable ignored) {
// ignore
}
}
INDEX_OF_CHAR_LATIN1 = indexOfCharLatin1;

Boolean compact_strings = null;
try {
if (JVM_VERSION == 8) {
@@ -510,20 +526,7 @@ public static String latin1StringJDK8(byte[] bytes, int offset, int strlen) {
return STRING_CREATOR_JDK8.apply(chars, Boolean.TRUE);
}

public static boolean isASCII(byte[] chars) {
int i = 0;
int strlen = chars.length;
for (int upperBound = (strlen & ~7); i < upperBound; i += 8) {
if ((UNSAFE.getLong(chars, ARRAY_BYTE_BASE_OFFSET + i) & 0x8080808080808080L) != 0) {
return false;
}
}

for (; i < strlen; ++i) {
if (UNSAFE.getByte(chars, ARRAY_BYTE_BASE_OFFSET + i) < 0) {
return false;
}
}
return true;
static boolean isASCII(byte[] chars) {
return IOUtils.isASCII(chars, 0, chars.length);
}
}
Original file line number Diff line number Diff line change
@@ -427,7 +427,7 @@ public void convEndian() throws Throwable {
public void test_isASCII() {
char[] chars = new char[] {'0', '1', '2', '3', '4', '5', '6', 0x80};
long v = UNSAFE.getLong(chars, ARRAY_CHAR_BASE_OFFSET);
assertTrue(IOUtils.isASCII(chars, 0, 4));
assertTrue(IOUtils.isASCII(chars, 4, 4));
assertTrue(IOUtils.isLatin1(chars, 0, 4));
assertTrue(IOUtils.isLatin1(chars, 4, 4));
}
}
11 changes: 9 additions & 2 deletions core/src/test/java/com/alibaba/fastjson2/util/JDKUtilsTest.java
Original file line number Diff line number Diff line change
@@ -4,11 +4,11 @@
import org.junit.jupiter.api.Test;

import java.lang.invoke.*;
import java.util.Arrays;
import java.util.function.ToIntFunction;

import static com.alibaba.fastjson2.util.JDKUtils.*;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.*;

public class JDKUtilsTest {
@Test
@@ -135,6 +135,13 @@ public void lookup_int() throws Throwable {
assertNotNull(func);
}

/**
 * Verifies that {@code JDKUtils.isASCII} accepts an array made up only of the ASCII letter 'a'.
 */
@Test
public void test_isASCII() {
// 127 is not a multiple of 8, so both the 8-byte loop and the byte-wise tail
// of IOUtils.isASCII are exercised.
byte[] bytes = new byte[127];
Arrays.fill(bytes, (byte) 'a');
assertTrue(isASCII(bytes));
}

private static class PrivateBeanInt {
private byte coder;

24 changes: 10 additions & 14 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -62,28 +62,14 @@
</properties>

<modules>
<!--
<module>adapter</module>
-->
<module>benchmark</module>
<module>codegen</module>
<module>codegen-test</module>
<module>core</module>
<!--
<module>example-graalvm-native</module>
-->
<module>example-solon-test</module>
<module>example-spring-test</module>
<!--
<module>example-spring6-test</module>
-->
<module>extension</module>
<module>extension-jaxrs</module>
<module>extension-solon</module>
<module>extension-spring5</module>
<!--
<module>extension-spring6</module>
-->
<module>fastjson1-compatible</module>
<module>kotlin</module>
<module>safemode-test</module>
@@ -1033,6 +1019,16 @@
<module>test-jdk17</module>
</modules>
</profile>
<profile>
<id>enable-codegen</id>
<activation>
<jdk>(,22]</jdk>
</activation>
<modules>
<module>codegen</module>
<module>codegen-test</module>
</modules>
</profile>
<profile>
<id>deploy-settings</id>
<activation>