Skip to content

Commit 06d253f

Browse files
eregon authored and msimacek committed
[GR-68916] Intern strings with a weak cache to deduplicate strings in the heap
PullRequest: graalpython/4020
2 parents 678b9c5 + a4e4e72 commit 06d253f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+646
-272
lines changed

.github/workflows/platforms-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ jobs:
3333
echo "Using $JAVA_HOME"
3434
$JAVA_HOME/bin/java -version
3535
mkdir $(pwd)/user_resource_cache
36-
mvn -f graalpython/com.oracle.graal.python.test.integration/pom.xml -Dcom.oracle.graal.python.test.polyglot.version=26.0.0 -Dcom.oracle.graal.python.test.polyglot_repo=file:///$(pwd)/m2repo --batch-mode -U -Dtruffle.UseFallbackRuntime=true -Dpolyglot.engine.allowUnsupportedPlatform=true -Dpolyglot.engine.userResourceCache=/$(pwd)/user_resource_cache -Dpolyglot.python.UnsupportedPlatformEmulates=linux -Dorg.graalvm.python.resources.exclude=native.files test -Dtest=HelloWorldTests,AttributeTests,BuiltinSubclassTest,ComplexTexts,CreateClassTest,AsyncActionThreadingTest,JavaInteropTest
36+
mvn -f graalpython/com.oracle.graal.python.test.integration/pom.xml -Dcom.oracle.graal.python.test.polyglot.version=25.1.0 -Dcom.oracle.graal.python.test.polyglot_repo=file:///$(pwd)/m2repo --batch-mode -U -Dtruffle.UseFallbackRuntime=true -Dpolyglot.engine.allowUnsupportedPlatform=true -Dpolyglot.engine.userResourceCache=/$(pwd)/user_resource_cache -Dpolyglot.python.UnsupportedPlatformEmulates=linux -Dorg.graalvm.python.resources.exclude=native.files test -Dtest=HelloWorldTests,AttributeTests,BuiltinSubclassTest,ComplexTexts,CreateClassTest,AsyncActionThreadingTest,JavaInteropTest
3737
rm -rf $(pwd)/user_resource_cache
3838
3939
- uses: docker/setup-qemu-action@v3
@@ -53,6 +53,6 @@ jobs:
5353
echo 'Using \$JAVA_HOME'
5454
\$JAVA_HOME/bin/java -version
5555
mkdir \$(pwd)/user_resource_cache
56-
mvn -f graalpython/com.oracle.graal.python.test.integration/pom.xml -Dcom.oracle.graal.python.test.polyglot.version=26.0.0 -Dcom.oracle.graal.python.test.polyglot_repo=file:///\$(pwd)/m2repo --batch-mode -U -Dtruffle.UseFallbackRuntime=true -Dpolyglot.engine.allowUnsupportedPlatform=true -Dpolyglot.engine.userResourceCache=/\$(pwd)/user_resource_cache -Dpolyglot.python.UnsupportedPlatformEmulates=linux -Dorg.graalvm.python.resources.exclude=native.files test -Dtest=HelloWorldTests,AttributeTests,BuiltinSubclassTest,ComplexTexts,CreateClassTest,AsyncActionThreadingTest,JavaInteropTest
56+
mvn -f graalpython/com.oracle.graal.python.test.integration/pom.xml -Dcom.oracle.graal.python.test.polyglot.version=25.1.0 -Dcom.oracle.graal.python.test.polyglot_repo=file:///\$(pwd)/m2repo --batch-mode -U -Dtruffle.UseFallbackRuntime=true -Dpolyglot.engine.allowUnsupportedPlatform=true -Dpolyglot.engine.userResourceCache=/\$(pwd)/user_resource_cache -Dpolyglot.python.UnsupportedPlatformEmulates=linux -Dorg.graalvm.python.resources.exclude=native.files test -Dtest=HelloWorldTests,AttributeTests,BuiltinSubclassTest,ComplexTexts,CreateClassTest,AsyncActionThreadingTest,JavaInteropTest
5757
rm -rf \$(pwd)/user_resource_cache
5858
"

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,9 @@
33
This changelog summarizes major changes between GraalVM versions of the Python
44
language runtime. The main focus is on user-observable behavior of the engine.
55

6+
## Version 25.1.0
7+
* Intern string literals in source files
8+
69
## Version 25.0.1
710
* Allow users to keep going on unsupported JDK/OS/ARCH combinations at their own risk by opting out of early failure using `-Dtruffle.UseFallbackRuntime=true`, `-Dpolyglot.engine.userResourceCache=/set/to/a/writeable/dir`, `-Dpolyglot.engine.allowUnsupportedPlatform=true`, and `-Dpolyglot.python.UnsupportedPlatformEmulates=[linux|macos|windows]` and `-Dorg.graalvm.python.resources.exclude=native.files`.
811

ci/python-bench.libsonnet

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,6 +241,7 @@
241241
name: "bisect-benchmark",
242242
targets: ['bench'],
243243
logs +: logs(self.os, self.arch),
244+
deploysArtifacts: true,
244245
packages +: packages(self.os, self.arch) + {
245246
"apache/ant": ">=1.9.4",
246247
libyaml: "==0.2.5",

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_unicodedata.txt

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,7 @@ test.test_unicodedata.UnicodeFunctionsTest.test_east_asian_width_unassigned @ da
1212
test.test_unicodedata.UnicodeFunctionsTest.test_issue10254 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1313
test.test_unicodedata.UnicodeFunctionsTest.test_issue29456 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1414
test.test_unicodedata.UnicodeFunctionsTest.test_name_inverse_lookup @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
15-
test.test_unicodedata.UnicodeFunctionsTest.test_numeric @ darwin-x86_64
1615
test.test_unicodedata.UnicodeFunctionsTest.test_pr29 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1716
test.test_unicodedata.UnicodeMiscTest.test_bug_1704793 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1817
test.test_unicodedata.UnicodeMiscTest.test_bug_4971 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1918
test.test_unicodedata.UnicodeMiscTest.test_bug_5828 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
20-
test.test_unicodedata.UnicodeMiscTest.test_decimal_numeric_consistent @ darwin-x86_64
21-
test.test_unicodedata.UnicodeMiscTest.test_digit_numeric_consistent @ darwin-x86_64
22-
test.test_unicodedata.UnicodeMiscTest.test_ucd_510 @ darwin-x86_64

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/Python3Core.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@
4747
import static com.oracle.graal.python.nodes.StringLiterals.T_GRAALPYTHON;
4848
import static com.oracle.graal.python.nodes.StringLiterals.T_JAVA;
4949
import static com.oracle.graal.python.nodes.StringLiterals.T_REF;
50+
import static com.oracle.graal.python.util.PythonUtils.toInternedTruffleStringUncached;
5051
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
5152
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
5253

@@ -1206,7 +1207,7 @@ private void initializeTypes() {
12061207
for (PythonBuiltins builtin : builtins) {
12071208
CoreFunctions annotation = builtin.getClass().getAnnotation(CoreFunctions.class);
12081209
if (annotation.defineModule().length() > 0) {
1209-
createModule(toTruffleStringUncached(annotation.defineModule()), builtin);
1210+
createModule(toInternedTruffleStringUncached(annotation.defineModule()), builtin);
12101211
}
12111212
}
12121213
// publish builtin types in the corresponding modules
@@ -1240,13 +1241,13 @@ private void populateBuiltins() {
12401241
builtin.initialize(this);
12411242
CoreFunctions annotation = builtin.getClass().getAnnotation(CoreFunctions.class);
12421243
if (annotation.defineModule().length() > 0) {
1243-
PythonModule module = builtinModules.get(toTruffleStringUncached(annotation.defineModule()));
1244+
PythonModule module = builtinModules.get(toInternedTruffleStringUncached(annotation.defineModule()));
12441245
if (module != null) {
12451246
addBuiltinsTo(module, builtin);
12461247
}
12471248
}
12481249
if (annotation.extendsModule().length() > 0) {
1249-
PythonModule module = builtinModules.get(toTruffleStringUncached(annotation.extendsModule()));
1250+
PythonModule module = builtinModules.get(toInternedTruffleStringUncached(annotation.extendsModule()));
12501251
if (module != null) {
12511252
addBuiltinsTo(module, builtin);
12521253
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/PythonBuiltinClassType.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@
6969
import static com.oracle.graal.python.nodes.BuiltinNames.J__STRUCT;
7070
import static com.oracle.graal.python.nodes.BuiltinNames.J__THREAD;
7171
import static com.oracle.graal.python.nodes.BuiltinNames.J__TYPING;
72+
import static com.oracle.graal.python.util.PythonUtils.toInternedTruffleStringUncached;
7273
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
7374

7475
import java.lang.reflect.Field;
@@ -1545,10 +1546,10 @@ public TypeBuilder doc(String doc) {
15451546
private final TpSlots slots;
15461547

15471548
PythonBuiltinClassType(String name, PythonBuiltinClassType base, TypeBuilder builder) {
1548-
this.name = toTruffleStringUncached(name);
1549+
this.name = toInternedTruffleStringUncached(name);
15491550
this.base = base;
1550-
this.publishInModule = toTruffleStringUncached(builder.publishInModule);
1551-
this.moduleName = builder.moduleName != null ? toTruffleStringUncached(builder.moduleName) : null;
1551+
this.publishInModule = toInternedTruffleStringUncached(builder.publishInModule);
1552+
this.moduleName = builder.moduleName != null ? toInternedTruffleStringUncached(builder.moduleName) : null;
15521553
if (builder.moduleName != null && !J_BUILTINS.equals(builder.moduleName)) {
15531554
printName = toTruffleStringUncached(builder.moduleName + "." + name);
15541555
} else {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/PythonBuiltins.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___DOC__;
2929
import static com.oracle.graal.python.nodes.truffle.TruffleStringMigrationHelpers.assertNoJavaString;
3030
import static com.oracle.graal.python.nodes.truffle.TruffleStringMigrationHelpers.ensureNoJavaString;
31+
import static com.oracle.graal.python.util.PythonUtils.toInternedTruffleStringUncached;
3132
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
3233

3334
import java.util.HashMap;
@@ -91,7 +92,7 @@ public void initialize(Python3Core core) {
9192
} else {
9293
declaresExplicitSelf = true;
9394
}
94-
TruffleString tsName = toTruffleStringUncached(builtin.name());
95+
TruffleString tsName = toInternedTruffleStringUncached(builtin.name());
9596
PythonLanguage language = core.getLanguage();
9697
RootCallTarget callTarget = language.initBuiltinCallTarget(l -> new BuiltinFunctionRootNode(l, builtin, factory, declaresExplicitSelf), factory.getNodeClass(),
9798
builtin.name());
@@ -111,7 +112,7 @@ public void initialize(Python3Core core) {
111112
} else if (builtin.isStaticmethod()) {
112113
callable = PFactory.createStaticmethodFromCallableObj(language, function);
113114
}
114-
builtinFunctions.put(toTruffleStringUncached(builtin.name()), callable);
115+
builtinFunctions.put(tsName, callable);
115116
});
116117
}
117118

@@ -170,7 +171,7 @@ public static int numDefaults(Builtin builtin) {
170171
* instead in {@link #postInitialize}.
171172
*/
172173
protected final void addBuiltinConstant(String name, Object value) {
173-
addBuiltinConstant(toTruffleStringUncached(name), value);
174+
addBuiltinConstant(toInternedTruffleStringUncached(name), value);
174175
}
175176

176177
/**

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MarshalModuleBuiltins.java

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@
9393
import com.oracle.graal.python.builtins.objects.ints.PInt;
9494
import com.oracle.graal.python.builtins.objects.set.PBaseSet;
9595
import com.oracle.graal.python.builtins.objects.str.PString;
96-
import com.oracle.graal.python.builtins.objects.str.StringNodes;
9796
import com.oracle.graal.python.builtins.objects.str.StringNodes.IsInternedStringNode;
9897
import com.oracle.graal.python.builtins.objects.type.TypeNodes.IsSameTypeNode;
9998
import com.oracle.graal.python.compiler.BytecodeCodeUnit;
@@ -112,6 +111,7 @@
112111
import com.oracle.graal.python.lib.PyUnicodeCheckExactNode;
113112
import com.oracle.graal.python.nodes.ErrorMessages;
114113
import com.oracle.graal.python.nodes.PRaiseNode;
114+
import com.oracle.graal.python.nodes.StringLiterals;
115115
import com.oracle.graal.python.nodes.bytecode_dsl.BytecodeDSLCodeUnit;
116116
import com.oracle.graal.python.nodes.bytecode_dsl.PBytecodeDSLRootNode;
117117
import com.oracle.graal.python.nodes.bytecode_dsl.PBytecodeDSLRootNodeGen;
@@ -817,16 +817,17 @@ private void writeComplexObject(Object v, int flag) {
817817
} else if (isJavaString(v)) {
818818
writeByte(TYPE_UNICODE | flag);
819819
writeString(TruffleString.fromJavaStringUncached((String) v, TS_ENCODING));
820-
} else if (v instanceof TruffleString) {
821-
writeByte(TYPE_UNICODE | flag);
822-
writeString((TruffleString) v);
823820
} else if (PyUnicodeCheckExactNode.executeUncached(v)) {
824-
if (version >= 3 && IsInternedStringNode.executeUncached((PString) v)) {
821+
if (version >= 3 && IsInternedStringNode.executeUncached(v)) {
825822
writeByte(TYPE_INTERNED | flag);
826823
} else {
827824
writeByte(TYPE_UNICODE | flag);
828825
}
829-
writeString(((PString) v).getValueUncached());
826+
if (v instanceof PString pstring) {
827+
writeString(pstring.getValueUncached());
828+
} else {
829+
writeString((TruffleString) v);
830+
}
830831
} else if (PyTupleCheckExactNode.executeUncached(v)) {
831832
Object[] items = GetObjectArrayNode.executeUncached(v);
832833
if (version >= 4 && items.length < 256) {
@@ -1082,9 +1083,9 @@ private Object readObject(int type, AddRefAndReturn addRef) throws NumberFormatE
10821083
case TYPE_SHORT_ASCII:
10831084
return addRef.run(readAscii(readByteSize(), false));
10841085
case TYPE_INTERNED:
1085-
return addRef.run(StringNodes.InternStringNode.executeUncached(readString()));
1086+
return addRef.run(readString(true));
10861087
case TYPE_UNICODE:
1087-
return addRef.run(readString());
1088+
return addRef.run(readString(false));
10881089
case TYPE_SMALL_TUPLE:
10891090
int smallTupleSize = readByteSize();
10901091
Object[] smallTupleItems = new Object[smallTupleSize];
@@ -1162,9 +1163,18 @@ private void writeString(TruffleString v) {
11621163
writeBytes(ba.getArray(), ba.getOffset(), ba.getLength());
11631164
}
11641165

1165-
private TruffleString readString() {
1166+
private TruffleString readString(boolean intern) {
11661167
int sz = readInt();
1167-
return TruffleString.fromByteArrayUncached(readNBytes(sz), 0, sz, Encoding.UTF_8, true).switchEncodingUncached(TS_ENCODING, TranscodingErrorHandler.DEFAULT_KEEP_SURROGATES_IN_UTF8);
1168+
if (sz == 0) {
1169+
return StringLiterals.T_EMPTY_STRING;
1170+
}
1171+
var utf8String = TruffleString.fromByteArrayUncached(readNBytes(sz), 0, sz, Encoding.UTF_8, true);
1172+
var value = utf8String.switchEncodingUncached(TS_ENCODING, TranscodingErrorHandler.DEFAULT_KEEP_SURROGATES_IN_UTF8);
1173+
if (intern) {
1174+
return PythonUtils.internString(value);
1175+
} else {
1176+
return value;
1177+
}
11681178
}
11691179

11701180
private void writeShortString(String v) throws IOException {
@@ -1174,6 +1184,7 @@ private void writeShortString(String v) throws IOException {
11741184
out.write(bytes);
11751185
}
11761186

1187+
// Only used by readDoubleString() so no interning
11771188
private TruffleString readShortString() {
11781189
int sz = readByteSize();
11791190
byte[] bytes = readNBytes(sz);
@@ -1184,7 +1195,7 @@ private Object readAscii(long sz, boolean intern) {
11841195
byte[] bytes = readNBytes((int) sz);
11851196
TruffleString value = TruffleString.fromByteArrayUncached(bytes, 0, (int) sz, Encoding.US_ASCII, true).switchEncodingUncached(TS_ENCODING);
11861197
if (intern) {
1187-
return StringNodes.InternStringNode.executeUncached(value);
1198+
return PythonUtils.internString(value);
11881199
} else {
11891200
return value;
11901201
}
@@ -1216,7 +1227,7 @@ private Object readJavaArray() {
12161227
case ARRAY_TYPE_BOOLEAN:
12171228
return readBooleanArray();
12181229
case ARRAY_TYPE_STRING:
1219-
return readStringArray();
1230+
return readStringArray(false);
12201231
case ARRAY_TYPE_OBJECT:
12211232
return readObjectArray();
12221233
default:
@@ -1284,14 +1295,14 @@ private boolean[] readBooleanArray() {
12841295
return a;
12851296
}
12861297

1287-
private TruffleString[] readStringArray() {
1298+
private TruffleString[] readStringArray(boolean intern) {
12881299
int length = readInt();
12891300
if (length == 0) {
12901301
return EMPTY_TRUFFLESTRING_ARRAY;
12911302
}
12921303
TruffleString[] a = new TruffleString[length];
12931304
for (int i = 0; i < length; i++) {
1294-
a[i] = readString();
1305+
a[i] = readString(intern);
12951306
}
12961307
return a;
12971308
}
@@ -1346,19 +1357,19 @@ private BytecodeCodeUnit readBytecodeCodeUnit() {
13461357
if (fileVersion != Compiler.BYTECODE_VERSION) {
13471358
throw new MarshalError(ValueError, ErrorMessages.BYTECODE_VERSION_MISMATCH, Compiler.BYTECODE_VERSION, fileVersion);
13481359
}
1349-
TruffleString name = readString();
1350-
TruffleString qualname = readString();
1360+
TruffleString name = readString(true);
1361+
TruffleString qualname = readString(true);
13511362
int argCount = readInt();
13521363
int kwOnlyArgCount = readInt();
13531364
int positionalOnlyArgCount = readInt();
13541365
int stacksize = readInt();
13551366
byte[] code = readBytes();
13561367
byte[] srcOffsetTable = readBytes();
13571368
int flags = readInt();
1358-
TruffleString[] names = readStringArray();
1359-
TruffleString[] varnames = readStringArray();
1360-
TruffleString[] cellvars = readStringArray();
1361-
TruffleString[] freevars = readStringArray();
1369+
TruffleString[] names = readStringArray(true);
1370+
TruffleString[] varnames = readStringArray(true);
1371+
TruffleString[] cellvars = readStringArray(true);
1372+
TruffleString[] freevars = readStringArray(true);
13621373
int[] cell2arg = readIntArray();
13631374
if (cell2arg.length == 0) {
13641375
cell2arg = null;
@@ -1391,16 +1402,16 @@ private BytecodeDSLCodeUnit readBytecodeDSLCodeUnit() {
13911402
}
13921403

13931404
byte[] serialized = readBytes();
1394-
TruffleString name = readString();
1395-
TruffleString qualname = readString();
1405+
TruffleString name = readString(true);
1406+
TruffleString qualname = readString(true);
13961407
int argCount = readInt();
13971408
int kwOnlyArgCount = readInt();
13981409
int positionalOnlyArgCount = readInt();
13991410
int flags = readInt();
1400-
TruffleString[] names = readStringArray();
1401-
TruffleString[] varnames = readStringArray();
1402-
TruffleString[] cellvars = readStringArray();
1403-
TruffleString[] freevars = readStringArray();
1411+
TruffleString[] names = readStringArray(true);
1412+
TruffleString[] varnames = readStringArray(true);
1413+
TruffleString[] cellvars = readStringArray(true);
1414+
TruffleString[] freevars = readStringArray(true);
14041415
int[] cell2arg = readIntArray();
14051416
if (cell2arg.length == 0) {
14061417
cell2arg = null;
@@ -1492,7 +1503,7 @@ private void writeBytecodeDSLCodeUnit(BytecodeDSLCodeUnit code) throws IOExcepti
14921503
}
14931504

14941505
private PCode readCode() {
1495-
TruffleString fileName = readString();
1506+
TruffleString fileName = readString(true);
14961507
int flags = readInt();
14971508

14981509
int codeLen = readSize();

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PosixModuleBuiltins.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
3939
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError;
4040
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
41+
import static com.oracle.graal.python.util.PythonUtils.tsInternedLiteral;
4142
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
4243

4344
import java.lang.management.ManagementFactory;
@@ -295,7 +296,7 @@ public void initialize(Python3Core core) {
295296
posix.setAttribute(PythonBuiltinClassType.PStatvfsResult.getName(), core.lookupType(PythonBuiltinClassType.PStatvfsResult));
296297
posix.setAttribute(PythonBuiltinClassType.PTerminalSize.getName(), core.lookupType(PythonBuiltinClassType.PTerminalSize));
297298

298-
posix.setAttribute(tsLiteral("error"), core.lookupType(PythonBuiltinClassType.OSError));
299+
posix.setAttribute(tsInternedLiteral("error"), core.lookupType(PythonBuiltinClassType.OSError));
299300
}
300301

301302
@Override

0 commit comments

Comments
 (0)