Skip to content

Commit a4e4e72

Browse files
eregon authored and msimacek committed
[GR-68916] Intern strings with a weak cache to deduplicate strings in the heap
* Make the cache static final because TpSlotVarargsBuiltin needs it and that's called from a static initializer
1 parent 210bf46 commit a4e4e72

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+641
-261
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
This changelog summarizes major changes between GraalVM versions of the Python
44
language runtime. The main focus is on user-observable behavior of the engine.
55

6+
## Version 25.1.0
7+
* Intern string literals in source files
8+
69
## Version 25.0.1
710
* Allow users to keep going on unsupported JDK/OS/ARCH combinations at their own risk by opting out of early failure using `-Dtruffle.UseFallbackRuntime=true`, `-Dpolyglot.engine.userResourceCache=/set/to/a/writeable/dir`, `-Dpolyglot.engine.allowUnsupportedPlatform=true`, `-Dpolyglot.python.UnsupportedPlatformEmulates=[linux|macos|windows]`, and `-Dorg.graalvm.python.resources.exclude=native.files`.
811

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/Python3Core.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import static com.oracle.graal.python.nodes.StringLiterals.T_GRAALPYTHON;
4848
import static com.oracle.graal.python.nodes.StringLiterals.T_JAVA;
4949
import static com.oracle.graal.python.nodes.StringLiterals.T_REF;
50+
import static com.oracle.graal.python.util.PythonUtils.toInternedTruffleStringUncached;
5051
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
5152
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
5253

@@ -1206,7 +1207,7 @@ private void initializeTypes() {
12061207
for (PythonBuiltins builtin : builtins) {
12071208
CoreFunctions annotation = builtin.getClass().getAnnotation(CoreFunctions.class);
12081209
if (annotation.defineModule().length() > 0) {
1209-
createModule(toTruffleStringUncached(annotation.defineModule()), builtin);
1210+
createModule(toInternedTruffleStringUncached(annotation.defineModule()), builtin);
12101211
}
12111212
}
12121213
// publish builtin types in the corresponding modules
@@ -1240,13 +1241,13 @@ private void populateBuiltins() {
12401241
builtin.initialize(this);
12411242
CoreFunctions annotation = builtin.getClass().getAnnotation(CoreFunctions.class);
12421243
if (annotation.defineModule().length() > 0) {
1243-
PythonModule module = builtinModules.get(toTruffleStringUncached(annotation.defineModule()));
1244+
PythonModule module = builtinModules.get(toInternedTruffleStringUncached(annotation.defineModule()));
12441245
if (module != null) {
12451246
addBuiltinsTo(module, builtin);
12461247
}
12471248
}
12481249
if (annotation.extendsModule().length() > 0) {
1249-
PythonModule module = builtinModules.get(toTruffleStringUncached(annotation.extendsModule()));
1250+
PythonModule module = builtinModules.get(toInternedTruffleStringUncached(annotation.extendsModule()));
12501251
if (module != null) {
12511252
addBuiltinsTo(module, builtin);
12521253
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/PythonBuiltinClassType.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import static com.oracle.graal.python.nodes.BuiltinNames.J__STRUCT;
7070
import static com.oracle.graal.python.nodes.BuiltinNames.J__THREAD;
7171
import static com.oracle.graal.python.nodes.BuiltinNames.J__TYPING;
72+
import static com.oracle.graal.python.util.PythonUtils.toInternedTruffleStringUncached;
7273
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
7374

7475
import java.lang.reflect.Field;
@@ -1545,10 +1546,10 @@ public TypeBuilder doc(String doc) {
15451546
private final TpSlots slots;
15461547

15471548
PythonBuiltinClassType(String name, PythonBuiltinClassType base, TypeBuilder builder) {
1548-
this.name = toTruffleStringUncached(name);
1549+
this.name = toInternedTruffleStringUncached(name);
15491550
this.base = base;
1550-
this.publishInModule = toTruffleStringUncached(builder.publishInModule);
1551-
this.moduleName = builder.moduleName != null ? toTruffleStringUncached(builder.moduleName) : null;
1551+
this.publishInModule = toInternedTruffleStringUncached(builder.publishInModule);
1552+
this.moduleName = builder.moduleName != null ? toInternedTruffleStringUncached(builder.moduleName) : null;
15521553
if (builder.moduleName != null && !J_BUILTINS.equals(builder.moduleName)) {
15531554
printName = toTruffleStringUncached(builder.moduleName + "." + name);
15541555
} else {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/PythonBuiltins.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___DOC__;
2929
import static com.oracle.graal.python.nodes.truffle.TruffleStringMigrationHelpers.assertNoJavaString;
3030
import static com.oracle.graal.python.nodes.truffle.TruffleStringMigrationHelpers.ensureNoJavaString;
31+
import static com.oracle.graal.python.util.PythonUtils.toInternedTruffleStringUncached;
3132
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
3233

3334
import java.util.HashMap;
@@ -91,7 +92,7 @@ public void initialize(Python3Core core) {
9192
} else {
9293
declaresExplicitSelf = true;
9394
}
94-
TruffleString tsName = toTruffleStringUncached(builtin.name());
95+
TruffleString tsName = toInternedTruffleStringUncached(builtin.name());
9596
PythonLanguage language = core.getLanguage();
9697
RootCallTarget callTarget = language.initBuiltinCallTarget(l -> new BuiltinFunctionRootNode(l, builtin, factory, declaresExplicitSelf), factory.getNodeClass(),
9798
builtin.name());
@@ -111,7 +112,7 @@ public void initialize(Python3Core core) {
111112
} else if (builtin.isStaticmethod()) {
112113
callable = PFactory.createStaticmethodFromCallableObj(language, function);
113114
}
114-
builtinFunctions.put(toTruffleStringUncached(builtin.name()), callable);
115+
builtinFunctions.put(tsName, callable);
115116
});
116117
}
117118

@@ -170,7 +171,7 @@ public static int numDefaults(Builtin builtin) {
170171
* instead in {@link #postInitialize}.
171172
*/
172173
protected final void addBuiltinConstant(String name, Object value) {
173-
addBuiltinConstant(toTruffleStringUncached(name), value);
174+
addBuiltinConstant(toInternedTruffleStringUncached(name), value);
174175
}
175176

176177
/**

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MarshalModuleBuiltins.java

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@
9393
import com.oracle.graal.python.builtins.objects.ints.PInt;
9494
import com.oracle.graal.python.builtins.objects.set.PBaseSet;
9595
import com.oracle.graal.python.builtins.objects.str.PString;
96-
import com.oracle.graal.python.builtins.objects.str.StringNodes;
9796
import com.oracle.graal.python.builtins.objects.str.StringNodes.IsInternedStringNode;
9897
import com.oracle.graal.python.builtins.objects.type.TypeNodes.IsSameTypeNode;
9998
import com.oracle.graal.python.compiler.BytecodeCodeUnit;
@@ -112,6 +111,7 @@
112111
import com.oracle.graal.python.lib.PyUnicodeCheckExactNode;
113112
import com.oracle.graal.python.nodes.ErrorMessages;
114113
import com.oracle.graal.python.nodes.PRaiseNode;
114+
import com.oracle.graal.python.nodes.StringLiterals;
115115
import com.oracle.graal.python.nodes.bytecode_dsl.BytecodeDSLCodeUnit;
116116
import com.oracle.graal.python.nodes.bytecode_dsl.PBytecodeDSLRootNode;
117117
import com.oracle.graal.python.nodes.bytecode_dsl.PBytecodeDSLRootNodeGen;
@@ -817,16 +817,17 @@ private void writeComplexObject(Object v, int flag) {
817817
} else if (isJavaString(v)) {
818818
writeByte(TYPE_UNICODE | flag);
819819
writeString(TruffleString.fromJavaStringUncached((String) v, TS_ENCODING));
820-
} else if (v instanceof TruffleString) {
821-
writeByte(TYPE_UNICODE | flag);
822-
writeString((TruffleString) v);
823820
} else if (PyUnicodeCheckExactNode.executeUncached(v)) {
824-
if (version >= 3 && IsInternedStringNode.executeUncached((PString) v)) {
821+
if (version >= 3 && IsInternedStringNode.executeUncached(v)) {
825822
writeByte(TYPE_INTERNED | flag);
826823
} else {
827824
writeByte(TYPE_UNICODE | flag);
828825
}
829-
writeString(((PString) v).getValueUncached());
826+
if (v instanceof PString pstring) {
827+
writeString(pstring.getValueUncached());
828+
} else {
829+
writeString((TruffleString) v);
830+
}
830831
} else if (PyTupleCheckExactNode.executeUncached(v)) {
831832
Object[] items = GetObjectArrayNode.executeUncached(v);
832833
if (version >= 4 && items.length < 256) {
@@ -1082,9 +1083,9 @@ private Object readObject(int type, AddRefAndReturn addRef) throws NumberFormatE
10821083
case TYPE_SHORT_ASCII:
10831084
return addRef.run(readAscii(readByteSize(), false));
10841085
case TYPE_INTERNED:
1085-
return addRef.run(StringNodes.InternStringNode.executeUncached(readString()));
1086+
return addRef.run(readString(true));
10861087
case TYPE_UNICODE:
1087-
return addRef.run(readString());
1088+
return addRef.run(readString(false));
10881089
case TYPE_SMALL_TUPLE:
10891090
int smallTupleSize = readByteSize();
10901091
Object[] smallTupleItems = new Object[smallTupleSize];
@@ -1162,9 +1163,18 @@ private void writeString(TruffleString v) {
11621163
writeBytes(ba.getArray(), ba.getOffset(), ba.getLength());
11631164
}
11641165

1165-
private TruffleString readString() {
1166+
private TruffleString readString(boolean intern) {
11661167
int sz = readInt();
1167-
return TruffleString.fromByteArrayUncached(readNBytes(sz), 0, sz, Encoding.UTF_8, true).switchEncodingUncached(TS_ENCODING, TranscodingErrorHandler.DEFAULT_KEEP_SURROGATES_IN_UTF8);
1168+
if (sz == 0) {
1169+
return StringLiterals.T_EMPTY_STRING;
1170+
}
1171+
var utf8String = TruffleString.fromByteArrayUncached(readNBytes(sz), 0, sz, Encoding.UTF_8, true);
1172+
var value = utf8String.switchEncodingUncached(TS_ENCODING, TranscodingErrorHandler.DEFAULT_KEEP_SURROGATES_IN_UTF8);
1173+
if (intern) {
1174+
return PythonUtils.internString(value);
1175+
} else {
1176+
return value;
1177+
}
11681178
}
11691179

11701180
private void writeShortString(String v) throws IOException {
@@ -1174,6 +1184,7 @@ private void writeShortString(String v) throws IOException {
11741184
out.write(bytes);
11751185
}
11761186

1187+
// Only used by readDoubleString() so no interning
11771188
private TruffleString readShortString() {
11781189
int sz = readByteSize();
11791190
byte[] bytes = readNBytes(sz);
@@ -1184,7 +1195,7 @@ private Object readAscii(long sz, boolean intern) {
11841195
byte[] bytes = readNBytes((int) sz);
11851196
TruffleString value = TruffleString.fromByteArrayUncached(bytes, 0, (int) sz, Encoding.US_ASCII, true).switchEncodingUncached(TS_ENCODING);
11861197
if (intern) {
1187-
return StringNodes.InternStringNode.executeUncached(value);
1198+
return PythonUtils.internString(value);
11881199
} else {
11891200
return value;
11901201
}
@@ -1216,7 +1227,7 @@ private Object readJavaArray() {
12161227
case ARRAY_TYPE_BOOLEAN:
12171228
return readBooleanArray();
12181229
case ARRAY_TYPE_STRING:
1219-
return readStringArray();
1230+
return readStringArray(false);
12201231
case ARRAY_TYPE_OBJECT:
12211232
return readObjectArray();
12221233
default:
@@ -1284,14 +1295,14 @@ private boolean[] readBooleanArray() {
12841295
return a;
12851296
}
12861297

1287-
private TruffleString[] readStringArray() {
1298+
private TruffleString[] readStringArray(boolean intern) {
12881299
int length = readInt();
12891300
if (length == 0) {
12901301
return EMPTY_TRUFFLESTRING_ARRAY;
12911302
}
12921303
TruffleString[] a = new TruffleString[length];
12931304
for (int i = 0; i < length; i++) {
1294-
a[i] = readString();
1305+
a[i] = readString(intern);
12951306
}
12961307
return a;
12971308
}
@@ -1346,19 +1357,19 @@ private BytecodeCodeUnit readBytecodeCodeUnit() {
13461357
if (fileVersion != Compiler.BYTECODE_VERSION) {
13471358
throw new MarshalError(ValueError, ErrorMessages.BYTECODE_VERSION_MISMATCH, Compiler.BYTECODE_VERSION, fileVersion);
13481359
}
1349-
TruffleString name = readString();
1350-
TruffleString qualname = readString();
1360+
TruffleString name = readString(true);
1361+
TruffleString qualname = readString(true);
13511362
int argCount = readInt();
13521363
int kwOnlyArgCount = readInt();
13531364
int positionalOnlyArgCount = readInt();
13541365
int stacksize = readInt();
13551366
byte[] code = readBytes();
13561367
byte[] srcOffsetTable = readBytes();
13571368
int flags = readInt();
1358-
TruffleString[] names = readStringArray();
1359-
TruffleString[] varnames = readStringArray();
1360-
TruffleString[] cellvars = readStringArray();
1361-
TruffleString[] freevars = readStringArray();
1369+
TruffleString[] names = readStringArray(true);
1370+
TruffleString[] varnames = readStringArray(true);
1371+
TruffleString[] cellvars = readStringArray(true);
1372+
TruffleString[] freevars = readStringArray(true);
13621373
int[] cell2arg = readIntArray();
13631374
if (cell2arg.length == 0) {
13641375
cell2arg = null;
@@ -1391,16 +1402,16 @@ private BytecodeDSLCodeUnit readBytecodeDSLCodeUnit() {
13911402
}
13921403

13931404
byte[] serialized = readBytes();
1394-
TruffleString name = readString();
1395-
TruffleString qualname = readString();
1405+
TruffleString name = readString(true);
1406+
TruffleString qualname = readString(true);
13961407
int argCount = readInt();
13971408
int kwOnlyArgCount = readInt();
13981409
int positionalOnlyArgCount = readInt();
13991410
int flags = readInt();
1400-
TruffleString[] names = readStringArray();
1401-
TruffleString[] varnames = readStringArray();
1402-
TruffleString[] cellvars = readStringArray();
1403-
TruffleString[] freevars = readStringArray();
1411+
TruffleString[] names = readStringArray(true);
1412+
TruffleString[] varnames = readStringArray(true);
1413+
TruffleString[] cellvars = readStringArray(true);
1414+
TruffleString[] freevars = readStringArray(true);
14041415
int[] cell2arg = readIntArray();
14051416
if (cell2arg.length == 0) {
14061417
cell2arg = null;
@@ -1492,7 +1503,7 @@ private void writeBytecodeDSLCodeUnit(BytecodeDSLCodeUnit code) throws IOExcepti
14921503
}
14931504

14941505
private PCode readCode() {
1495-
TruffleString fileName = readString();
1506+
TruffleString fileName = readString(true);
14961507
int flags = readInt();
14971508

14981509
int codeLen = readSize();

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PosixModuleBuiltins.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
3939
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError;
4040
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
41+
import static com.oracle.graal.python.util.PythonUtils.tsInternedLiteral;
4142
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
4243

4344
import java.lang.management.ManagementFactory;
@@ -295,7 +296,7 @@ public void initialize(Python3Core core) {
295296
posix.setAttribute(PythonBuiltinClassType.PStatvfsResult.getName(), core.lookupType(PythonBuiltinClassType.PStatvfsResult));
296297
posix.setAttribute(PythonBuiltinClassType.PTerminalSize.getName(), core.lookupType(PythonBuiltinClassType.PTerminalSize));
297298

298-
posix.setAttribute(tsLiteral("error"), core.lookupType(PythonBuiltinClassType.OSError));
299+
posix.setAttribute(tsInternedLiteral("error"), core.lookupType(PythonBuiltinClassType.OSError));
299300
}
300301

301302
@Override

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/StructModuleBuiltins.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import static com.oracle.graal.python.nodes.BuiltinNames.T__STRUCT;
1010
import static com.oracle.graal.python.runtime.exception.PythonErrorType.StructError;
1111
import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING;
12-
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
12+
import static com.oracle.graal.python.util.PythonUtils.tsInternedLiteral;
1313

1414
import java.util.Arrays;
1515
import java.util.List;
@@ -51,7 +51,7 @@
5151
@CoreFunctions(defineModule = J__STRUCT, isEager = true)
5252
public class StructModuleBuiltins extends PythonBuiltins {
5353
private static final int DEFAULT_CACHE_SIZE = 100;
54-
private static final TruffleString T_ERROR = tsLiteral("error");
54+
private static final TruffleString T_ERROR = tsInternedLiteral("error");
5555
private final LRUStructCache cache = new LRUStructCache(DEFAULT_CACHE_SIZE);
5656

5757
static class LRUStructCache extends LRUCache<Object, PStruct> {

0 commit comments

Comments
 (0)