Skip to content

Commit e8435b2

Browse files
committed
gh-139156: Use PyBytesWriter in UTF-32 encoder
Replace PyBytes_FromStringAndSize() and _PyBytes_Resize() with the PyBytesWriter API.
1 parent e3d9bd6 commit e8435b2

File tree

1 file changed

+36
-44
lines changed

1 file changed

+36
-44
lines changed

Objects/unicodeobject.c

Lines changed: 36 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -6089,59 +6089,55 @@ _PyUnicode_EncodeUTF32(PyObject *str,
60896089
const char *errors,
60906090
int byteorder)
60916091
{
6092-
int kind;
6093-
const void *data;
6094-
Py_ssize_t len;
6095-
PyObject *v;
6096-
uint32_t *out;
6097-
#if PY_LITTLE_ENDIAN
6098-
int native_ordering = byteorder <= 0;
6099-
#else
6100-
int native_ordering = byteorder >= 0;
6101-
#endif
6102-
const char *encoding;
6103-
Py_ssize_t nsize, pos;
6104-
PyObject *errorHandler = NULL;
6105-
PyObject *exc = NULL;
6106-
PyObject *rep = NULL;
6107-
61086092
if (!PyUnicode_Check(str)) {
61096093
PyErr_BadArgument();
61106094
return NULL;
61116095
}
6112-
kind = PyUnicode_KIND(str);
6113-
data = PyUnicode_DATA(str);
6114-
len = PyUnicode_GET_LENGTH(str);
6096+
int kind = PyUnicode_KIND(str);
6097+
const void *data = PyUnicode_DATA(str);
6098+
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
61156099

61166100
if (len > PY_SSIZE_T_MAX / 4 - (byteorder == 0))
61176101
return PyErr_NoMemory();
6118-
nsize = len + (byteorder == 0);
6119-
v = PyBytes_FromStringAndSize(NULL, nsize * 4);
6120-
if (v == NULL)
6102+
Py_ssize_t nsize = len + (byteorder == 0);
6103+
PyBytesWriter *writer = PyBytesWriter_Create(nsize * 4);
6104+
if (writer == NULL) {
61216105
return NULL;
6106+
}
61226107

61236108
/* output buffer is 4-bytes aligned */
6124-
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 4));
6125-
out = (uint32_t *)PyBytes_AS_STRING(v);
6126-
if (byteorder == 0)
6109+
assert(_Py_IS_ALIGNED(PyBytesWriter_GetData(writer), 4));
6110+
uint32_t *out = (uint32_t *)PyBytesWriter_GetData(writer);
6111+
if (byteorder == 0) {
61276112
*out++ = 0xFEFF;
6128-
if (len == 0)
6129-
goto done;
6113+
}
6114+
if (len == 0) {
6115+
return PyBytesWriter_Finish(writer);
6116+
}
61306117

6118+
const char *encoding;
61316119
if (byteorder == -1)
61326120
encoding = "utf-32-le";
61336121
else if (byteorder == 1)
61346122
encoding = "utf-32-be";
61356123
else
61366124
encoding = "utf-32";
61376125

6126+
#if PY_LITTLE_ENDIAN
6127+
int native_ordering = byteorder <= 0;
6128+
#else
6129+
int native_ordering = byteorder >= 0;
6130+
#endif
61386131
if (kind == PyUnicode_1BYTE_KIND) {
61396132
ucs1lib_utf32_encode((const Py_UCS1 *)data, len, &out, native_ordering);
6140-
goto done;
6133+
return PyBytesWriter_Finish(writer);
61416134
}
61426135

6143-
pos = 0;
6144-
while (pos < len) {
6136+
PyObject *errorHandler = NULL;
6137+
PyObject *exc = NULL;
6138+
PyObject *rep = NULL;
6139+
6140+
for (Py_ssize_t pos = 0; pos < len; ) {
61456141
Py_ssize_t newpos, repsize, moreunits;
61466142

61476143
if (kind == PyUnicode_2BYTE_KIND) {
@@ -6188,21 +6184,18 @@ _PyUnicode_EncodeUTF32(PyObject *str,
61886184

61896185
/* four bytes are reserved for each surrogate */
61906186
if (moreunits > 0) {
6191-
Py_ssize_t outpos = out - (uint32_t*) PyBytes_AS_STRING(v);
6192-
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 4) {
6193-
/* integer overflow */
6194-
PyErr_NoMemory();
6187+
out = PyBytesWriter_GrowAndUpdatePointer(writer, 4 * moreunits, out);
6188+
if (out == NULL) {
61956189
goto error;
61966190
}
6197-
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * moreunits) < 0)
6198-
goto error;
6199-
out = (uint32_t*) PyBytes_AS_STRING(v) + outpos;
62006191
}
62016192

62026193
if (PyBytes_Check(rep)) {
62036194
memcpy(out, PyBytes_AS_STRING(rep), repsize);
62046195
out += repsize / 4;
6205-
} else /* rep is unicode */ {
6196+
}
6197+
else {
6198+
/* rep is unicode */
62066199
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
62076200
ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize,
62086201
&out, native_ordering);
@@ -6214,18 +6207,17 @@ _PyUnicode_EncodeUTF32(PyObject *str,
62146207
/* Cut back to size actually needed. This is necessary for, for example,
62156208
encoding of a string containing isolated surrogates and the 'ignore'
62166209
handler is used. */
6217-
nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
6218-
if (nsize != PyBytes_GET_SIZE(v))
6219-
_PyBytes_Resize(&v, nsize);
6210+
nsize = (unsigned char*) out - (unsigned char*) PyBytesWriter_GetData(writer);
6211+
PyObject *bytes = PyBytesWriter_FinishWithSize(writer, nsize);
62206212
Py_XDECREF(errorHandler);
62216213
Py_XDECREF(exc);
6222-
done:
6223-
return v;
6214+
return bytes;
6215+
62246216
error:
62256217
Py_XDECREF(rep);
62266218
Py_XDECREF(errorHandler);
62276219
Py_XDECREF(exc);
6228-
Py_XDECREF(v);
6220+
PyBytesWriter_Discard(writer);
62296221
return NULL;
62306222
}
62316223

0 commit comments

Comments
 (0)