Skip to content

Commit ba9217e

Browse files
committed
Issue #gh-139122: Reimplement base UUID type, uuid4(), and uuid7() in C
The C implementation considerably boosts the performance of the key UUID operations: ------------------------------------ Operation Speedup ------------------------------------ uuid4() generation 15.01x uuid7() generation 29.64x UUID from string 6.76x UUID from bytes 5.16x str(uuid) conversion 6.66x ------------------------------------ Summary of changes: * The UUID type is reimplemented in C in its entirety. * The pure-Python implementation is kept around and is used if the C implementation isn't available for some reason. * Both implementations are tested extensively; additional tests are added to ensure that the C implementation of the type follows the pure Python implementation fully. * The Python implementation stores UUID values as int objects. The C implementation stores them as a `uint8_t[16]` array. * The C implementation has a faster hash() implementation but also caches the computed hash value to speed up cases when UUIDs are used as set/dict keys. * The C implementation has a freelist to make new UUID object instantiation as fast as possible. * uuid4() and uuid7() are now implemented in C. The biggest performance boost (10x) comes from overfetching entropy to decrease the number of _PyOS_URandom() calls. On its own it's a safe optimization with the edge case that Unix fork needs to be explicitly handled. We do that by comparing the current PID to the PID of when the random buffer was populated. * Portions of code are coming from my implementation of faster UUID in gel-python [1]. I did use AI during the development, but basically had to rewrite the code it generated to be more idiomatic and efficient. * The benchmark can be found here [2]. * This PR makes Python UUID operations as fast as they are in NodeJS and Bun runtimes. [1] https://github.com/MagicStack/py-pgproto/blob/b8109fb311a59f30f9947567a13508da9a776564/uuid.pyx [2] https://gist.github.com/1st1/f03e816f34a61e4d46c78ff98baf4818
1 parent d6a6fe2 commit ba9217e

File tree

6 files changed

+2052
-38
lines changed

6 files changed

+2052
-38
lines changed

Include/internal/pycore_pylifecycle.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ extern const char* _Py_gitversion(void);
9898
// Export for '_asyncio' shared extension
9999
PyAPI_FUNC(int) _Py_IsInterpreterFinalizing(PyInterpreterState *interp);
100100

101-
/* Random */
102-
extern int _PyOS_URandom(void *buffer, Py_ssize_t size);
101+
// Export for '_uuid' shared extension
102+
PyAPI_FUNC(int) _PyOS_URandom(void *buffer, Py_ssize_t size);
103103

104104
// Export for '_random' shared extension
105105
PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);

Lib/test/test_uuid.py

Lines changed: 110 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def get_command_stdout(command, args):
3535

3636
class BaseTestUUID:
3737
uuid = None
38+
is_c_uuid = False
3839

3940
def test_nil_uuid(self):
4041
nil_uuid = self.uuid.NIL
@@ -282,14 +283,16 @@ def test_exceptions(self):
282283
badvalue(lambda: self.uuid.UUID('123456781234567812345678z2345678'))
283284

284285
# Badly formed bytes.
285-
badvalue(lambda: self.uuid.UUID(bytes='abc'))
286-
badvalue(lambda: self.uuid.UUID(bytes='\0'*15))
287-
badvalue(lambda: self.uuid.UUID(bytes='\0'*17))
286+
badtype(lambda: self.uuid.UUID(bytes='abc'))
287+
badvalue(lambda: self.uuid.UUID(bytes=b'abc'))
288+
badvalue(lambda: self.uuid.UUID(bytes=b'\0'*15))
289+
badvalue(lambda: self.uuid.UUID(bytes=b'\0'*17))
288290

289291
# Badly formed bytes_le.
290-
badvalue(lambda: self.uuid.UUID(bytes_le='abc'))
291-
badvalue(lambda: self.uuid.UUID(bytes_le='\0'*15))
292-
badvalue(lambda: self.uuid.UUID(bytes_le='\0'*17))
292+
badtype(lambda: self.uuid.UUID(bytes_le='abc'))
293+
badvalue(lambda: self.uuid.UUID(bytes_le=b'abc'))
294+
badvalue(lambda: self.uuid.UUID(bytes_le=b'\0'*15))
295+
badvalue(lambda: self.uuid.UUID(bytes_le=b'\0'*17))
293296

294297
# Badly formed fields.
295298
badvalue(lambda: self.uuid.UUID(fields=(1,)))
@@ -877,12 +880,18 @@ def test_uuid6_test_vectors(self):
877880
equal((u.int >> 80) & 0xffff, 0x232a)
878881
equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c)
879882

880-
def test_uuid7(self):
883+
def test_uuid7_functional(self):
881884
equal = self.assertEqual
882885
u = self.uuid.uuid7()
883886
equal(u.variant, self.uuid.RFC_4122)
884887
equal(u.version, 7)
885888

889+
def test_uuid7_mock(self):
890+
if self.is_c_uuid:
891+
self.skipTest("C implementation of uuid7 cannot be tested with mocks")
892+
893+
equal = self.assertEqual
894+
886895
# 1 Jan 2023 12:34:56.123_456_789
887896
timestamp_ns = 1672533296_123_456_789 # ns precision
888897
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
@@ -940,7 +949,15 @@ def test_uuid7_uniqueness(self):
940949
versions = {u.version for u in uuids}
941950
self.assertSetEqual(versions, {7})
942951

943-
def test_uuid7_monotonicity(self):
952+
def test_uuid7_monotonicity_functional(self):
953+
equal = self.assertEqual
954+
us = [self.uuid.uuid7() for _ in range(10_000)]
955+
equal(us, sorted(us))
956+
957+
def test_uuid7_monotonicity_mock(self):
958+
if self.is_c_uuid:
959+
self.skipTest("C implementation of uuid7 cannot be tested with mocks")
960+
944961
equal = self.assertEqual
945962

946963
us = [self.uuid.uuid7() for _ in range(10_000)]
@@ -1003,7 +1020,10 @@ def test_uuid7_monotonicity(self):
10031020

10041021
self.assertLess(u1, u2)
10051022

1006-
def test_uuid7_timestamp_backwards(self):
1023+
def test_uuid7_timestamp_backwards_mock(self):
1024+
if self.is_c_uuid:
1025+
self.skipTest("C implementation of uuid7 cannot be tested with mocks")
1026+
10071027
equal = self.assertEqual
10081028
# 1 Jan 2023 12:34:56.123_456_789
10091029
timestamp_ns = 1672533296_123_456_789 # ns precision
@@ -1043,7 +1063,10 @@ def test_uuid7_timestamp_backwards(self):
10431063
equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1)
10441064
equal(u.int & 0xffff_ffff, tail)
10451065

1046-
def test_uuid7_overflow_counter(self):
1066+
def test_uuid7_overflow_counter_mock(self):
1067+
if self.is_c_uuid:
1068+
self.skipTest("C implementation of uuid7 cannot be tested with mocks")
1069+
10471070
equal = self.assertEqual
10481071
# 1 Jan 2023 12:34:56.123_456_789
10491072
timestamp_ns = 1672533296_123_456_789 # ns precision
@@ -1149,6 +1172,7 @@ def test_uuid_weakref(self):
11491172

11501173
class CommandLineTestCases:
11511174
uuid = None # to be defined in subclasses
1175+
is_c_uuid = False
11521176

11531177
def do_test_standalone_uuid(self, version):
11541178
stdout = io.StringIO()
@@ -1257,6 +1281,7 @@ class TestUUIDWithoutExtModule(CommandLineTestCases, BaseTestUUID, unittest.Test
12571281
@unittest.skipUnless(c_uuid, 'requires the C _uuid module')
12581282
class TestUUIDWithExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase):
12591283
uuid = c_uuid
1284+
is_c_uuid = True
12601285

12611286
def check_has_stable_libuuid_extractable_node(self):
12621287
if not self.uuid._has_stable_extractable_node:
@@ -1287,6 +1312,7 @@ def test_windows_getnode_from_libuuid(self):
12871312

12881313
class BaseTestInternals:
12891314
_uuid = py_uuid
1315+
is_c_uuid = False
12901316

12911317
def check_parse_mac(self, aix):
12921318
if not aix:
@@ -1480,6 +1506,7 @@ class TestInternalsWithoutExtModule(BaseTestInternals, unittest.TestCase):
14801506
@unittest.skipUnless(c_uuid, 'requires the C _uuid module')
14811507
class TestInternalsWithExtModule(BaseTestInternals, unittest.TestCase):
14821508
uuid = c_uuid
1509+
is_c_uuid = True
14831510

14841511
@unittest.skipUnless(os.name == 'posix', 'requires Posix')
14851512
def test_unix_getnode(self):
@@ -1497,5 +1524,78 @@ def test_windll_getnode(self):
14971524
self.check_node(node)
14981525

14991526

1527+
@unittest.skipUnless(c_uuid, "requires the C _uuid module")
1528+
class TestCImplementationCompat(unittest.TestCase):
1529+
def test_compatibility(self):
1530+
import uuid
1531+
1532+
PU = uuid._py_UUID
1533+
CU = uuid._c_UUID
1534+
N = 1000
1535+
1536+
uuids = [
1537+
"00000000-0000-0000-0000-000000000000",
1538+
"ffffffff-ffff-ffff-ffff-ffffffffffff",
1539+
"c0bec4fd-e4e3-050c-a362-da3f734ffd56", # regression
1540+
*(str(uuid.uuid4()) for _ in range(N)),
1541+
*(str(uuid.uuid7()) for _ in range(N)),
1542+
*(str(uuid.uuid1()) for _ in range(N)),
1543+
*(str(uuid.UUID(bytes=os.urandom(16))) for _ in range(N)),
1544+
]
1545+
1546+
def full_test(p, u):
1547+
self.assertEqual(p, u)
1548+
self.assertEqual(p.hex, u.hex)
1549+
self.assertEqual(p.int, u.int)
1550+
self.assertEqual(p.variant, u.variant)
1551+
self.assertEqual(p.version, u.version)
1552+
self.assertEqual(p.is_safe, u.is_safe)
1553+
self.assertEqual(p.bytes, u.bytes)
1554+
self.assertEqual(p.bytes_le, u.bytes_le)
1555+
self.assertEqual(p.fields, u.fields)
1556+
self.assertEqual(p.time_low, u.time_low)
1557+
self.assertEqual(p.time_mid, u.time_mid)
1558+
self.assertEqual(p.time_hi_version, u.time_hi_version)
1559+
self.assertEqual(p.clock_seq_hi_variant, u.clock_seq_hi_variant)
1560+
self.assertEqual(p.clock_seq_low, u.clock_seq_low)
1561+
self.assertEqual(p.node, u.node)
1562+
1563+
all_ps = set()
1564+
all_us = set()
1565+
for uuid_str in uuids:
1566+
with self.subTest(uuid=uuid_str):
1567+
p = PU(uuid_str)
1568+
u = CU(uuid_str)
1569+
full_test(p, u)
1570+
1571+
u2 = CU(bytes_le=p.bytes_le)
1572+
full_test(p, u2)
1573+
1574+
u3 = CU(fields=p.fields)
1575+
full_test(p, u3)
1576+
1577+
u4 = CU(int=p.int)
1578+
full_test(p, u4)
1579+
1580+
u5 = CU(
1581+
hex=p.hex,
1582+
is_safe=uuid.SafeUUID.safe,
1583+
)
1584+
full_test(
1585+
PU(
1586+
uuid_str,
1587+
is_safe=uuid.SafeUUID.safe,
1588+
),
1589+
u5,
1590+
)
1591+
1592+
all_ps.add(p)
1593+
all_us.add(u)
1594+
1595+
self.assertEqual(len(all_ps), len(all_us))
1596+
self.assertEqual(len(all_ps), len(uuids))
1597+
1598+
1599+
15001600
if __name__ == '__main__':
15011601
unittest.main()

Lib/uuid.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class SafeUUID:
9999

100100

101101
_UINT_128_MAX = (1 << 128) - 1
102+
102103
# 128-bit mask to clear the variant and version bits of a UUID integral value
103104
_RFC_4122_CLEARFLAGS_MASK = ~((0xf000 << 64) | (0xc000 << 48))
104105
# RFC 4122 variant bits and version bits to activate on a UUID integral value.
@@ -111,6 +112,19 @@ class SafeUUID:
111112
_RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
112113

113114

115+
# Import optional C extension at toplevel, to help disabling it when testing
116+
try:
117+
import _uuid
118+
_generate_time_safe = getattr(_uuid, "generate_time_safe", None)
119+
_has_stable_extractable_node = _uuid.has_stable_extractable_node
120+
_UuidCreate = getattr(_uuid, "UuidCreate", None)
121+
except ImportError:
122+
_uuid = None
123+
_generate_time_safe = None
124+
_has_stable_extractable_node = False
125+
_UuidCreate = None
126+
127+
114128
class UUID:
115129
"""Instances of the UUID class represent UUIDs as specified in RFC 4122.
116130
UUID objects are immutable, hashable, and usable as dictionary keys.
@@ -219,13 +233,21 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
219233
raise ValueError('badly formed hexadecimal UUID string')
220234
int = int_(hex, 16)
221235
elif bytes_le is not None:
236+
if not isinstance(bytes_le, bytes_):
237+
raise TypeError(
238+
f'a bytes-like object is required, not {type(bytes_le).__name__!r}'
239+
)
222240
if len(bytes_le) != 16:
223241
raise ValueError('bytes_le is not a 16-char string')
224242
assert isinstance(bytes_le, bytes_), repr(bytes_le)
225243
bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] +
226244
bytes_le[8-1:6-1:-1] + bytes_le[8:])
227245
int = int_.from_bytes(bytes) # big endian
228246
elif bytes is not None:
247+
if not isinstance(bytes, bytes_):
248+
raise TypeError(
249+
f'a bytes-like object is required, not {type(bytes).__name__!r}'
250+
)
229251
if len(bytes) != 16:
230252
raise ValueError('bytes is not a 16-char string')
231253
assert isinstance(bytes, bytes_), repr(bytes)
@@ -234,7 +256,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
234256
if len(fields) != 6:
235257
raise ValueError('fields is not a 6-tuple')
236258
(time_low, time_mid, time_hi_version,
237-
clock_seq_hi_variant, clock_seq_low, node) = fields
259+
clock_seq_hi_variant, clock_seq_low, node) = fields
238260
if not 0 <= time_low < (1 << 32):
239261
raise ValueError('field 1 out of range (need a 32-bit value)')
240262
if not 0 <= time_mid < (1 << 16):
@@ -249,7 +271,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
249271
raise ValueError('field 6 out of range (need a 48-bit value)')
250272
clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
251273
int = ((time_low << 96) | (time_mid << 80) |
252-
(time_hi_version << 64) | (clock_seq << 48) | node)
274+
(time_hi_version << 64) | (clock_seq << 48) | node)
253275
if not 0 <= int <= _UINT_128_MAX:
254276
raise ValueError('int is out of range (need a 128-bit value)')
255277
if version is not None:
@@ -629,19 +651,6 @@ def _netstat_getnode():
629651
return _find_mac_under_heading('netstat', '-ian', b'Address')
630652

631653

632-
# Import optional C extension at toplevel, to help disabling it when testing
633-
try:
634-
import _uuid
635-
_generate_time_safe = getattr(_uuid, "generate_time_safe", None)
636-
_has_stable_extractable_node = _uuid.has_stable_extractable_node
637-
_UuidCreate = getattr(_uuid, "UuidCreate", None)
638-
except ImportError:
639-
_uuid = None
640-
_generate_time_safe = None
641-
_has_stable_extractable_node = False
642-
_UuidCreate = None
643-
644-
645654
def _unix_getnode():
646655
"""Get the hardware address on Unix using the _uuid extension module."""
647656
if _generate_time_safe and _has_stable_extractable_node:
@@ -932,6 +941,20 @@ def uuid8(a=None, b=None, c=None):
932941
return UUID._from_int(int_uuid_8)
933942

934943

944+
_py_uuid4 = uuid4
945+
_py_uuid7 = uuid7
946+
_py_UUID = UUID
947+
try:
948+
from _uuid import UUID, uuid4, uuid7
949+
except ImportError:
950+
_c_UUID = None
951+
_c_uuid4 = None
952+
_c_uuid7 = None
953+
else:
954+
_c_UUID = UUID
955+
_c_uuid4 = uuid4
956+
_c_uuid7 = uuid7
957+
935958
def main():
936959
"""Run the uuid command line interface."""
937960
uuid_funcs = {
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
Reimplement base UUID type, uuid4(), and uuid7() in C
2+
3+
The C implementation considerably boosts the performance of the key UUID
4+
operations:
5+
6+
------------------------------------
7+
Operation Speedup
8+
------------------------------------
9+
uuid4() generation 15.01x
10+
uuid7() generation 29.64x
11+
UUID from string 6.76x
12+
UUID from bytes 5.16x
13+
str(uuid) conversion 6.66x
14+
------------------------------------
15+
16+
Summary of changes:
17+
18+
* The UUID type is reimplemented in C in its entirety.
19+
20+
* The pure-Python implementation is kept around and is used if the C implementation
21+
isn't available for some reason.
22+
23+
* Both implementations are tested extensively; additional tests are
24+
added to ensure that the C implementation of the type follows the pure
25+
Python implementation fully.
26+
27+
* The Python implementation stores UUID values as int objects. The C
28+
implementation stores them as `uint8_t[16]` array.
29+
30+
* The C implementation has faster hash() implementation but also caches
31+
the computed hash value to speed up cases when UUIDs are used as
32+
set/dict keys.
33+
34+
* The C implementation has a freelist to make new UUID object
35+
instantiation as fast as possible.
36+
37+
* uuid4() and uuid7() are now implemented in C. The biggest performance
38+
boost (10x) comes from overfetching entropy to decrease the number of
39+
_PyOS_URandom() calls. On its own it's a safe optimization with the
40+
edge case that Unix fork needs to be explicitly handled. We do that by
41+
comparing the current PID to the PID of when the random buffer was
42+
populated.
43+
44+
* Portions of code are coming from my implementation of faster UUID
45+
in gel-python [1]. I did use AI during the development, but basically
46+
had to rewrite the code it generated to be more idiomatic and
47+
efficient.
48+
49+
* The benchmark can be found here [2].
50+
51+
* This PR makes Python UUID operations as fast as they are in NodeJS and
52+
Bun runtimes.
53+
54+
[1]
55+
https://github.com/MagicStack/py-pgproto/blob/b8109fb311a59f30f9947567a13508da9a776564/uuid.pyx
56+
57+
[2] https://gist.github.com/1st1/f03e816f34a61e4d46c78ff98baf4818

0 commit comments

Comments
 (0)