Skip to content

Commit 7e2f8af

Browse files
committed
Experimental support for dtype.hasobject
Add NPY_DT_get_clear_loop implementation in python. Include test for the key `custom_as_ctypes_type()` conversion.
1 parent 4435be0 commit 7e2f8af

File tree

3 files changed

+214
-11
lines changed

3 files changed

+214
-11
lines changed

sample_dtypes/scalar.py

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,44 @@
88
BYTES_BUF_ORDER = 'A'
99

1010

11+
def custom_as_ctypes_type(dtype: np.dtype):
    """Wrapper around `numpy.ctypeslib.as_ctypes_type` that maps 'O' to py_object

    Dtypes without object members are passed straight to
    `numpy.ctypeslib.as_ctypes_type`. Object-bearing dtypes are converted
    recursively:

    * sub-array dtypes like ('O', 3) become (nested) ctypes arrays,
    * structured dtypes become a `ctypes.Structure` subclass,
    * the plain object dtype becomes `ctypes.py_object`.

    Raises:
        TypeError: `dtype` has objects but is neither structured, a sub-array
            nor the plain object dtype.
        NotImplementedError: the ctypes structure does not reproduce the field
            offsets or the itemsize of a structured `dtype`.
    """
    if not dtype.hasobject:
        return np.ctypeslib.as_ctypes_type(dtype)

    if dtype.fields is None:
        # Simple dtype or array
        if dtype.subdtype:
            # Handle sub-array types like ('O', 3)
            base, shape = dtype.subdtype
            ct_type = custom_as_ctypes_type(base)
            # Innermost dimension first, so the outermost one wraps last
            for dim in reversed(shape):
                ct_type = ct_type * dim
            return ct_type

        # Simple object (non-objects are already handled)
        if dtype != np.dtype(object):
            raise TypeError(f'Unsupported object-bearing dtype: {dtype!r}')
        return ctypes.py_object

    # Structured dtype
    # Walk `dtype.names`, not `dtype.fields`: the latter additionally lists
    # every titled field under its title, which would emit that field twice.
    layout = [(name, *dtype.fields[name][:2]) for name in dtype.names]
    new_fields = [
        (name, custom_as_ctypes_type(field_dtype))
        for name, field_dtype, _ in layout
    ]

    class CustomStruct(ctypes.Structure):
        _pack_ = dtype.alignment
        _fields_ = new_fields

    # The structure is used as a view over raw memory that holds object
    # pointers, so a layout mismatch must fail loudly instead of corrupting it.
    for name, _, offset in layout:
        ct_offset = getattr(CustomStruct, name).offset
        if ct_offset != offset:
            raise NotImplementedError(
                f'Field {name!r} offset mismatch: ctypes {ct_offset}, '
                f'dtype {offset}'
            )
    if ctypes.sizeof(CustomStruct) != dtype.itemsize:
        raise NotImplementedError(
            f'Structure size mismatch: ctypes {ctypes.sizeof(CustomStruct)}, '
            f'dtype {dtype.itemsize}'
        )

    return CustomStruct
41+
42+
1143
class SampleScalar:
1244
_ndarr: np.ndarray
1345

1446
def __init__(self, *, shape: tuple[int, ...] = DEFAULT_SHAPE, dtype=None):
    """Allocate the backing ndarray for `shape` and `dtype` via `np.empty`"""
    print(f'TODO: scalar.py: SampleScalar.__init__({shape=}, {dtype=})')
    backing = np.empty(shape, dtype=dtype)
    self._ndarr = backing
1749

1850
def copy(self) -> 'SampleScalar':
    """Create a new instance of the same class with matching shape and dtype

    NOTE: Only shape and dtype are handed to the constructor here.
    """
    source = self._ndarr
    cls = type(self)
    return cls(shape=source.shape, dtype=source.dtype)
@@ -26,6 +58,10 @@ def elsize(self) -> int:
2658
def alignment(self) -> int:
2759
return self._ndarr.dtype.alignment
2860

61+
@property
def hasobject(self) -> bool:
    """Whether the dtype of the wrapped ndarray reports `hasobject`"""
    descr = self._ndarr.dtype
    return descr.hasobject
64+
2965
def is_compatible(self, other: 'SampleScalar') -> bool:
    """Report whether `other` wraps an ndarray of the same shape as this one"""
    print(f'scalar.py: SampleScalar.is_compatible({other})')
    own_shape, other_shape = self._ndarr.shape, other._ndarr.shape
    return own_shape == other_shape
@@ -36,10 +72,35 @@ def __repr__(self) -> str:
3672
def _get_np_view(self, dataptr: int) -> np.ndarray:
    """Create numpy array that uses `dataptr` memory block

    The element type comes from `custom_as_ctypes_type`, so dtypes that
    contain objects are supported as well.
    """
    elem_type = custom_as_ctypes_type(self._ndarr.dtype)
    typed_ptr = ctypes.cast(dataptr, ctypes.POINTER(elem_type))
    return np.ctypeslib.as_array(typed_ptr, shape=self._ndarr.shape)
4278

79+
def _get_strided_np_view(
    self, dataptr: int, size: int, stride: int
) -> np.ndarray:
    """Create numpy array that uses strided `dataptr` memory block

    Args:
        dataptr: address of the first element.
        size: number of elements in the block.
        stride: distance in bytes between consecutive elements.

    Returns:
        Array view over a ctypes array of `size` padded structures, each
        holding one element in its single field `'_'`.

    Raises:
        ValueError: `stride` is smaller than one element while more than one
            element is addressed.
        RuntimeError: padding between elements is needed but this Python
            version does not support the ctypes `_align_` attribute.
    """
    # Local import keeps this method self-contained
    import sys

    # Start with ctypes object to cover single SampleScalar
    ct_base = custom_as_ctypes_type(self._ndarr.dtype)
    for dim in reversed(self._ndarr.shape):
        ct_base = ct_base * dim
    item_size = ctypes.sizeof(ct_base)

    # Single ctypes object to cover whole strided data-block
    if stride >= item_size:
        align = stride
    elif size <= 1:
        # At most one element is addressed, so the stride is irrelevant
        # (it may e.g. be zero); 0 is the same as not setting `_align_`
        align = 0
    else:
        # Explicit raise: an `assert` would be stripped under `python -O`
        raise ValueError(
            'Insufficient stride size ' f'{stride} / {item_size}'
        )

    if size > 1 and align > item_size and sys.version_info < (3, 13):
        # ctypes honours `_align_` from Python 3.13 on; older versions ignore
        # it silently and would treat the padded block as contiguous.
        raise RuntimeError(
            'Strided views with padding need Python 3.13+ (ctypes `_align_`)'
        )

    class PaddedStruct(ctypes.Structure):
        # Rounds the structure size up to `align`, i.e. to the stride
        _align_ = align
        _fields_ = [('_', ct_base)]

    ct_block = PaddedStruct * size

    # Create ndarray view from this ctypes object
    ct_array = ctypes.cast(dataptr, ctypes.POINTER(ct_block))
    return np.ctypeslib.as_array(ct_array, shape=())
103+
43104
def setitem(self, src: 'SampleScalar', dataptr: int) -> None:
44105
"""Python `NPY_DT_setitem` implementation
45106
@@ -59,3 +120,13 @@ def getitem(self, dataptr: int) -> 'SampleScalar':
59120
new = self.copy()
60121
new._ndarr[...] = self._get_np_view(dataptr)
61122
return new
123+
124+
def clear_loop(self, data: int, size: int, stride: int) -> int:
    """Python implementation for `out_loop` returned by `NPY_DT_get_clear_loop`

    Overwrites the strided block described by `data`, `size` and `stride`
    with zeros and returns 0.

    NOTE: See setitem()
    """
    print(f'scalar.py: SampleScalar.clear_loop {data=}, {size=}, {stride=}')
    # Force dereference of all object-entries
    self._get_strided_np_view(data, size, stride)[...] = 0
    return 0

src/dtype.c

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ SampleDTypeObject *new_sampledtype_instance(PyObject *sample_scalar) {
3939
return NULL;
4040
}
4141

42+
attr = PyObject_GetAttrString(sample_scalar, "hasobject");
43+
if (attr == NULL) {
44+
return NULL;
45+
}
46+
int hasobject = PyObject_IsTrue(attr);
47+
Py_DECREF(attr);
48+
if (hasobject == -1 && PyErr_Occurred()) {
49+
return NULL;
50+
}
51+
4252
// Allocate final DType object
4353
SampleDTypeObject *new = (SampleDTypeObject *)PyArrayDescr_Type.tp_new(
4454
(PyTypeObject *)&SampleDType, NULL, NULL);
@@ -48,6 +58,10 @@ SampleDTypeObject *new_sampledtype_instance(PyObject *sample_scalar) {
4858

4959
Py_INCREF(sample_scalar);
5060
new->sample_scalar = sample_scalar;
61+
if (hasobject) {
62+
// TODO: Other flags from NPY_OBJECT_DTYPE_FLAGS
63+
new->base.flags |= NPY_ITEM_HASOBJECT | NPY_NEEDS_INIT;
64+
}
5165
new->base.elsize = elsize;
5266
new->base.alignment = alignment;
5367

@@ -105,7 +119,7 @@ sampledtype_ensure_canonical(SampleDTypeObject *self) {
105119

106120
static int sampledtype_setitem(SampleDTypeObject *descr, PyObject *obj,
107121
char *dataptr) {
108-
printf("%s, target elsise %lld, type_num %d\n", __func__, descr->base.elsize,
122+
printf("%s, target elsise %zd, type_num %d\n", __func__, descr->base.elsize,
109123
descr->base.type_num);
110124

111125
if (Py_TYPE(obj) != SampleScalar_Type) {
@@ -125,13 +139,51 @@ static int sampledtype_setitem(SampleDTypeObject *descr, PyObject *obj,
125139
}
126140

127141
/* NPY_DT_getitem: build the Python object for the element stored at
 * `dataptr` by delegating to the `getitem` method of the wrapped scalar.
 * Returns whatever that call returns (NULL with an exception on failure). */
static PyObject *sampledtype_getitem(SampleDTypeObject *descr, char *dataptr) {
  printf("%s, source elsize %zd, type_num %d\n", __func__, descr->base.elsize,
         descr->base.type_num);

  /* The element address is handed to Python as a plain integer */
  Py_ssize_t address = (Py_ssize_t)dataptr;
  PyObject *scalar = descr->sample_scalar;
  return PyObject_CallMethod(scalar, "getitem", "n", address);
}
134148

149+
static int sampledtype_clear_loop(void *NPY_UNUSED(traverse_context),
150+
const PyArray_Descr *_descr, char *data,
151+
npy_intp size, npy_intp stride,
152+
NpyAuxData *NPY_UNUSED(auxdata)) {
153+
const SampleDTypeObject *descr = (const SampleDTypeObject *)_descr;
154+
printf("%s, source elsize %zd, type_num %d, data %p, size %zd, stride %zd\n",
155+
__func__, descr->base.elsize, descr->base.type_num, data, size,
156+
stride);
157+
158+
PyObject *res = PyObject_CallMethod(descr->sample_scalar, "clear_loop", "nnn",
159+
(Py_ssize_t)data, size, stride);
160+
if (res == NULL) {
161+
return -1;
162+
}
163+
int ret = PyLong_AsLong(res);
164+
Py_DECREF(res);
165+
if (ret == -1 && PyErr_Occurred()) {
166+
return -1;
167+
}
168+
169+
return ret;
170+
}
171+
172+
static int sampledtype_get_clear_loop(void *NPY_UNUSED(traverse_context),
173+
const SampleDTypeObject *descr,
174+
int aligned, npy_intp fixed_stride,
175+
PyArrayMethod_TraverseLoop **out_loop,
176+
NpyAuxData **NPY_UNUSED(out_auxdata),
177+
NPY_ARRAYMETHOD_FLAGS *flags) {
178+
printf("%s, source elsize %zd, type_num %d, aligned %d, fixed_stride %zd\n",
179+
__func__, descr->base.elsize, descr->base.type_num, aligned,
180+
fixed_stride);
181+
182+
*out_loop = &sampledtype_clear_loop;
183+
*flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
184+
return 0;
185+
}
186+
135187
static PyType_Slot SampleDType_Slots[] = {
136188
{NPY_DT_discover_descr_from_pyobject,
137189
&sampledtype_discover_descriptor_from_pyobject},
@@ -140,6 +192,7 @@ static PyType_Slot SampleDType_Slots[] = {
140192
{NPY_DT_ensure_canonical, &sampledtype_ensure_canonical},
141193
{NPY_DT_setitem, &sampledtype_setitem},
142194
{NPY_DT_getitem, &sampledtype_getitem},
195+
{NPY_DT_get_clear_loop, &sampledtype_get_clear_loop},
143196
{0, NULL}};
144197

145198
static PyObject *sampledtype_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args,

tests/test_refcount.py

Lines changed: 86 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1-
"""Test internal object reference counting"""
1+
"""Test internal object reference counting
2+
3+
NOTE:
4+
Most of these tests fail when debugging, because "ms-python.debugpy" takes
5+
extra local object ownership.
6+
"""
27

38
import sys
49
import numpy as np
10+
import pytest
11+
import sample_dtypes
512

613

714
def test_dtype_refcnt():
8-
"""Test SampleScalar internal dtype reference counts
9-
10-
NOTE: This fails when debugging as of extra ownership of local object
11-
"""
12-
import sample_dtypes
13-
15+
"""Test SampleScalar internal dtype reference counts"""
1416
# Use non-singleton (non-immortal) dtypes to have valid ref-counts
1517
dtype = np.dtype([('a', np.int16)])
1618
assert (
@@ -23,3 +25,80 @@ def test_dtype_refcnt():
2325

2426
del arr, scalar
2527
assert sys.getrefcount(dtype) == 2, 'Unexpected dtype ref-count'
28+
29+
30+
@pytest.mark.parametrize(
    'dtype',
    [
        # Simple object dtype
        np.dtype(object),
        # Array of objects
        np.dtype((object, 3)),
        # Structured dtype to include object (alignment issue)
        np.dtype([('i', np.int16), ('obj', object)]),
        # Complex/compound dtype with Structured and Array elements
        np.dtype(
            ([('i', np.uint16), ('obj_arr', [('obj', object)], (2, 3))], 4)
        ),
    ],
)
def test_custom_as_ctypes_type(dtype):
    """Test scalar.custom_as_ctypes_type conversion, see np.ctypeslib.as_ctypes_type

    The converted ctypes type must have the size of `dtype`, and writing
    through a view built from it must change the viewed array.
    """
    import ctypes
    from sample_dtypes.scalar import custom_as_ctypes_type

    ct_type = custom_as_ctypes_type(dtype)
    converted_size = ctypes.sizeof(ct_type)
    assert converted_size == dtype.itemsize, 'Converted size does NOT match'

    shape = (3, 2)
    expected = np.empty(shape, dtype=dtype)
    target = np.empty(shape, dtype=dtype)
    # Generate some different source values
    fill = np.arange(expected.size) + 10
    expected.flat[...] = fill.astype(expected.dtype)

    # Create view that uses `target` data-buffer
    address = target.__array_interface__['data'][0]
    typed_ptr = ctypes.cast(address, ctypes.POINTER(ct_type))
    target_view = np.ctypeslib.as_array(typed_ptr, shape=shape)
    # Replace view content, expect same content in `target`
    target_view[...] = expected
    np.testing.assert_array_equal(
        target, expected, 'Mismatch between array and its view'
    )
69+
70+
71+
def test_sub_dtype_refcnt():
    """Test reference counts of an object stored inside SampleScalar elements

    One non-immortal object is placed into a scalar, assigned into two array
    elements, and its reference count is checked after every step.
    """
    # Use generic-object dtype to test sub-object ownership transfer
    scalar = sample_dtypes.SampleScalar(dtype=object)
    arr: np.ndarray = np.empty(3, sample_dtypes.SampleDType(scalar.copy()))

    payload = f'string {1}'  # Dynamic (non-immortal) string object
    scalar._ndarr[0, 1] = payload
    # Three references expected: `payload`, `scalar._ndarr` and `getrefcount`
    assert sys.getrefcount(payload) == 3, (
        'Unexpected ref-count for non-immortal object'
    )

    # Do NPY_DT_setitem
    arr[1] = scalar
    # One more reference expected: the element stored in `arr`
    assert sys.getrefcount(payload) == 4, 'Unexpected ref-count after assignment'
    assert payload is arr[1]._ndarr[0, 1], 'Same object must be referenced by arr'
    arr[2] = scalar
    # And another one: the second element stored in `arr`
    assert sys.getrefcount(payload) == 5, 'Unexpected ref-count after assignment'
    assert payload is arr[2]._ndarr[0, 1], 'Same object must be referenced by arr'

    # Drop references one by one
    del scalar
    # The `scalar._ndarr` reference must be gone
    assert sys.getrefcount(payload) == 4, (
        'Unexpected ref-count after scalar reference deletion'
    )
    del arr
    # Both references held by `arr` elements must be gone
    assert sys.getrefcount(payload) == 2, (
        'Unexpected ref-count after ndarray reference deletion'
    )

0 commit comments

Comments
 (0)