Skip to content

Commit 2b66ea7

Browse files
authored
Merge pull request #120 from static-frame/119/block-index-shape-cache
`BlockIndex` shape caching
2 parents 373b8da + ba99482 commit 2b66ea7

File tree

2 files changed

+40
-14
lines changed

2 files changed

+40
-14
lines changed

src/_arraykit.c

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4178,6 +4178,8 @@ typedef struct BlockIndexObject {
41784178
Py_ssize_t bir_capacity;
41794179
BlockIndexRecord* bir;
41804180
PyArray_Descr* dtype;
4181+
int8_t shape_recache;
4182+
PyObject* shape;
41814183
} BlockIndexObject;
41824184

41834185

@@ -4341,7 +4343,7 @@ BIIterSeq_iternext(BIIterSeqObject *self) {
43414343
PyArrayObject *a = (PyArrayObject *)self->selector;
43424344
switch (PyArray_TYPE(a)) { // type of passed in array
43434345
case NPY_INT64:
4344-
t = *(npy_int64*)PyArray_GETPTR1(a, i);
4346+
t = (Py_ssize_t)*(npy_int64*)PyArray_GETPTR1(a, i);
43454347
break;
43464348
case NPY_INT32:
43474349
t = *(npy_int32*)PyArray_GETPTR1(a, i);
@@ -4353,7 +4355,7 @@ BIIterSeq_iternext(BIIterSeqObject *self) {
43534355
t = *(npy_int8*)PyArray_GETPTR1(a, i);
43544356
break;
43554357
case NPY_UINT64:
4356-
t = *(npy_uint64*)PyArray_GETPTR1(a, i);
4358+
t = (Py_ssize_t)*(npy_uint64*)PyArray_GETPTR1(a, i);
43574359
break;
43584360
case NPY_UINT32:
43594361
t = *(npy_uint32*)PyArray_GETPTR1(a, i);
@@ -4776,6 +4778,9 @@ BlockIndex_init(PyObject *self, PyObject *args, PyObject *kwargs) {
47764778
bi->bir_count = bir_count;
47774779
bi->bir_capacity = bir_capacity;
47784780

4781+
bi->shape_recache = 1; // always init to true
4782+
bi->shape = NULL;
4783+
47794784
// Load the bi->bir struct array, if defined
47804785
bi->bir = NULL;
47814786
// always set bi to capacity defined at this point
@@ -4800,6 +4805,7 @@ BlockIndex_init(PyObject *self, PyObject *args, PyObject *kwargs) {
48004805
return -1;
48014806
}
48024807
}
4808+
48034809
return 0;
48044810
}
48054811

@@ -4808,9 +4814,9 @@ BlockIndex_dealloc(BlockIndexObject *self) {
48084814
if (self->bir != NULL) {
48094815
PyMem_Free(self->bir);
48104816
}
4811-
if (self->dtype != NULL) {
4812-
Py_DECREF((PyObject*)self->dtype);
4813-
}
4817+
// both dtype and shape might not be set
4818+
Py_XDECREF((PyObject*)self->dtype);
4819+
Py_XDECREF(self->shape);
48144820
Py_TYPE(self)->tp_free((PyObject *)self);
48154821
}
48164822

@@ -4841,7 +4847,7 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
48414847
}
48424848
Py_ssize_t increment = ndim == 1 ? 1 : PyArray_DIM(a, 1);
48434849

4844-
// assign alignment on first observation; otherwise take
4850+
// assign alignment on first observation; otherwise enforce alignment. We do this even if the array has no columns.
48454851
Py_ssize_t alignment = PyArray_DIM(a, 0);
48464852
if (self->row_count == -1) {
48474853
self->row_count = alignment;
@@ -4854,16 +4860,20 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
48544860
return NULL;
48554861
}
48564862

4863+
// if we are not adding columns, we are not adding types, so we are not changing the dtype or shape
48574864
if (increment == 0) {
48584865
Py_RETURN_FALSE;
48594866
}
48604867

4868+
48614869
PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref
4862-
if (self->dtype == NULL) {
4870+
self->shape_recache = 1; // adjusting columns, must recache shape
4871+
4872+
if (self->dtype == NULL) { // if not already set
48634873
Py_INCREF((PyObject*)dt);
48644874
self->dtype = dt;
48654875
}
4866-
else if (!PyDataType_ISOBJECT(self->dtype)) {
4876+
else if (!PyDataType_ISOBJECT(self->dtype)) { // if object cannot resolve further
48674877
PyArray_Descr* dtr = AK_ResolveDTypes(self->dtype, dt); // new ref
48684878
Py_DECREF((PyObject*)self->dtype);
48694879
self->dtype = dtr;
@@ -4972,6 +4982,9 @@ BlockIndex_copy(BlockIndexObject *self, PyObject *Py_UNUSED(unused))
49724982
bi->bir_count = self->bir_count;
49734983
bi->bir_capacity = self->bir_capacity;
49744984

4985+
bi->shape_recache = 1; // could copy, but do not want to copy a pending cache state
4986+
bi->shape = NULL;
4987+
49754988
bi->bir = NULL;
49764989
AK_BI_BIR_new(bi); // do initial alloc to self->bir_capacity
49774990
memcpy(bi->bir,
@@ -4993,9 +5006,16 @@ BlockIndex_iter(BlockIndexObject* self) {
49935006

49945007

49955008
static PyObject *
4996-
BlockIndex_shape_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
4997-
// NOTE: this could be cached
4998-
return Py_BuildValue("nn", self->row_count, self->bir_count);
5009+
BlockIndex_shape_getter(BlockIndexObject *self, void* Py_UNUSED(closure))
5010+
{
5011+
if (self->shape == NULL || self->shape_recache) {
5012+
Py_XDECREF(self->shape); // get rid of old if it exists
5013+
self->shape = Py_BuildValue("nn", self->row_count, self->bir_count); // new ref
5014+
}
5015+
// shape is not null and shape_recache is false
5016+
Py_INCREF(self->shape); // for caller
5017+
self->shape_recache = 0;
5018+
return self->shape;
49995019
}
50005020

50015021
static PyObject *

test/test_block_index.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@ class TestUnit(unittest.TestCase):
1515
def test_block_index_init_a(self) -> None:
1616
bi1 = BlockIndex()
1717
self.assertEqual(bi1.dtype, np.dtype(float))
18-
# print(bi1)
18+
s = bi1.shape
19+
self.assertEqual(s, (-1, 0))
20+
del bi1
21+
self.assertEqual(s, (-1, 0))
22+
del s
1923

2024
def test_block_index_init_b1(self) -> None:
2125
with self.assertRaises(ValueError):
@@ -165,10 +169,12 @@ def test_block_index_copy_a(self) -> None:
165169
bi1 = BlockIndex()
166170
bi1.register(np.arange(12).reshape(2,6))
167171
bi1.register(np.arange(4).reshape(2,2))
168-
172+
s1 = bi1.shape
169173
bi2 = bi1.copy()
170174
self.assertEqual(bi1.to_list(), bi2.to_list())
171-
175+
self.assertEqual(bi1.dtype, bi2.dtype)
176+
del bi1
177+
self.assertEqual(bi2.shape, s1)
172178

173179
def test_block_index_copy_b(self) -> None:
174180
dt1 = np.dtype(np.float64)

0 commit comments

Comments
 (0)