Skip to content

Commit 1d69ef9

Browse files
authored
Merge pull request #117 from static-frame/115/block-index-zero-shape
`BlockIndex` handling for zero-width arrays
2 parents bebdc97 + f86fa4b commit 1d69ef9

File tree

8 files changed

+98
-26
lines changed

8 files changed

+98
-26
lines changed

README.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ ArrayKit requires the following:
3737
What is New in ArrayKit
3838
-------------------------
3939

40+
0.4.1
41+
............
42+
43+
Updated ``BlockIndex.register()`` to handle 0-column 2D arrays and return False.
44+
45+
Added ``BlockIndex.rows``, ``BlockIndex.columns`` properties.
46+
47+
Updated unset ``BlockIndex.dtype`` to return a float dtype.
48+
4049

4150
0.4.0
4251
............

doc/articles/block_index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pickle
1010

1111
from arraykit import BlockIndex
12-
# from arraykit import ErrorInitBlocks
12+
# from arraykit import ErrorInitTypeBlocks
1313
from arraykit import shape_filter
1414
from arraykit import resolve_dtype
1515

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from setuptools import setup
66
from pathlib import Path
77

8-
AK_VERSION = '0.4.0'
8+
AK_VERSION = '0.4.1'
99

1010
def get_long_description() -> str:
1111
return '''The ArrayKit library provides utilities for creating and transforming NumPy arrays, implementing performance-critical StaticFrame operations as Python C extensions.

src/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from ._arraykit import __version__
66
from ._arraykit import ArrayGO as ArrayGO
77
from ._arraykit import BlockIndex as BlockIndex
8-
from ._arraykit import ErrorInitBlocks as ErrorInitBlocks
8+
from ._arraykit import ErrorInitTypeBlocks as ErrorInitTypeBlocks
99

1010
from ._arraykit import immutable_filter as immutable_filter
1111
from ._arraykit import mloc as mloc

src/__init__.pyi

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ _T = tp.TypeVar('_T')
66

77
__version__: str
88

9-
class ErrorInitBlocks:
9+
class ErrorInitTypeBlocks:
1010
def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None: ...
1111
def with_traceback(self, tb: Exception) -> Exception: ...
1212
def __setstate__(self) -> None: ...
@@ -27,10 +27,12 @@ class ArrayGO:
2727

2828
class BlockIndex:
2929
shape: tp.Tuple[int, int]
30-
dtype: tp.Optional[np.dtype]
30+
dtype: np.dtype
31+
rows: int
32+
columns: int
3133

3234
def __init__() -> None: ...
33-
def register(self, __value: object) -> None: ...
35+
def register(self, __value: np.ndarray) -> bool: ...
3436
def to_list(self,) -> tp.List[int]: ...
3537
def to_bytes(self,) -> bytes: ...
3638
def copy(self,) -> 'BlockIndex': ...

src/_arraykit.c

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4114,7 +4114,7 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
41144114
//------------------------------------------------------------------------------
41154115

41164116
static PyTypeObject BlockIndexType;
4117-
static PyObject *ErrorInitBlocks;
4117+
static PyObject *ErrorInitTypeBlocks;
41184118

41194119
// NOTE: we use platform size types here, which are appropriate for the values, but might pose issues if trying to pass pickles between 32 and 64 bit machines.
41204120
typedef struct BlockIndexRecord {
@@ -4777,18 +4777,18 @@ BlockIndex_repr(BlockIndexObject *self) {
47774777
dt);
47784778
}
47794779

4780-
// Returns NULL on error, None otherwise. This checks and raises on non-array inputs, dimensions other than 1 or 2.
4780+
// Returns NULL on error, True if the block should be retained, False if the block has zero columns and should not be retained. This checks and raises on non-array inputs, dimensions other than 1 or 2, and mis-aligned columns.
47814781
static PyObject *
47824782
BlockIndex_register(BlockIndexObject *self, PyObject *value) {
47834783
if (!PyArray_Check(value)) {
4784-
PyErr_Format(ErrorInitBlocks, "Found non-array block: %R", value);
4784+
PyErr_Format(ErrorInitTypeBlocks, "Found non-array block: %R", value);
47854785
return NULL;
47864786
}
47874787
PyArrayObject *a = (PyArrayObject *)value;
47884788
int ndim = PyArray_NDIM(a);
47894789

47904790
if (ndim < 1 || ndim > 2) {
4791-
PyErr_Format(ErrorInitBlocks, "Array block has invalid dimensions: %i", ndim);
4791+
PyErr_Format(ErrorInitTypeBlocks, "Array block has invalid dimensions: %i", ndim);
47924792
return NULL;
47934793
}
47944794
Py_ssize_t increment = ndim == 1 ? 1 : PyArray_DIM(a, 1);
@@ -4799,13 +4799,17 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
47994799
self->row_count = alignment;
48004800
}
48014801
else if (self->row_count != alignment) {
4802-
PyErr_Format(ErrorInitBlocks,
4802+
PyErr_Format(ErrorInitTypeBlocks,
48034803
"Array block has unaligned row count: found %i, expected %i",
48044804
alignment,
48054805
self->row_count);
48064806
return NULL;
48074807
}
48084808

4809+
if (increment == 0) {
4810+
Py_RETURN_FALSE;
4811+
}
4812+
48094813
PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref
48104814
if (self->dtype == NULL) {
48114815
Py_INCREF((PyObject*)dt);
@@ -4829,7 +4833,7 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
48294833
self->bir_count++;
48304834
}
48314835
self->block_count++;
4832-
Py_RETURN_NONE;
4836+
Py_RETURN_TRUE;
48334837
}
48344838

48354839

@@ -4942,20 +4946,36 @@ BlockIndex_iter(BlockIndexObject* self) {
49424946

49434947
static PyObject *
49444948
BlockIndex_shape_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
4949+
// NOTE: this could be cached
49454950
return Py_BuildValue("nn", self->row_count, self->bir_count);
49464951
}
49474952

4953+
static PyObject *
4954+
BlockIndex_rows_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
4955+
return PyLong_FromSsize_t(self->row_count);
4956+
}
4957+
4958+
static PyObject *
4959+
BlockIndex_columns_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
4960+
return PyLong_FromSsize_t(self->bir_count );
4961+
}
4962+
4963+
// Return the resolved dtype for all registered blocks. If no blocks have been registered, this will return a float dtype.
49484964
static PyObject *
49494965
BlockIndex_dtype_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
49504966
if (self->dtype != NULL) {
49514967
Py_INCREF(self->dtype);
49524968
return (PyObject*)self->dtype;
49534969
}
4954-
Py_RETURN_NONE;
4970+
// NOTE: could use NPY_DEFAULT_TYPE here; SF defines this explicitly as float64
4971+
return (PyObject*)PyArray_DescrFromType(NPY_FLOAT64);
49554972
}
49564973

4974+
49574975
static struct PyGetSetDef BlockIndex_getset[] = {
49584976
{"shape", (getter)BlockIndex_shape_getter, NULL, NULL, NULL},
4977+
{"rows", (getter)BlockIndex_rows_getter, NULL, NULL, NULL},
4978+
{"columns", (getter)BlockIndex_columns_getter, NULL, NULL, NULL},
49594979
{"dtype", (getter)BlockIndex_dtype_getter, NULL, NULL, NULL},
49604980
{NULL},
49614981
};
@@ -5380,12 +5400,12 @@ PyInit__arraykit(void)
53805400
{
53815401
import_array();
53825402

5383-
ErrorInitBlocks = PyErr_NewExceptionWithDoc(
5384-
"arraykit.ErrorInitBlocks",
5403+
ErrorInitTypeBlocks = PyErr_NewExceptionWithDoc(
5404+
"arraykit.ErrorInitTypeBlocks",
53855405
"RuntimeError error in block initialization.",
53865406
PyExc_RuntimeError,
53875407
NULL);
5388-
if (ErrorInitBlocks == NULL) {
5408+
if (ErrorInitTypeBlocks == NULL) {
53895409
return NULL;
53905410
}
53915411

@@ -5411,7 +5431,7 @@ PyInit__arraykit(void)
54115431
PyModule_AddObject(m, "BlockIndex", (PyObject *) &BlockIndexType) ||
54125432
PyModule_AddObject(m, "ArrayGO", (PyObject *) &ArrayGOType) ||
54135433
PyModule_AddObject(m, "deepcopy", deepcopy) ||
5414-
PyModule_AddObject(m, "ErrorInitBlocks", ErrorInitBlocks)
5434+
PyModule_AddObject(m, "ErrorInitTypeBlocks", ErrorInitTypeBlocks)
54155435
){
54165436
Py_DECREF(deepcopy);
54175437
Py_XDECREF(m);

test/test_block_index.py

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
import numpy as np
88

99
from arraykit import BlockIndex
10-
from arraykit import ErrorInitBlocks
10+
from arraykit import ErrorInitTypeBlocks
1111

1212

1313
class TestUnit(unittest.TestCase):
1414

1515
def test_block_index_init_a(self) -> None:
1616
bi1 = BlockIndex()
17+
self.assertEqual(bi1.dtype, np.dtype(float))
1718
# print(bi1)
1819

1920
def test_block_index_init_b1(self) -> None:
@@ -52,19 +53,19 @@ def test_block_index_init_d(self) -> None:
5253

5354
def test_block_index_register_a(self) -> None:
5455
bi1 = BlockIndex()
55-
with self.assertRaises(ErrorInitBlocks):
56+
with self.assertRaises(ErrorInitTypeBlocks):
5657
bi1.register('foo')
5758

58-
with self.assertRaises(ErrorInitBlocks):
59+
with self.assertRaises(ErrorInitTypeBlocks):
5960
bi1.register(3.5)
6061

6162
def test_block_index_register_b(self) -> None:
6263

6364
bi1 = BlockIndex()
64-
with self.assertRaises(ErrorInitBlocks):
65+
with self.assertRaises(ErrorInitTypeBlocks):
6566
bi1.register(np.array(0))
6667

67-
with self.assertRaises(ErrorInitBlocks):
68+
with self.assertRaises(ErrorInitTypeBlocks):
6869
bi1.register(np.arange(12).reshape(2,3,2))
6970

7071

@@ -76,6 +77,8 @@ def test_block_index_register_c(self) -> None:
7677
self.assertEqual(bi1.to_list(),
7778
[(0, 0), (1, 0), (2, 0), (2, 1)])
7879
self.assertEqual(bi1.shape, (3, 4))
80+
self.assertEqual(bi1.rows, 3)
81+
self.assertEqual(bi1.columns, 4)
7982

8083
def test_block_index_register_d(self) -> None:
8184
bi1 = BlockIndex()
@@ -87,18 +90,55 @@ def test_block_index_register_d(self) -> None:
8790
[(0, 0), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 0), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5)]
8891
)
8992
self.assertEqual(bi1.shape, (2, 14))
93+
self.assertEqual(bi1.rows, 2)
94+
self.assertEqual(bi1.columns, 14)
9095

9196
def test_block_index_register_e(self) -> None:
9297
bi1 = BlockIndex()
9398
bi1.register(np.arange(2))
94-
with self.assertRaises(ErrorInitBlocks):
99+
with self.assertRaises(ErrorInitTypeBlocks):
95100
bi1.register(np.arange(12).reshape(3,4))
96101

97102

98103
def test_block_index_register_f(self) -> None:
99104
bi1 = BlockIndex()
100-
a1 = np.arange(20000).reshape(2, 10_000) #.reshape(2, 10_000)
105+
a1 = np.arange(20000).reshape(2, 10_000)
101106
bi1.register(a1)
107+
self.assertEqual(bi1.rows, 2)
108+
self.assertEqual(bi1.columns, 10_000)
109+
110+
111+
def test_block_index_register_g(self) -> None:
112+
bi1 = BlockIndex()
113+
a1 = np.array(()).reshape(4, 0)
114+
self.assertFalse(bi1.register(a1))
115+
self.assertEqual(bi1.shape, (4, 0))
116+
# as no dtype has been registered, we will get default float
117+
self.assertEqual(bi1.dtype, np.dtype(float))
118+
119+
a2 = np.arange(8).reshape(4, 2).astype(bool)
120+
self.assertTrue(bi1.register(a2))
121+
self.assertEqual(bi1.shape, (4, 2))
122+
self.assertEqual(bi1.dtype, np.dtype(bool))
123+
124+
125+
def test_block_index_register_h(self) -> None:
126+
bi1 = BlockIndex()
127+
a1 = np.array(()).reshape(0, 4).astype(bool)
128+
self.assertTrue(bi1.register(a1))
129+
self.assertEqual(bi1.shape, (0, 4))
130+
self.assertEqual(bi1.dtype, np.dtype(bool))
131+
132+
a2 = np.array(()).reshape(0, 0).astype(float)
133+
self.assertFalse(bi1.register(a2))
134+
self.assertEqual(bi1.shape, (0, 4))
135+
# dtype is still bool
136+
self.assertEqual(bi1.dtype, np.dtype(bool))
137+
138+
a3 = np.array(()).reshape(0, 3).astype(int)
139+
self.assertTrue(bi1.register(a3))
140+
self.assertEqual(bi1.shape, (0, 7))
141+
self.assertEqual(bi1.dtype, np.dtype(object))
102142

103143

104144
#---------------------------------------------------------------------------
@@ -191,10 +231,11 @@ def test_block_index_getitem_a(self) -> None:
191231
bi1 = BlockIndex()
192232
bi1.register(np.arange(12).reshape(2,6))
193233
self.assertEqual(bi1.shape, (2, 6))
234+
self.assertEqual(bi1.columns, 6)
194235

195236
bi1.register(np.arange(4).reshape(2,2))
196237
self.assertEqual(bi1.shape, (2, 8))
197-
238+
self.assertEqual(bi1.columns, 8)
198239

199240
def test_block_index_getitem_b(self) -> None:
200241
bi1 = BlockIndex()

test/test_pyi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def from_module(cls, module):
3030
continue
3131
obj = getattr(module, name)
3232
if isinstance(obj, type): # a class
33-
if name == ak.ErrorInitBlocks.__name__:
33+
if name == ak.ErrorInitTypeBlocks.__name__:
3434
# skip as there is Python version variability
3535
continue
3636
classes[name] = []

0 commit comments

Comments
 (0)