Skip to content

Commit aac9767

Browse files
authored
Merge pull request #183 from static-frame/182/objectable
Objectable filter, `astype_array`
2 parents 72a5141 + f2ac2ff commit aac9767

File tree

10 files changed

+427
-14
lines changed

10 files changed

+427
-14
lines changed

README.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ What is New in ArrayKit
4141

4242
Now building free-threaded compatible wheels for Python 3.13.
4343

44+
Added ``is_objectable()`` and ``is_objectable_dt64()``.
45+
46+
Added ``astype_array()``.
4447

4548

4649
1.0.9

src/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@
3131
from ._arraykit import array_to_tuple_array as array_to_tuple_array
3232
from ._arraykit import array_to_tuple_iter as array_to_tuple_iter
3333
from ._arraykit import nonzero_1d as nonzero_1d
34-
34+
from ._arraykit import is_objectable_dt64 as is_objectable_dt64
35+
from ._arraykit import is_objectable as is_objectable
36+
from ._arraykit import astype_array as astype_array
3537
from ._arraykit import AutoMap as AutoMap
3638
from ._arraykit import FrozenAutoMap as FrozenAutoMap
3739
from ._arraykit import NonUniqueError as NonUniqueError
40+

src/__init__.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) ->
203203
def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
204204
def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
205205
def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ...
206+
def is_objectable_dt64(__array: np.ndarray, /) -> bool: ...
207+
def is_objectable(__array: np.ndarray, /) -> bool: ...
208+
def astype_array(__array: np.ndarray, __dtype: np.dtype | None, /) -> np.ndarray: ...
206209
def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ...
207210
def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ...
208211
def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ...

src/_arraykit.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ static PyMethodDef arraykit_methods[] = {
5252
NULL},
5353
{"count_iteration", count_iteration, METH_O, NULL},
5454
{"nonzero_1d", nonzero_1d, METH_O, NULL},
55+
{"is_objectable_dt64", is_objectable_dt64, METH_O, NULL},
56+
{"is_objectable", is_objectable, METH_O, NULL},
57+
{"astype_array", astype_array, METH_VARARGS, NULL},
5558
{"isna_element",
5659
(PyCFunction)isna_element,
5760
METH_VARARGS | METH_KEYWORDS,
@@ -95,6 +98,7 @@ PyInit__arraykit(void)
9598
return NULL;
9699
}
97100

101+
// store a reference to the deepcopy function
98102
PyObject *copy = PyImport_ImportModule("copy");
99103
if (copy == NULL) {
100104
return NULL;
@@ -105,6 +109,18 @@ PyInit__arraykit(void)
105109
return NULL;
106110
}
107111

112+
// store a year dtype object
113+
PyObject* dt_year_str = PyUnicode_FromString("datetime64[Y]");
114+
if (!dt_year_str) return NULL;
115+
116+
PyArray_Descr* dt_year = NULL;
117+
if (!PyArray_DescrConverter2(dt_year_str, &dt_year)) {
118+
Py_DECREF(dt_year_str);
119+
return NULL;
120+
}
121+
Py_DECREF(dt_year_str);
122+
123+
108124
PyObject *m = PyModule_Create(&arraykit_module);
109125
if (!m ||
110126
PyModule_AddStringConstant(m, "__version__", Py_STRINGIFY(AK_VERSION)) ||
@@ -128,9 +144,11 @@ PyInit__arraykit(void)
128144
PyModule_AddObject(m, "ErrorInitTypeBlocks", ErrorInitTypeBlocks) ||
129145
PyModule_AddObject(m, "AutoMap", (PyObject *)&AMType) ||
130146
PyModule_AddObject(m, "FrozenAutoMap", (PyObject *)&FAMType) ||
131-
PyModule_AddObject(m, "NonUniqueError", NonUniqueError)
147+
PyModule_AddObject(m, "NonUniqueError", NonUniqueError) ||
148+
PyModule_AddObject(m, "dt_year", (PyObject *)dt_year)
132149
){
133-
Py_DECREF(deepcopy);
150+
Py_XDECREF(deepcopy);
151+
Py_XDECREF(dt_year);
134152
Py_XDECREF(m);
135153
return NULL;
136154
}

src/methods.c

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,139 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
201201
return AK_nonzero_1d(array);
202202
}
203203

204+
// Report whether the datetime64 / timedelta64 array `a` can be converted to an object array without data loss, as determined by AK_is_objectable_dt64. Returns a new reference to True or False, or NULL with an exception set on error. `m` is the module object, from which the stored "dt_year" dtype is read.
PyObject*
is_objectable_dt64(PyObject *m, PyObject *a) {
    // AK_CHECK_NUMPY_ARRAY is defined elsewhere; it validates that `a` is an array before the cast below
    AK_CHECK_NUMPY_ARRAY(a);
    PyArrayObject* array = (PyArrayObject*)a;

    // AK_is_objectable_dt64 reads the datetime metadata of the dtype without checking that it is present; reject other kinds here rather than dereferencing missing metadata
    char kind = PyArray_DESCR(array)->kind;
    if (kind != 'M' && kind != 'm') {
        PyErr_SetString(PyExc_TypeError,
                "array must have a datetime64 or timedelta64 dtype");
        return NULL;
    }

    // this returns a new reference, or NULL if the attribute cannot be read
    PyObject* dt_year = PyObject_GetAttrString(m, "dt_year");
    if (!dt_year) {
        return NULL;
    }
    int is_objectable = AK_is_objectable_dt64(array, dt_year);
    Py_DECREF(dt_year);

    if (is_objectable < 0) {
        return NULL; // exception already set by the failed cast
    }
    return PyBool_FromLong(is_objectable);
}
224+
225+
226+
// Report whether the array `a` can be converted to an object array without data loss. Arrays whose dtype kind is not 'M' or 'm' are always reported as True; for 'M' / 'm' arrays the answer comes from AK_is_objectable_dt64. Returns a new reference to True or False, or NULL with an exception set on error. `m` is the module object, from which the stored "dt_year" dtype is read.
PyObject*
is_objectable(PyObject *m, PyObject *a) {
    // AK_CHECK_NUMPY_ARRAY is defined elsewhere; it validates that `a` is an array before the cast below
    AK_CHECK_NUMPY_ARRAY(a);
    PyArrayObject* array = (PyArrayObject*)a;

    char kind = PyArray_DESCR(array)->kind;
    if (kind != 'M' && kind != 'm') {
        Py_RETURN_TRUE;
    }

    // this returns a new reference, or NULL if the attribute cannot be read
    PyObject* dt_year = PyObject_GetAttrString(m, "dt_year");
    if (!dt_year) {
        return NULL;
    }
    int is_objectable = AK_is_objectable_dt64(array, dt_year);
    Py_DECREF(dt_year);

    if (is_objectable < 0) {
        return NULL; // exception already set by the failed cast
    }
    if (is_objectable == 0) {
        Py_RETURN_FALSE;
    }
    Py_RETURN_TRUE;
}
249+
250+
// Convert array to the dtype provided. NOTE: mutable arrays will be returned unless the input array is immutable and no dtype change is needed
251+
PyObject*
252+
astype_array(PyObject* m, PyObject* args) {
253+
254+
PyObject* a = NULL;
255+
PyObject* dtype_spec = Py_None;
256+
257+
if (!PyArg_ParseTuple(args, "O!|O:astype_array",
258+
&PyArray_Type, &a,
259+
&dtype_spec)) {
260+
return NULL;
261+
}
262+
PyArrayObject* array = (PyArrayObject*)a;
263+
264+
PyArray_Descr* dtype = NULL;
265+
if (dtype_spec == Py_None) {
266+
dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE);
267+
} else {
268+
if (!PyArray_DescrConverter(dtype_spec, &dtype)) {
269+
return NULL;
270+
}
271+
}
272+
273+
if (PyArray_EquivTypes(PyArray_DESCR(array), dtype)) {
274+
Py_DECREF(dtype);
275+
276+
if (PyArray_ISWRITEABLE(array)) {
277+
PyObject* result = PyArray_NewCopy(array, NPY_ANYORDER);
278+
if (!result) {
279+
return NULL;
280+
}
281+
return result;
282+
}
283+
else { // already immutable
284+
Py_INCREF(a);
285+
return a;
286+
}
287+
}
288+
// if converting to an object
289+
if (dtype->type_num == NPY_OBJECT) {
290+
char kind = PyArray_DESCR(array)->kind;
291+
if ((kind == 'M' || kind == 'm')) {
292+
PyObject* dt_year = PyObject_GetAttrString(m, "dt_year");
293+
int is_objectable = AK_is_objectable_dt64(array, dt_year);
294+
Py_DECREF(dt_year);
295+
296+
if (!is_objectable) {
297+
PyObject* result = PyArray_NewLikeArray(array, NPY_ANYORDER, dtype, 0);
298+
if (!result) {
299+
Py_DECREF(dtype);
300+
return NULL;
301+
}
302+
PyObject** data = (PyObject**)PyArray_DATA((PyArrayObject*)result);
303+
304+
PyArrayIterObject* it = (PyArrayIterObject*)PyArray_IterNew(a);
305+
if (!it) {
306+
Py_DECREF(result);
307+
return NULL;
308+
}
309+
310+
npy_intp i = 0;
311+
while (it->index < it->size) {
312+
PyObject* item = PyArray_ToScalar(it->dataptr, array);
313+
if (!item) {
314+
Py_DECREF(result);
315+
Py_DECREF(it);
316+
return NULL;
317+
}
318+
data[i++] = item;
319+
PyArray_ITER_NEXT(it);
320+
}
321+
Py_DECREF(it);
322+
return result;
323+
}
324+
}
325+
}
326+
// all other cases: do a standard cast conversion
327+
PyObject* result = PyArray_CastToType(array, dtype, 0);
328+
if (!result) {
329+
Py_DECREF(dtype);
330+
return NULL;
331+
}
332+
return result;
333+
}
334+
335+
336+
204337
static char *first_true_1d_kwarg_names[] = {
205338
"array",
206339
"forward",

src/methods.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg);
4747
PyObject *
4848
nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a);
4949

50+
PyObject *
51+
is_objectable_dt64(PyObject *m, PyObject *a);
52+
53+
PyObject *
54+
is_objectable(PyObject *m, PyObject *a);
55+
56+
PyObject *
57+
astype_array(PyObject *m, PyObject *args);
58+
5059
PyObject *
5160
first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
5261

src/utilities.h

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,69 @@ AK_slice_to_ascending_slice(PyObject* slice, Py_ssize_t size)
223223
-step);
224224
}
225225

226+
227+
static inline NPY_DATETIMEUNIT
228+
AK_dt_unit_from_array(PyArrayObject* a) {
229+
// This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type.
230+
PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref
231+
PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta);
232+
return dma->base;
233+
}
234+
235+
// Given a dt64 array, determine if it can be cast to a object without data loss. Returns -1 on error. NOTE: if we use dt_year, must incref first
236+
static inline int
237+
AK_is_objectable_dt64(PyArrayObject* a, PyObject* dt_year)
238+
{
239+
NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a);
240+
switch (unit) {
241+
case NPY_FR_ERROR:
242+
case NPY_FR_Y:
243+
case NPY_FR_M:
244+
case NPY_FR_W:
245+
return false;
246+
case NPY_FR_D:
247+
case NPY_FR_h:
248+
case NPY_FR_m:
249+
case NPY_FR_s:
250+
case NPY_FR_ms:
251+
case NPY_FR_us:
252+
break;
253+
case NPY_FR_ns:
254+
case NPY_FR_ps:
255+
case NPY_FR_fs:
256+
case NPY_FR_as:
257+
case NPY_FR_GENERIC:
258+
return false;
259+
}
260+
261+
Py_INCREF(dt_year);
262+
PyObject* a_year = PyArray_CastToType(a, (PyArray_Descr*)dt_year, 0);
263+
if (!a_year) {
264+
Py_DECREF(dt_year);
265+
return -1;
266+
}
267+
268+
npy_int64* data = (npy_int64*)PyArray_DATA((PyArrayObject*)a_year);
269+
npy_intp size = PyArray_SIZE((PyArrayObject*)a_year);
270+
271+
for (npy_intp i = 0; i < size; ++i) {
272+
npy_int64 v = data[i];
273+
if (v == NPY_DATETIME_NAT) {
274+
continue;
275+
}
276+
// offset: 1-1970, 9999-1970
277+
if (v < -1969 || v > 8029) {
278+
Py_DECREF(a_year);
279+
return 0;
280+
}
281+
}
282+
Py_DECREF(a_year);
283+
return 1;
284+
}
285+
286+
287+
288+
226289
// Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using bit masks, does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expected number of indices.
227290
static inline PyObject *
228291
AK_nonzero_1d(PyArrayObject* array) {
@@ -319,15 +382,6 @@ AK_nonzero_1d(PyArrayObject* array) {
319382
return final;
320383
}
321384

322-
static inline NPY_DATETIMEUNIT
323-
AK_dt_unit_from_array(PyArrayObject* a) {
324-
// This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type.
325-
PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref
326-
PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta);
327-
// PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta);
328-
return dma->base;
329-
}
330-
331385
static inline NPY_DATETIMEUNIT
332386
AK_dt_unit_from_scalar(PyDatetimeScalarObject* dts) {
333387
// Based on convert_pyobject_to_datetime and related usage in datetime.c

0 commit comments

Comments
 (0)