Skip to content

Commit 7eef75e

Browse files
committed
2 parents ca0dd5c + ce69cbc commit 7eef75e

File tree

7 files changed

+228
-16
lines changed

7 files changed

+228
-16
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@
88
.vscode
99
__pycache__
1010
build
11+
*.diff
12+
*.orig

performance/__main__.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
2-
3-
1+
import datetime
42
import timeit
53
import argparse
64

@@ -16,6 +14,7 @@
1614
from performance.reference.util import resolve_dtype as resolve_dtype_ref
1715
from performance.reference.util import resolve_dtype_iter as resolve_dtype_iter_ref
1816
from performance.reference.util import array_deepcopy as array_deepcopy_ref
17+
from performance.reference.util import isna_element as isna_element_ref
1918

2019
from performance.reference.array_go import ArrayGO as ArrayGOREF
2120

@@ -29,6 +28,7 @@
2928
from arraykit import resolve_dtype as resolve_dtype_ak
3029
from arraykit import resolve_dtype_iter as resolve_dtype_iter_ak
3130
from arraykit import array_deepcopy as array_deepcopy_ak
31+
from arraykit import isna_element as isna_element_ak
3232

3333
from arraykit import ArrayGO as ArrayGOAK
3434

@@ -250,6 +250,73 @@ class ArrayGOPerfREF(ArrayGOPerf):
250250
entry = staticmethod(ArrayGOREF)
251251

252252

253+
#-------------------------------------------------------------------------------
254+
class IsNaElementPerf(Perf):
    '''Benchmark an ``isna_element`` implementation over a fixed mix of NA and non-NA scalars.

    Subclasses bind ``entry`` to the implementation under test.
    '''
    NUMBER = 1000

    def pre(self):
        '''Build ``self.values``, the list of scalars fed to ``entry`` by ``main``.
        '''
        class FloatSubclass(float):
            pass

        class ComplexSubclass(complex):
            pass

        na_elements = [
            np.datetime64('NaT'),
            np.timedelta64('NaT'),
            None,
            float('NaN'),
            -float('NaN'),
        ]
        # Non-float, non-NA elements
        non_na_elements = [
            1,
            'str',
            np.datetime64('2020-12-31'),
            datetime.date(2020, 12, 31),
            False,
        ]
        self.values = na_elements + non_na_elements
        values = self.values

        nan = np.nan
        complex_nans = [
            complex(nan, 0),
            complex(-nan, 0),
            complex(0, nan),
            complex(0, -nan),
        ]

        # The extended-precision types are not available on every platform.
        float_classes = [float, np.float16, np.float32, np.float64, FloatSubclass]
        if hasattr(np, 'float128'):
            float_classes.append(np.float128)

        cfloat_classes = [complex, np.complex64, np.complex128, ComplexSubclass]
        if hasattr(np, 'complex256'):
            cfloat_classes.append(np.complex256)

        # Every flavour of NaN, for every real and complex type
        for ctor in float_classes:
            values.extend((ctor(nan), ctor(-nan)))

        values.extend(
            ctor(complex_nan)
            for ctor in cfloat_classes
            for complex_nan in complex_nans
        )

        # A wide range of magnitudes (the extremes collapse to 0.0 and inf),
        # in both signs, for every real and complex type
        magnitudes = (
            1e-1000, 1e-309, 1e-39, 1e-16, 1e-5, 0.1, 0., 1.0, 1e5, 1e16, 1e39, 1e309, 1e1000,
        )
        for val in magnitudes:
            for ctor in float_classes:
                values.extend((ctor(val), ctor(-val)))

            parts = ((val, val), (-val, val), (val, -val), (-val, -val))
            for ctor in cfloat_classes:
                values.extend(ctor(complex(real, imag)) for real, imag in parts)

    def main(self):
        '''Call ``entry`` on every prepared value, ten passes over the list.
        '''
        for _ in range(10):
            for element in self.values:
                self.entry(element)


class IsNaElementPerfAK(IsNaElementPerf):
    # Implementation under test: arraykit's isna_element
    entry = staticmethod(isna_element_ak)


class IsNaElementPerfREF(IsNaElementPerf):
    # Implementation under test: the pure-Python reference isna_element
    entry = staticmethod(isna_element_ref)
318+
319+
253320
#-------------------------------------------------------------------------------
254321

255322
def get_arg_parser():

performance/reference/util.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def mloc(array: np.ndarray) -> int:
3131
'''
3232
return tp.cast(int, array.__array_interface__['data'][0])
3333

34+
3435
def immutable_filter(src_array: np.ndarray) -> np.ndarray:
3536
'''Pass an immutable array; otherwise, return an immutable copy of the provided array.
3637
'''
@@ -40,6 +41,7 @@ def immutable_filter(src_array: np.ndarray) -> np.ndarray:
4041
return dst_array
4142
return src_array # keep it as is
4243

44+
4345
def name_filter(name):
4446
'''
4547
For name attributes on containers, only permit recursively hashable objects.
@@ -51,7 +53,6 @@ def name_filter(name):
5153
return name
5254

5355

54-
5556
def shape_filter(array: np.ndarray) -> tp.Tuple[int, int]:
5657
'''Represent a 1D array as a 2D array with length as rows of a single-column array.
5758
@@ -62,6 +63,7 @@ def shape_filter(array: np.ndarray) -> tp.Tuple[int, int]:
6263
return array.shape[0], 1
6364
return array.shape #type: ignore
6465

66+
6567
def column_2d_filter(array: np.ndarray) -> np.ndarray:
6668
'''Reshape a flat ndim 1 array into a 2D array with one columns and rows of length. This is used (a) for getting string representations and (b) for using np.concatenate and np binary operators on 1D arrays.
6769
'''
@@ -70,6 +72,7 @@ def column_2d_filter(array: np.ndarray) -> np.ndarray:
7072
return np.reshape(array, (array.shape[0], 1))
7173
return array
7274

75+
7376
def column_1d_filter(array: np.ndarray) -> np.ndarray:
7477
'''
7578
Ensure that a column that might be 2D or 1D is returned as a 1D array.
@@ -79,6 +82,7 @@ def column_1d_filter(array: np.ndarray) -> np.ndarray:
7982
return np.reshape(array, array.shape[0])
8083
return array
8184

85+
8286
def row_1d_filter(array: np.ndarray) -> np.ndarray:
8387
'''
8488
Ensure that a row that might be 2D or 1D is returned as a 1D array.
@@ -142,6 +146,7 @@ def resolve_dtype(dt1: np.dtype, dt2: np.dtype) -> np.dtype:
142146
# if not a string or an object, can use result type
143147
return np.result_type(dt1, dt2)
144148

149+
145150
def resolve_dtype_iter(dtypes: tp.Iterable[np.dtype]) -> np.dtype:
146151
'''Given an iterable of one or more dtypes, do pairwise comparisons to determine compatible overall type. Once we get to object we can stop checking and return object.
147152
@@ -158,7 +163,6 @@ def resolve_dtype_iter(dtypes: tp.Iterable[np.dtype]) -> np.dtype:
158163
return dt_resolve
159164

160165

161-
162166
def array_deepcopy(
163167
array: np.ndarray,
164168
memo: tp.Optional[tp.Dict[int, tp.Any]],
@@ -181,3 +185,17 @@ def array_deepcopy(
181185
if memo is not None:
182186
memo[ident] = post
183187
return post
188+
189+
190+
def isna_element(value: tp.Any) -> bool:
    '''Return True if ``value`` is a scalar NA: None, a float or complex NaN, or a datetime64 / timedelta64 NaT. This does not yet handle pd.NA.

    The result is always a Python ``bool`` (never ``np.bool_``). Non-scalar inputs such as lists or arrays return False rather than an element-wise array, consistent with ``arraykit.isna_element``.
    '''
    if value is None:
        return True

    # NaT is checked explicitly: older NumPy raises TypeError from np.isnan for datetime-like scalars.
    if isinstance(value, (np.datetime64, np.timedelta64)):
        return bool(np.isnat(value))

    try:
        result = np.isnan(value)
    except TypeError:
        # Not something NumPy can treat as numeric (str, date, ...): not an NA.
        return False

    # np.isnan is element-wise; a container is not itself an NA element.
    if np.ndim(result) != 0:
        return False
    return bool(result)

setup.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from setuptools import Extension # type: ignore
22
from setuptools import setup
3+
from numpy.distutils.misc_util import get_info
34
import numpy as np # type: ignore
45

56

@@ -14,6 +15,17 @@ def get_long_description() -> str:
1415
Packages: https://pypi.org/project/arraykit
1516
'''
1617

18+
# Build settings for linking against NumPy's npymath library, which the
# extension needs for various NumPy C math APIs to work.
additional_info = get_info('npymath')

# Put our own include directory and version macro ahead of the npymath entries.
additional_info.update(
    include_dirs=[np.get_include()] + additional_info['include_dirs'],
    define_macros=[("AK_VERSION", AK_VERSION)] + additional_info['define_macros'],
)

ak_extension = Extension(
    name='arraykit._arraykit',  # build into module
    sources=['src/_arraykit.c'],
    **additional_info,
)
1729

1830
setup(
1931
name='arraykit',
@@ -43,12 +55,5 @@ def get_long_description() -> str:
4355
package_dir={'arraykit': 'src'},
4456
package_data={'arraykit': ['__init__.pyi', 'py.typed']},
4557
include_package_data=True,
46-
ext_modules=[
47-
Extension(
48-
name='arraykit._arraykit', # build into module
49-
sources=['src/_arraykit.c'],
50-
include_dirs=[np.get_include()],
51-
define_macros=[('AK_VERSION', AK_VERSION)],
52-
),
53-
],
58+
ext_modules=[ak_extension],
5459
)

src/__init__.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ def row_1d_filter(__array: np.array) -> np.ndarray: ...
3030
def array_deepcopy(__array: np.array, memo: tp.Dict[int, tp.Any]) -> np.ndarray: ...
3131
def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ...
3232
def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ...
33+
# True for None, a float or complex NaN, or a datetime64 / timedelta64 NaT; False for anything else.
def isna_element(__value: tp.Any) -> bool: ...

src/_arraykit.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
66

77
# include "numpy/arrayobject.h"
8+
# include "numpy/arrayscalars.h"
9+
# include "numpy/halffloat.h"
810

911
//------------------------------------------------------------------------------
1012
// Macros
@@ -360,6 +362,69 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg)
360362
return (PyObject *)AK_ResolveDTypeIter(arg);
361363
}
362364

365+
//------------------------------------------------------------------------------
366+
// general utility
367+
368+
// Return True when arg is a missing-value scalar: None, a NaN held in a Python
// or NumPy float / complex scalar, or a NaT held in a NumPy datetime64 /
// timedelta64 scalar. Any other object yields False. Returns a new reference.
static PyObject *
isna_element(PyObject *Py_UNUSED(m), PyObject *arg)
{
    int is_na = 0;

    // None
    if (arg == Py_None) {
        is_na = 1;
    }
    // NaN in a real floating-point scalar
    else if (PyFloat_Check(arg)) {
        is_na = isnan(PyFloat_AS_DOUBLE(arg)) != 0;
    }
    else if (PyArray_IsScalar(arg, Half)) {
        is_na = npy_half_isnan(PyArrayScalar_VAL(arg, Half)) != 0;
    }
    else if (PyArray_IsScalar(arg, Float32)) {
        is_na = isnan(PyArrayScalar_VAL(arg, Float32)) != 0;
    }
    else if (PyArray_IsScalar(arg, Float64)) {
        is_na = isnan(PyArrayScalar_VAL(arg, Float64)) != 0;
    }
    # ifdef PyFloat128ArrType_Type
    else if (PyArray_IsScalar(arg, Float128)) {
        is_na = isnan(PyArrayScalar_VAL(arg, Float128)) != 0;
    }
    # endif
    // NaN in either component of a complex scalar
    else if (PyComplex_Check(arg)) {
        Py_complex c = ((PyComplexObject*)arg)->cval;
        is_na = isnan(c.real) || isnan(c.imag);
    }
    else if (PyArray_IsScalar(arg, Complex64)) {
        npy_cfloat c = PyArrayScalar_VAL(arg, Complex64);
        is_na = isnan(c.real) || isnan(c.imag);
    }
    else if (PyArray_IsScalar(arg, Complex128)) {
        npy_cdouble c = PyArrayScalar_VAL(arg, Complex128);
        is_na = isnan(c.real) || isnan(c.imag);
    }
    # ifdef PyComplex256ArrType_Type
    else if (PyArray_IsScalar(arg, Complex256)) {
        npy_clongdouble c = PyArrayScalar_VAL(arg, Complex256);
        is_na = isnan(c.real) || isnan(c.imag);
    }
    # endif
    // NaT - Datetime
    else if (PyArray_IsScalar(arg, Datetime)) {
        is_na = PyArrayScalar_VAL(arg, Datetime) == NPY_DATETIME_NAT;
    }
    // NaT - Timedelta
    else if (PyArray_IsScalar(arg, Timedelta)) {
        is_na = PyArrayScalar_VAL(arg, Timedelta) == NPY_DATETIME_NAT;
    }

    return PyBool_FromLong(is_na);
}
427+
363428
//------------------------------------------------------------------------------
364429
// ArrayGO
365430
//------------------------------------------------------------------------------
@@ -640,6 +705,7 @@ static PyMethodDef arraykit_methods[] = {
640705
NULL},
641706
{"resolve_dtype", resolve_dtype, METH_VARARGS, NULL},
642707
{"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL},
708+
{"isna_element", isna_element, METH_O, NULL},
643709
{NULL},
644710
};
645711

0 commit comments

Comments
 (0)