Merge branch 'master' of https://github.com/InvestmentSystems/arraykit

flexatone · flexatone · commit 7eef75e1d1db · 2021-05-27T17:21:28.000-07:00
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,5 @@
 .vscode
 __pycache__
 build
+*.diff
+*.orig
diff --git a/performance/__main__.py b/performance/__main__.py
@@ -1,6 +1,4 @@
-
-
-
+import datetime
 import timeit
 import argparse
 
@@ -16,6 +14,7 @@
 from performance.reference.util import resolve_dtype as resolve_dtype_ref
 from performance.reference.util import resolve_dtype_iter as resolve_dtype_iter_ref
 from performance.reference.util import array_deepcopy as array_deepcopy_ref
+from performance.reference.util import isna_element as isna_element_ref
 
 from performance.reference.array_go import ArrayGO as ArrayGOREF
 
@@ -29,6 +28,7 @@
 from arraykit import resolve_dtype as resolve_dtype_ak
 from arraykit import resolve_dtype_iter as resolve_dtype_iter_ak
 from arraykit import array_deepcopy as array_deepcopy_ak
+from arraykit import isna_element as isna_element_ak
 
 from arraykit import ArrayGO as ArrayGOAK
 
@@ -250,6 +250,73 @@ class ArrayGOPerfREF(ArrayGOPerf):
     entry = staticmethod(ArrayGOREF)
 
 
+#-------------------------------------------------------------------------------
+class IsNaElementPerf(Perf):
+    '''Benchmark workload for ``isna_element``.
+
+    ``pre`` fills ``self.values`` with a mix of NA and non-NA scalars; ``main`` passes each of them to ``self.entry``, which the subclasses bind to the implementation being measured.
+    '''
+    # NOTE(review): presumably the repetition count consumed by the Perf runner -- confirm against the base class.
+    NUMBER = 1000
+
+    def pre(self):
+        '''Build the list of sample scalars in ``self.values``.
+        '''
+        # Local subclasses of the builtin float / complex types, so subclass instances are part of the sample.
+        class FloatSubclass(float): pass
+        class ComplexSubclass(complex): pass
+
+        self.values = [
+                # NA elements: NaT for both datetime64 and timedelta64, None, and NaN with either sign
+                np.datetime64('NaT'), np.timedelta64('NaT'), None, float('NaN'), -float('NaN'),
+
+                # Non-float, non-NA elements
+                1, 'str', np.datetime64('2020-12-31'), datetime.date(2020, 12, 31), False,
+        ]
+
+        nan = np.nan
+        # Complex values carrying a NaN in the real part or in the imaginary part.
+        complex_nans = [
+                complex(nan, 0),
+                complex(-nan, 0),
+                complex(0, nan),
+                complex(0, -nan),
+        ]
+
+        # float128 / complex256 are only added when this NumPy build exposes them.
+        float_classes = [float, np.float16, np.float32, np.float64, FloatSubclass]
+        if hasattr(np, 'float128'):
+            float_classes.append(np.float128)
+
+        cfloat_classes = [complex, np.complex64, np.complex128, ComplexSubclass]
+        if hasattr(np, 'complex256'):
+            cfloat_classes.append(np.complex256)
+
+        # Append all the different types of nans across dtypes
+        for ctor in float_classes:
+            self.values.append(ctor(nan))
+            self.values.append(ctor(-nan))
+
+        for ctor in cfloat_classes:
+            for complex_nan in complex_nans:
+                self.values.append(ctor(complex_nan))
+
+        # Append a wide range of float values, with different precision, across types
+        # As Python float literals, 1e-1000 evaluates to 0.0 and 1e309 / 1e1000 to inf, so the extremes cover zero and infinity.
+        # NOTE(review): the narrower constructors (e.g. np.float16) will presumably round more of these to 0 or inf -- confirm that is intended.
+        for val in (
+                1e-1000, 1e-309, 1e-39, 1e-16, 1e-5, 0.1, 0., 1.0, 1e5, 1e16, 1e39, 1e309, 1e1000,
+            ):
+            # Each value is added with both signs for every float type ...
+            for ctor in float_classes:
+                self.values.append(ctor(val))
+                self.values.append(ctor(-val))
+
+            # ... and with all four sign combinations of (real, imag) for every complex type.
+            for ctor in cfloat_classes:
+                self.values.append(ctor(complex(val, val)))
+                self.values.append(ctor(complex(-val, val)))
+                self.values.append(ctor(complex(val, -val)))
+                self.values.append(ctor(complex(-val, -val)))
+
+    def main(self):
+        '''Call ``self.entry`` on every sample value, making ten passes over ``self.values``; results are discarded.
+        '''
+        for _ in range(10):
+            for val in self.values:
+                self.entry(val)
+
+class IsNaElementPerfAK(IsNaElementPerf):
+    '''Run the IsNaElementPerf workload against ``isna_element`` imported from ``arraykit``.
+    '''
+    # Wrapped in staticmethod so that ``self.entry(val)`` calls the function with ``val`` only.
+    entry = staticmethod(isna_element_ak)
+
+class IsNaElementPerfREF(IsNaElementPerf):
+    '''Run the IsNaElementPerf workload against ``isna_element`` imported from ``performance.reference.util``.
+    '''
+    # Wrapped in staticmethod so that ``self.entry(val)`` calls the function with ``val`` only.
+    entry = staticmethod(isna_element_ref)
+
+
 #-------------------------------------------------------------------------------
 
 def get_arg_parser():
diff --git a/performance/reference/util.py b/performance/reference/util.py
@@ -31,6 +31,7 @@ def mloc(array: np.ndarray) -> int:
     '''
     return tp.cast(int, array.__array_interface__['data'][0])
 
+
 def immutable_filter(src_array: np.ndarray) -> np.ndarray:
     '''Pass an immutable array; otherwise, return an immutable copy of the provided array.
     '''
@@ -40,6 +41,7 @@ def immutable_filter(src_array: np.ndarray) -> np.ndarray:
         return dst_array
     return src_array # keep it as is
 
+
 def name_filter(name):
     '''
     For name attributes on containers, only permit recursively hashable objects.
@@ -51,7 +53,6 @@ def name_filter(name):
     return name
 
 
-
 def shape_filter(array: np.ndarray) -> tp.Tuple[int, int]:
     '''Represent a 1D array as a 2D array with length as rows of a single-column array.
 
@@ -62,6 +63,7 @@ def shape_filter(array: np.ndarray) -> tp.Tuple[int, int]:
         return array.shape[0], 1
     return array.shape #type: ignore
 
+
 def column_2d_filter(array: np.ndarray) -> np.ndarray:
     '''Reshape a flat ndim 1 array into a 2D array with one columns and rows of length. This is used (a) for getting string representations and (b) for using np.concatenate and np binary operators on 1D arrays.
     '''
@@ -70,6 +72,7 @@ def column_2d_filter(array: np.ndarray) -> np.ndarray:
         return np.reshape(array, (array.shape[0], 1))
     return array
 
+
 def column_1d_filter(array: np.ndarray) -> np.ndarray:
     '''
     Ensure that a column that might be 2D or 1D is returned as a 1D array.
@@ -79,6 +82,7 @@ def column_1d_filter(array: np.ndarray) -> np.ndarray:
         return np.reshape(array, array.shape[0])
     return array
 
+
 def row_1d_filter(array: np.ndarray) -> np.ndarray:
     '''
     Ensure that a row that might be 2D or 1D is returned as a 1D array.
@@ -142,6 +146,7 @@ def resolve_dtype(dt1: np.dtype, dt2: np.dtype) -> np.dtype:
     # if not a string or an object, can use result type
     return np.result_type(dt1, dt2)
 
+
 def resolve_dtype_iter(dtypes: tp.Iterable[np.dtype]) -> np.dtype:
     '''Given an iterable of one or more dtypes, do pairwise comparisons to determine compatible overall type. Once we get to object we can stop checking and return object.
 
@@ -158,7 +163,6 @@ def resolve_dtype_iter(dtypes: tp.Iterable[np.dtype]) -> np.dtype:
     return dt_resolve
 
 
-
 def array_deepcopy(
         array: np.ndarray,
         memo: tp.Optional[tp.Dict[int, tp.Any]],
@@ -181,3 +185,17 @@ def array_deepcopy(
     if memo is not None:
         memo[ident] = post
     return post
+
+
+def isna_element(value: tp.Any) -> bool:
+    '''Return whether ``value`` is an NA element: a value ``np.isnan`` reports as NaN, a NaT ``np.datetime64`` / ``np.timedelta64``, or ``None``. This does not yet handle pd.NA.
+
+    NOTE(review): the NumPy branches return whatever ``np.isnan`` / ``np.isnat`` produce (presumably ``np.bool_`` for scalar input, hence the ``type: ignore``) -- confirm callers do not need a builtin ``bool``.
+    '''
+    try:
+        # Whatever np.isnan accepts is answered here; inputs it rejects raise TypeError and fall through to the checks below.
+        return np.isnan(value) #type: ignore
+    except TypeError:
+        pass
+
+    # NaT check for NumPy datetime / timedelta scalars that np.isnan rejected.
+    if isinstance(value, (np.datetime64, np.timedelta64)):
+        return np.isnat(value) #type: ignore
+
+    # Of the remaining objects, only None counts as NA.
+    return value is None
diff --git a/setup.py b/setup.py
@@ -1,5 +1,6 @@
 from setuptools import Extension  # type: ignore
 from setuptools import setup
+from numpy.distutils.misc_util import get_info
 import numpy as np  # type: ignore
 
 
@@ -14,6 +15,17 @@ def get_long_description() -> str:
 Packages: https://pypi.org/project/arraykit
 '''
 
+# Build configuration reported by numpy.distutils for the 'npymath' package; the code below relies on it
+# providing 'include_dirs' and 'define_macros' entries.
+additional_info = get_info('npymath') # We need this for various numpy C math APIs to work
+
+# Update the dictionary to include configuration we want.
+# The NumPy header directory and the AK_VERSION macro are placed ahead of the entries npymath supplies.
+additional_info['include_dirs'] = [np.get_include()] + additional_info['include_dirs']
+additional_info['define_macros'] = [("AK_VERSION", AK_VERSION)] + additional_info['define_macros']
+
+# Every key of additional_info is forwarded to Extension as a keyword argument.
+ak_extension = Extension(
+        name='arraykit._arraykit', # build into module
+        sources=['src/_arraykit.c'],
+        **additional_info,
+)
 
 setup(
     name='arraykit',
@@ -43,12 +55,5 @@ def get_long_description() -> str:
     package_dir={'arraykit': 'src'},
     package_data={'arraykit': ['__init__.pyi', 'py.typed']},
     include_package_data=True,
-    ext_modules=[
-        Extension(
-            name='arraykit._arraykit', # build into module
-            sources=['src/_arraykit.c'],
-            include_dirs=[np.get_include()],
-            define_macros=[('AK_VERSION', AK_VERSION)],
-        ),
-    ],
+    ext_modules=[ak_extension],
 )
diff --git a/src/__init__.pyi b/src/__init__.pyi
@@ -30,3 +30,4 @@ def row_1d_filter(__array: np.array) -> np.ndarray: ...
 def array_deepcopy(__array: np.array, memo: tp.Dict[int, tp.Any]) -> np.ndarray: ...
 def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ...
 def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ...
+def isna_element(__value: tp.Any) -> bool: ...
diff --git a/src/_arraykit.c b/src/_arraykit.c
@@ -5,6 +5,8 @@
 # define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 
 # include "numpy/arrayobject.h"
+# include "numpy/arrayscalars.h"
+# include "numpy/halffloat.h"
 
 //------------------------------------------------------------------------------
 // Macros
@@ -360,6 +362,69 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg)
     return (PyObject *)AK_ResolveDTypeIter(arg);
 }
 
+//------------------------------------------------------------------------------
+// general utility
+
+// Return Py_True when `arg` is an NA element, otherwise Py_False.
+// NA elements are: None; a NaN held in a Python float or a NumPy half / float32 / float64
+// (and float128 when available) scalar; a complex value (Python or NumPy) whose real or
+// imaginary part is NaN; and a NumPy datetime64 / timedelta64 scalar equal to NaT.
+// `arg` is the single object passed by the caller (the function is registered as METH_O).
+static PyObject *
+isna_element(PyObject *Py_UNUSED(m), PyObject *arg)
+{
+    // None
+    if (arg == Py_None) {
+        Py_RETURN_TRUE;
+    }
+
+    // NaN
+    // Python floats first, then each NumPy floating-point scalar type in turn.
+    if (PyFloat_Check(arg)) {
+        return PyBool_FromLong(isnan(PyFloat_AS_DOUBLE(arg)));
+    }
+    // Half precision values are tested with the dedicated npy_half_isnan helper.
+    if (PyArray_IsScalar(arg, Half)) {
+        return PyBool_FromLong(npy_half_isnan(PyArrayScalar_VAL(arg, Half)));
+    }
+    if (PyArray_IsScalar(arg, Float32)) {
+        return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float32)));
+    }
+    if (PyArray_IsScalar(arg, Float64)) {
+        return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float64)));
+    }
+    // Compiled only when PyFloat128ArrType_Type is defined
+    // (presumably absent on platforms without a 128-bit float type -- confirm).
+    # ifdef PyFloat128ArrType_Type
+    if (PyArray_IsScalar(arg, Float128)) {
+        return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float128)));
+    }
+    # endif
+
+    // Complex NaN
+    // A complex value is NA when either component is NaN.
+    // NOTE(review): the member access below assumes npy_cfloat / npy_cdouble / npy_clongdouble
+    // expose .real and .imag fields -- verify against the NumPy versions being targeted.
+    if (PyComplex_Check(arg)) {
+        Py_complex val = ((PyComplexObject*)arg)->cval;
+        return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
+    }
+    if (PyArray_IsScalar(arg, Complex64)) {
+        npy_cfloat val = PyArrayScalar_VAL(arg, Complex64);
+        return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
+    }
+    if (PyArray_IsScalar(arg, Complex128)) {
+        npy_cdouble val = PyArrayScalar_VAL(arg, Complex128);
+        return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
+    }
+    // Compiled only when PyComplex256ArrType_Type is defined.
+    # ifdef PyComplex256ArrType_Type
+    if (PyArray_IsScalar(arg, Complex256)) {
+        npy_clongdouble val = PyArrayScalar_VAL(arg, Complex256);
+        return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
+    }
+    # endif
+
+    // NaT - Datetime
+    if (PyArray_IsScalar(arg, Datetime)) {
+        return PyBool_FromLong(PyArrayScalar_VAL(arg, Datetime) == NPY_DATETIME_NAT);
+    }
+
+    // NaT - Timedelta
+    // The timedelta value is compared against the same NPY_DATETIME_NAT constant as the datetime case.
+    if (PyArray_IsScalar(arg, Timedelta)) {
+        return PyBool_FromLong(PyArrayScalar_VAL(arg, Timedelta) == NPY_DATETIME_NAT);
+    }
+
+    // Any object not matched above is not an NA element.
+    Py_RETURN_FALSE;
+}
+
 //------------------------------------------------------------------------------
 // ArrayGO
 //------------------------------------------------------------------------------
@@ -640,6 +705,7 @@ static PyMethodDef arraykit_methods[] =  {
             NULL},
     {"resolve_dtype", resolve_dtype, METH_VARARGS, NULL},
     {"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL},
+    {"isna_element", isna_element, METH_O, NULL},
     {NULL},
 };
 
diff --git a/test/test_util.py b/test/test_util.py

-Original file line number
+Diff line change
 .vscode
 __pycache__
 build
 +*.diff
 +*.orig