Skip to content

Commit 6968560

Browse files
committed
code cleanup
1 parent 55c7984 commit 6968560

File tree

2 files changed

+37
-73
lines changed

2 files changed

+37
-73
lines changed

doc/articles/nonzero_1d.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def __call__(self):
4848

4949

5050
#-------------------------------------------------------------------------------
51-
NUMBER = 200
51+
NUMBER = 400
5252

5353
def seconds_to_display(seconds: float) -> str:
5454
seconds /= NUMBER

src/_arraykit.c

Lines changed: 36 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -3546,18 +3546,6 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) {
35463546
indices[count++] = p - p_start; \
35473547
} \
35483548

3549-
#define NONZERO_APPEND_OFFSET(offset) do { \
3550-
if (AK_UNLIKELY(count == capacity)) { \
3551-
capacity <<= 1; \
3552-
indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
3553-
if (indices == NULL) { \
3554-
return NULL; \
3555-
} \
3556-
} \
3557-
indices[count++] = p + offset - p_start; \
3558-
} while(0) \
3559-
3560-
35613549
// Given a Boolean, contiguous 1D array, return the index positions in an int64 array.
35623550
static inline PyObject*
35633551
AK_nonzero_1d(PyArrayObject* array) {
@@ -3575,8 +3563,7 @@ AK_nonzero_1d(PyArrayObject* array) {
35753563

35763564
Py_ssize_t count = 0;
35773565
// the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size
3578-
Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 2;
3579-
3566+
Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 8;
35803567
npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity);
35813568

35823569
// array is contiguous, 1d, boolean
@@ -3585,9 +3572,10 @@ AK_nonzero_1d(PyArrayObject* array) {
35853572
npy_bool* p_end = p + count_max;
35863573
npy_bool* p_end_roll = p_end - size_div.rem;
35873574

3588-
3575+
NPY_BEGIN_THREADS_DEF;
3576+
NPY_BEGIN_THREADS;
35893577
// Through experimentation it has been verified that doing full-size allocation of memory does not permit outperforming NumPy at 10_000_000 scale; but doing less optimizations does help.
3590-
// Doing esoteric things with bit masks does not generally improve performance.
3578+
// Using bit masks does not improve performance over pointer arithmetic.
35913579
// Prescanning for all empty is very effective.
35923580

35933581
while (p < p_end_roll) {
@@ -3616,6 +3604,7 @@ AK_nonzero_1d(PyArrayObject* array) {
36163604
if (*p) {NONZERO_APPEND_INDEX;}
36173605
p++;
36183606
}
3607+
NPY_END_THREADS;
36193608

36203609
// npy_uint64 roll;
36213610
// while (p < p_end_roll) {
@@ -3694,7 +3683,6 @@ AK_nonzero_1d(PyArrayObject* array) {
36943683
PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
36953684
return final;
36963685
}
3697-
36983686
#undef NONZERO_APPEND_INDEX
36993687

37003688
static PyObject*
@@ -3717,7 +3705,6 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
37173705
}
37183706

37193707

3720-
37213708
static char *first_true_1d_kwarg_names[] = {
37223709
"array",
37233710
"forward",
@@ -5862,28 +5849,6 @@ static PyTypeObject BlockIndexType = {
58625849
// TriMap
58635850
//------------------------------------------------------------------------------
58645851

5865-
// NOTE: slice selection and assignment is much faster than array selection
5866-
// >>> a1 = np.arange(100_000)
5867-
// >>> slc = slice(50_000, 60_000)
5868-
// >>> alc = np.arange(50_000, 60_000)
5869-
// >>> %timeit a1[slc]
5870-
// 45.6 ns ± 0.133 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
5871-
// >>> %timeit a1[alc]
5872-
// 4.98 µs ± 12.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
5873-
// >>> %timeit a1[slc] = alc
5874-
// 873 ns ± 3.33 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
5875-
// >>> %timeit a1[alc] = alc
5876-
// 6.3 µs ± 25.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
5877-
5878-
// -- array of integers
5879-
// self._src_many_from: tp.List[int] = []
5880-
// -- normal lists of objects?
5881-
// self._src_many_to: tp.List[slice] = [] // could be int-pairs
5882-
5883-
// self._dst_many_from: tp.List[TNDArrayInt] = []
5884-
// self._dst_many_to: tp.List[slice] = [] // could be int-pairs
5885-
5886-
58875852
typedef struct TriMapOne {
58885853
Py_ssize_t from; // signed
58895854
Py_ssize_t to;
@@ -5899,7 +5864,6 @@ typedef struct TriMapManyFrom {
58995864
PyArrayObject* dst;
59005865
} TriMapManyFrom;
59015866

5902-
59035867
typedef struct TriMapObject {
59045868
PyObject_HEAD
59055869
Py_ssize_t src_len;
@@ -6285,8 +6249,10 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
62856249
goto error;
62866250
}
62876251

6288-
npy_bool* final_src_match_data = (npy_bool*)PyArray_DATA((PyArrayObject*)final_src_match);
6289-
npy_bool* final_dst_match_data = (npy_bool*)PyArray_DATA((PyArrayObject*)final_dst_match);
6252+
npy_bool* final_src_match_data = (npy_bool*)PyArray_DATA(
6253+
(PyArrayObject*)final_src_match);
6254+
npy_bool* final_dst_match_data = (npy_bool*)PyArray_DATA(
6255+
(PyArrayObject*)final_dst_match);
62906256

62916257
TriMapOne* o;
62926258
TriMapOne* o_end;
@@ -6300,7 +6266,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
63006266
for (; o < o_end; o++) {
63016267
final_dst_match_data[o->to] = NPY_TRUE;
63026268
}
6303-
63046269
// many assign from src and dst into the same final positions
63056270
npy_bool* s;
63066271
npy_bool* d;
@@ -6333,7 +6298,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
63336298
if (final_dst_unmatched == NULL) {
63346299
goto error;
63356300
}
6336-
63376301
tm->final_src_fill = AK_nonzero_1d((PyArrayObject*)final_src_unmatched);
63386302
if (tm->final_src_fill == NULL) {
63396303
goto error;
@@ -6342,7 +6306,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
63426306
if (tm->final_dst_fill == NULL) {
63436307
goto error;
63446308
}
6345-
63466309
Py_DECREF(final_src_match);
63476310
Py_DECREF(final_dst_match);
63486311
Py_DECREF(final_src_unmatched);
@@ -6358,7 +6321,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
63586321
return NULL;
63596322
}
63606323

6361-
63626324
static PyObject*
63636325
TriMap_is_many(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
63646326
if (!self->finalized) {
@@ -6482,7 +6444,6 @@ TriMap_dst_no_fill(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
64826444
} \
64836445
} \
64846446

6485-
64866447
// Based on `tm` state, transfer from src or from dst (depending on `from_src`) to a `array_to`, a newly created contiguous array that is compatible with the values in `array_from`. Returns -1 on error. This only needs to match to / from type combinations that are possible from `resolve_dtype`, i.e., bool never goes to integer.
64876448
static inline int
64886449
AK_TM_transfer(TriMapObject* tm,
@@ -6686,8 +6647,8 @@ AK_TM_transfer(TriMapObject* tm,
66866647
return 0;
66876648
}
66886649
}
6689-
AK_DEBUG_MSG_OBJ("array_to", (PyObject*)array_to);
6690-
AK_DEBUG_MSG_OBJ("array_from", (PyObject*)array_from);
6650+
// AK_DEBUG_MSG_OBJ("array_to", (PyObject*)array_to);
6651+
// AK_DEBUG_MSG_OBJ("array_from", (PyObject*)array_from);
66916652
PyErr_SetString(PyExc_TypeError, "No handling for types");
66926653
return -1;
66936654
}
@@ -6793,6 +6754,30 @@ AK_TM_fill_object(TriMapObject* tm,
67936754
return 0;
67946755
}
67956756

6757+
// TODO: AK_TM_fill_flexible
6758+
// this manually inserts string
6759+
// if (t_is_flexible) {
6760+
// // insert fill values
6761+
// Py_UCS4* t = (Py_UCS4*)PyArray_DATA(array_to);
6762+
// npy_intp t_cp = PyArray_DESCR(array_to)->elsize / UCS4_SIZE;
6763+
// Py_ssize_t len = PyUnicode_GET_LENGTH(fill_value) * UCS4_SIZE; // code points
6764+
// Py_ssize_t count = from_src ? tm->src_len : tm->dst_len;
6765+
// // NOTE: matches do not tell where a fill is needed
6766+
// npy_bool* d = from_src ? tm->src_match_data : tm->dst_match_data;
6767+
// npy_bool* d_end = d + count;
6768+
// while (d < d_end) {
6769+
// if (*d == NPY_FALSE) {
6770+
// if (PyUnicode_AsUCS4(fill_value, t, len, 0) == NULL) {
6771+
// Py_DECREF((PyObject*)array_to);
6772+
// return NULL;
6773+
// }
6774+
// }
6775+
// t += t_cp;
6776+
// d++;
6777+
// }
6778+
// }
6779+
6780+
67966781
// Returns NULL on error.
67976782
static inline PyObject*
67986783
AK_TM_map_no_fill(TriMapObject* tm,
@@ -6920,6 +6905,7 @@ AK_TM_map_fill(TriMapObject* tm,
69206905
return NULL;
69216906
}
69226907
}
6908+
// TODO: add special handling for unicode/bytes
69236909
else {
69246910
// Most simple is to fill with scalar, then overwrite values as needed; for object and flexible dtypes this is not efficient; for object dtypes, this obligates us to decref the filled value when assigning
69256911
if (PyArray_FillWithScalar(array_to, fill_value)) { // -1 on error
@@ -6939,28 +6925,6 @@ AK_TM_map_fill(TriMapObject* tm,
69396925
return (PyObject*)array_to;
69406926
}
69416927

6942-
// this manually inserts string
6943-
// if (t_is_flexible) {
6944-
// // insert fill values
6945-
// Py_UCS4* t = (Py_UCS4*)PyArray_DATA(array_to);
6946-
// npy_intp t_cp = PyArray_DESCR(array_to)->elsize / UCS4_SIZE;
6947-
// Py_ssize_t len = PyUnicode_GET_LENGTH(fill_value) * UCS4_SIZE; // code points
6948-
// Py_ssize_t count = from_src ? tm->src_len : tm->dst_len;
6949-
// // NOTE: matches do not tell where a fill is needed
6950-
// npy_bool* d = from_src ? tm->src_match_data : tm->dst_match_data;
6951-
// npy_bool* d_end = d + count;
6952-
// while (d < d_end) {
6953-
// if (*d == NPY_FALSE) {
6954-
// if (PyUnicode_AsUCS4(fill_value, t, len, 0) == NULL) {
6955-
// Py_DECREF((PyObject*)array_to);
6956-
// return NULL;
6957-
// }
6958-
// }
6959-
// t += t_cp;
6960-
// d++;
6961-
// }
6962-
// }
6963-
69646928
static PyObject*
69656929
TriMap_map_src_fill(TriMapObject *self, PyObject *args) {
69666930
PyArrayObject* array_from;

0 commit comments

Comments
 (0)