Skip to content

Commit 49742b3

Browse files
committed
Shuffles order around a bit. Replaces memcpy with cast.
1 parent a94ae8d commit 49742b3

File tree

3 files changed

+42
-42
lines changed

3 files changed

+42
-42
lines changed

performance/reference/util.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,9 +244,13 @@ def get_new_indexers_and_screen_ak(
244244
) -> tp.Tuple[np.ndarray, np.ndarray]:
245245
from arraykit import get_new_indexers_and_screen as ak_routine
246246

247+
if len(positions) > len(indexers):
248+
return np.unique(indexers, return_inverse=True)
249+
250+
# Will return same *objects* back if it was able to finish early.
247251
new_indexers, index_screen = ak_routine(indexers, positions)
248252
if new_indexers is indexers and index_screen is positions:
249-
return indexers, positions
253+
return positions, indexers
250254

251255
# Use a more helpful alias!
252256
element_locations = index_screen
@@ -257,4 +261,4 @@ def get_new_indexers_and_screen_ak(
257261
order_found = np.argsort(found_element_locations)
258262

259263
found_positions = positions[found_mask]
260-
return new_indexers, found_positions[order_found]
264+
return found_positions[order_found], new_indexers

src/_arraykit.c

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -574,37 +574,32 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
574574
575575
Equivalent Python code:
576576
577-
num_unique = len(positions) # 1
578-
element_locations = np.full(num_unique, num_unique, dtype=np.int64) # 2
579-
new_indexers = np.empty(len(indexers), dtype=np.int64) # 3
577+
num_unique = len(positions)
578+
element_locations = np.full(num_unique, num_unique, dtype=np.int64)
579+
new_indexers = np.empty(len(indexers), dtype=np.int64)
580580
581-
num_found = 0 # 4
581+
num_found = 0
582582
583-
for i, element in enumerate(indexers): # 5
584-
if element_locations[element] == num_unique: # 6
585-
element_locations[element] = num_found # 7
586-
num_found += 1 # 8
583+
for i, element in enumerate(indexers):
584+
if element_locations[element] == num_unique:
585+
element_locations[element] = num_found
586+
num_found += 1
587587
588-
if num_found == num_unique: # 9
589-
return indexers, positions # 10
588+
if num_found == num_unique:
589+
return positions, indexers
590590
591-
new_indexers[i] = element_locations[element] # 11
591+
new_indexers[i] = element_locations[element]
592592
593-
return new_indexers, element_locations
594-
# This return values will be used in CPython like this
595-
596-
# The rest of this will be implemented in Python since there is no
597-
# benefit to using Cpython for this.
593+
return element_locations, new_indexers
594+
# ...
595+
# NOTE: These return values will be used in a Python wrapper like this:
598596
found_mask = element_locations != num_unique
599597
600598
found_element_locations = element_locations[found_mask]
601599
order_found = np.argsort(found_element_locations)
602600
603601
found_positions = positions[found_mask]
604-
return found_positions[order_found]
605-
606-
index_screen = get_index_screen(element_locations, num_unique)
607-
return new_indexers, index_screen
602+
return found_positions[order_found], new_indexers
608603
*/
609604
PyArrayObject *indexers;
610605
PyArrayObject *positions;
@@ -615,7 +610,7 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
615610
&PyArray_Type, &indexers,
616611
&PyArray_Type, &positions
617612
))
618-
{
613+
{
619614
return NULL;
620615
}
621616

@@ -634,7 +629,7 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
634629
if (num_unique > PyArray_SIZE(indexers)) {
635630
// This algorithm is only optimal if the number of unique elements is
636631
// less than the number of elements in the indexers.
637-
// Otherwise, the most optimal code is ``np.unique(indexers, return_index=True)[1]``
632+
// Otherwise, the optimal code is ``np.unique(indexers, return_inverse=True)``
638633
// and we don't want to re-implement that in C.
639634
PyErr_SetString(
640635
PyExc_ValueError,
@@ -691,11 +686,11 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
691686
// C-contiguous, F-contiguous, both, or neither.
692687
// See https://numpy.org/doc/stable/reference/c-api/iterator.html#simple-iteration-example
693688
NpyIter *indexer_iter = NpyIter_New(
694-
indexers,
695-
NPY_ITER_READONLY| NPY_ITER_EXTERNAL_LOOP,
696-
NPY_KEEPORDER,
697-
NPY_NO_CASTING,
698-
NULL
689+
indexers, // array
690+
NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP, // iter flags
691+
NPY_KEEPORDER, // order
692+
NPY_NO_CASTING, // casting
693+
NULL // dtype
699694
);
700695
if (indexer_iter == NULL) {
701696
Py_DECREF(element_locations);
@@ -727,7 +722,7 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
727722
npy_int64 element;
728723

729724
while (inner_size--) {
730-
memcpy (&element, data, sizeof (npy_int64));
725+
element = *((npy_int64 *)data);
731726

732727
if (element_location_values[element] == num_unique) {
733728
element_location_values[element] = num_found;

test/test_util.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
from arraykit import array_deepcopy
1717
from arraykit import isna_element
1818
from arraykit import dtype_from_element
19-
from performance.reference.util import get_new_indexers_and_screen_ak
19+
from performance.reference.util import get_new_indexers_and_screen_ak as get_new_indexers_and_screen_full
20+
from arraykit import get_new_indexers_and_screen
2021

2122
from performance.reference.util import mloc as mloc_ref
2223

@@ -384,50 +385,50 @@ def test_dtype_from_element_str_and_bytes_dtypes(self) -> None:
384385

385386
def test_get_new_indexers_and_screen_a(self) -> None:
386387
indexersA = np.array([9, 9, 9, 9, 0, 0, 1, 4, 5, 0, 0, 0, 1], dtype=np.int64)
387-
postA = get_new_indexers_and_screen_ak(indexersA, np.arange(10, dtype=np.int64))
388+
postA = get_new_indexers_and_screen_full(indexersA, np.arange(10, dtype=np.int64))
388389
assert indexersA.flags.c_contiguous
389390
assert indexersA.flags.f_contiguous
390391
assert tuple(map(list, postA)) == (
391-
[0, 0, 0, 0, 1, 1, 2, 3, 4, 1, 1, 1, 2],
392392
[9, 0, 1, 4, 5],
393+
[0, 0, 0, 0, 1, 1, 2, 3, 4, 1, 1, 1, 2],
393394
)
394395

395396
# Prove we can handle non-contiguous arrays
396397
indexersB = np.full((len(indexersA), 3), -1, dtype=np.int64)
397398
indexersB[:,1] = indexersA.copy()
398399
assert not indexersB[:,1].flags.c_contiguous
399400
assert not indexersB[:,1].flags.f_contiguous
400-
postB = get_new_indexers_and_screen_ak(indexersB[:,1], np.arange(10, dtype=np.int64))
401+
postB = get_new_indexers_and_screen_full(indexersB[:,1], np.arange(10, dtype=np.int64))
401402
assert tuple(map(list, postA)) == tuple(map(list, postB))
402403

403404
indexersC = np.array([9, 9, 9, 9, 0, 0, 1, 4, 5, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=np.int64)
404-
postC = get_new_indexers_and_screen_ak(indexersC, positions=np.arange(15, dtype=np.int64))
405+
postC = get_new_indexers_and_screen_full(indexersC, positions=np.arange(15, dtype=np.int64))
405406
assert tuple(map(list, postC)) == (
406-
[0, 0, 0, 0, 1, 1, 2, 3, 4, 1, 1, 1, 2, 5, 6, 3, 4,7, 8, 9, 0, 10],
407407
[9, 0, 1, 4, 5, 2, 3, 6, 7, 8, 10],
408+
[0, 0, 0, 0, 1, 1, 2, 3, 4, 1, 1, 1, 2, 5, 6, 3, 4,7, 8, 9, 0, 10],
408409
)
409410

410411
indexersD = np.array([2, 1, 0, 2, 0, 1, 1, 2, 0], dtype=np.int64)
411-
postD = get_new_indexers_and_screen_ak(indexers=indexersD, positions=np.arange(3, dtype=np.int64))
412+
postD = get_new_indexers_and_screen_full(indexers=indexersD, positions=np.arange(3, dtype=np.int64))
412413
assert tuple(map(list, postD)) == (
413-
[2, 1, 0, 2, 0, 1, 1, 2, 0],
414414
[0, 1, 2],
415+
[2, 1, 0, 2, 0, 1, 1, 2, 0],
415416
)
416417

417418
def test_get_new_indexers_and_screen_b(self) -> None:
418419
indexersA = np.array([5], dtype=np.int64)
419420

420421
with self.assertRaises(ValueError):
421-
get_new_indexers_and_screen_ak(indexersA, np.arange(6, dtype=np.int64))
422+
get_new_indexers_and_screen(indexersA, np.arange(6, dtype=np.int64))
422423

423424
with self.assertRaises(ValueError):
424-
get_new_indexers_and_screen_ak(indexersA, np.arange(106, dtype=np.int64))
425+
get_new_indexers_and_screen(indexersA, np.arange(106, dtype=np.int64))
425426

426427
with self.assertRaises(ValueError):
427-
get_new_indexers_and_screen_ak(indexersA.astype(np.int32), np.arange(5))
428+
get_new_indexers_and_screen(indexersA.astype(np.int32), np.arange(5))
428429

429430
indexersB = np.arange(25, dtype=np.int64)
430-
postB = get_new_indexers_and_screen_ak(indexersB, indexersB)
431+
postB = get_new_indexers_and_screen(indexersB, indexersB)
431432
assert tuple(map(list, postB)) == (list(indexersB), list(indexersB))
432433

433434

0 commit comments

Comments
 (0)