@@ -692,39 +692,47 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
692
692
693
693
npy_int64 num_found = 0 ;
694
694
695
- NpyIter * iter ;
696
- NpyIter_IterNextFunc * iternext ;
697
- char * * dataptr ;
698
- npy_intp * strideptr ,* innersizeptr ;
699
- npy_int64 element ;
695
+ // Now, implement the core algorithm by looping over the ``indexers``.
696
+ // We need to use numpy's iteration API, as the ``indexers`` could be
697
+ // C-contiguous, F-contiguous, both, or neither.
698
+ // See https://numpy.org/doc/stable/reference/c-api/iterator.html#simple-iteration-example
700
699
701
- iter = NpyIter_New (
700
+ NpyIter * indexer_iter = NpyIter_New (
702
701
indexers ,
703
- NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK ,
702
+ NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP ,
704
703
NPY_KEEPORDER ,
705
704
NPY_NO_CASTING ,
706
705
NULL
707
706
);
708
- if (iter == NULL ) {
709
- return -1 ;
707
+ if (indexer_iter == NULL ) {
708
+ Py_DECREF (element_locations );
709
+ Py_DECREF (new_indexers );
710
+ return NULL ;
710
711
}
711
712
712
- iternext = NpyIter_GetIterNext (iter , NULL );
713
- if (iternext == NULL ) {
714
- NpyIter_Deallocate (iter );
715
- return -1 ;
713
+ // The iternext function gets stored in a local variable so it can be called repeatedly in an efficient manner.
714
+ NpyIter_IterNextFunc * indexer_iternext = NpyIter_GetIterNext (indexer_iter , NULL );
715
+ if (indexer_iternext == NULL ) {
716
+ NpyIter_Deallocate (indexer_iter );
717
+ Py_DECREF (element_locations );
718
+ Py_DECREF (new_indexers );
719
+ return NULL ;
716
720
}
717
- dataptr = NpyIter_GetDataPtrArray (iter );
718
- strideptr = NpyIter_GetInnerStrideArray (iter );
719
- innersizeptr = NpyIter_GetInnerLoopSizePtr (iter );
721
+
722
+ // All of these will be updated by the iterator
723
+ char * * dataptr = NpyIter_GetDataPtrArray (indexer_iter );
724
+ npy_intp * strideptr = NpyIter_GetInnerStrideArray (indexer_iter );
725
+ npy_intp * innersizeptr = NpyIter_GetInnerLoopSizePtr (indexer_iter );
720
726
721
727
size_t i = 0 ;
722
728
do {
729
+ // Get the inner loop data/stride/inner_size values
723
730
char * data = * dataptr ;
724
731
npy_intp stride = * strideptr ;
725
- npy_intp count = * innersizeptr ;
732
+ npy_intp inner_size = * innersizeptr ;
733
+ npy_int64 element ;
726
734
727
- while (count -- ) {
735
+ while (inner_size -- ) {
728
736
memcpy (& element , data , sizeof (long ));
729
737
730
738
if (element_location_values [element ] == num_unique )
@@ -738,7 +746,7 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
738
746
// If we have found every possible indexer, we can simply return
739
747
// back the inputs! Essentially, we can observe in at most a single pass
740
748
// that we have the opportunity for re-use
741
- NpyIter_Deallocate (iter );
749
+ NpyIter_Deallocate (indexer_iter );
742
750
Py_DECREF (element_locations );
743
751
Py_DECREF (new_indexers );
744
752
return PyTuple_Pack (2 , indexers , positions );
@@ -751,9 +759,10 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
751
759
++ i ;
752
760
}
753
761
754
- } while (iternext (iter ));
762
+ // Increment the iterator to the next inner loop
763
+ } while (indexer_iternext (indexer_iter ));
755
764
756
- NpyIter_Deallocate (iter );
765
+ NpyIter_Deallocate (indexer_iter );
757
766
758
767
PyObject * result = PyTuple_Pack (2 , new_indexers , element_locations );
759
768
Py_DECREF (element_locations );
0 commit comments