@@ -3546,18 +3546,6 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) {
3546
3546
indices[count++] = p - p_start; \
3547
3547
} \
3548
3548
3549
- #define NONZERO_APPEND_OFFSET (offset ) do { \
3550
- if (AK_UNLIKELY(count == capacity)) { \
3551
- capacity <<= 1; \
3552
- indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
3553
- if (indices == NULL) { \
3554
- return NULL; \
3555
- } \
3556
- } \
3557
- indices[count++] = p + offset - p_start; \
3558
- } while(0) \
3559
-
3560
-
3561
3549
// Given a Boolean, contiguous 1D array, return the index positions in an int64 array.
3562
3550
static inline PyObject *
3563
3551
AK_nonzero_1d (PyArrayObject * array ) {
@@ -3575,8 +3563,7 @@ AK_nonzero_1d(PyArrayObject* array) {
3575
3563
3576
3564
Py_ssize_t count = 0 ;
3577
3565
// the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large sizes, we start with one eighth of count_max
3578
- Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 2 ;
3579
-
3566
+ Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 8 ;
3580
3567
npy_int64 * indices = (npy_int64 * )malloc (sizeof (npy_int64 ) * capacity );
3581
3568
3582
3569
// array is contiguous, 1d, boolean
@@ -3585,9 +3572,10 @@ AK_nonzero_1d(PyArrayObject* array) {
3585
3572
npy_bool * p_end = p + count_max ;
3586
3573
npy_bool * p_end_roll = p_end - size_div .rem ;
3587
3574
3588
-
3575
+ NPY_BEGIN_THREADS_DEF ;
3576
+ NPY_BEGIN_THREADS ;
3589
3577
// Through experimentation it has been verified that doing full-size allocation of memory does not permit outperforming NumPy at 10_000_000 scale; but doing fewer optimizations does help.
3590
- // Doing esoteric things with bit masks does not generally improve perforamnce.
3578
+ // Using bit masks does not improve performance over pointer arithmetic.
3591
3579
// Prescanning for all empty is very effective.
3592
3580
3593
3581
while (p < p_end_roll ) {
@@ -3616,6 +3604,7 @@ AK_nonzero_1d(PyArrayObject* array) {
3616
3604
if (* p ) {NONZERO_APPEND_INDEX ;}
3617
3605
p ++ ;
3618
3606
}
3607
+ NPY_END_THREADS ;
3619
3608
3620
3609
// npy_uint64 roll;
3621
3610
// while (p < p_end_roll) {
@@ -3694,7 +3683,6 @@ AK_nonzero_1d(PyArrayObject* array) {
3694
3683
PyArray_CLEARFLAGS ((PyArrayObject * )final , NPY_ARRAY_WRITEABLE );
3695
3684
return final ;
3696
3685
}
3697
-
3698
3686
#undef NONZERO_APPEND_INDEX
3699
3687
3700
3688
static PyObject *
@@ -3717,7 +3705,6 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
3717
3705
}
3718
3706
3719
3707
3720
-
3721
3708
static char * first_true_1d_kwarg_names [] = {
3722
3709
"array" ,
3723
3710
"forward" ,
@@ -5862,28 +5849,6 @@ static PyTypeObject BlockIndexType = {
5862
5849
// TriMap
5863
5850
//------------------------------------------------------------------------------
5864
5851
5865
- // NOTE: slice selection and assignment is much faster than array selection
5866
- // >>> a1 = np.arange(100_000)
5867
- // >>> slc = slice(50_000, 60_000)
5868
- // >>> alc = np.arange(50_000, 60_000)
5869
- // >>> %timeit a1[slc]
5870
- // 45.6 ns ± 0.133 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
5871
- // >>> %timeit a1[alc]
5872
- // 4.98 µs ± 12.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
5873
- // >>> %timeit a1[slc] = alc
5874
- // 873 ns ± 3.33 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
5875
- // >>> %timeit a1[alc] = alc
5876
- // 6.3 µs ± 25.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
5877
-
5878
- // -- array of integers
5879
- // self._src_many_from: tp.List[int] = []
5880
- // -- normal lists of objects?
5881
- // self._src_many_to: tp.List[slice] = [] // could be int-pairs
5882
-
5883
- // self._dst_many_from: tp.List[TNDArrayInt] = []
5884
- // self._dst_many_to: tp.List[slice] = [] // could be int-pairs
5885
-
5886
-
5887
5852
typedef struct TriMapOne {
5888
5853
Py_ssize_t from ; // signed
5889
5854
Py_ssize_t to ;
@@ -5899,7 +5864,6 @@ typedef struct TriMapManyFrom {
5899
5864
PyArrayObject * dst ;
5900
5865
} TriMapManyFrom ;
5901
5866
5902
-
5903
5867
typedef struct TriMapObject {
5904
5868
PyObject_HEAD
5905
5869
Py_ssize_t src_len ;
@@ -6285,8 +6249,10 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
6285
6249
goto error ;
6286
6250
}
6287
6251
6288
- npy_bool * final_src_match_data = (npy_bool * )PyArray_DATA ((PyArrayObject * )final_src_match );
6289
- npy_bool * final_dst_match_data = (npy_bool * )PyArray_DATA ((PyArrayObject * )final_dst_match );
6252
+ npy_bool * final_src_match_data = (npy_bool * )PyArray_DATA (
6253
+ (PyArrayObject * )final_src_match );
6254
+ npy_bool * final_dst_match_data = (npy_bool * )PyArray_DATA (
6255
+ (PyArrayObject * )final_dst_match );
6290
6256
6291
6257
TriMapOne * o ;
6292
6258
TriMapOne * o_end ;
@@ -6300,7 +6266,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
6300
6266
for (; o < o_end ; o ++ ) {
6301
6267
final_dst_match_data [o -> to ] = NPY_TRUE ;
6302
6268
}
6303
-
6304
6269
// many assign from src and dst into the same final positions
6305
6270
npy_bool * s ;
6306
6271
npy_bool * d ;
@@ -6333,7 +6298,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
6333
6298
if (final_dst_unmatched == NULL ) {
6334
6299
goto error ;
6335
6300
}
6336
-
6337
6301
tm -> final_src_fill = AK_nonzero_1d ((PyArrayObject * )final_src_unmatched );
6338
6302
if (tm -> final_src_fill == NULL ) {
6339
6303
goto error ;
@@ -6342,7 +6306,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
6342
6306
if (tm -> final_dst_fill == NULL ) {
6343
6307
goto error ;
6344
6308
}
6345
-
6346
6309
Py_DECREF (final_src_match );
6347
6310
Py_DECREF (final_dst_match );
6348
6311
Py_DECREF (final_src_unmatched );
@@ -6358,7 +6321,6 @@ TriMap_finalize(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
6358
6321
return NULL ;
6359
6322
}
6360
6323
6361
-
6362
6324
static PyObject *
6363
6325
TriMap_is_many (TriMapObject * self , PyObject * Py_UNUSED (unused )) {
6364
6326
if (!self -> finalized ) {
@@ -6482,7 +6444,6 @@ TriMap_dst_no_fill(TriMapObject *self, PyObject *Py_UNUSED(unused)) {
6482
6444
} \
6483
6445
} \
6484
6446
6485
-
6486
6447
// Based on `tm` state, transfer from src or from dst (depending on `from_src`) to a `array_to`, a newly created contiguous array that is compatible with the values in `array_from`. Returns -1 on error. This only needs to match to / from type combinations that are possible from `resolve_dtype`, i.e., bool never goes to integer.
6487
6448
static inline int
6488
6449
AK_TM_transfer (TriMapObject * tm ,
@@ -6686,8 +6647,8 @@ AK_TM_transfer(TriMapObject* tm,
6686
6647
return 0 ;
6687
6648
}
6688
6649
}
6689
- AK_DEBUG_MSG_OBJ ("array_to" , (PyObject * )array_to );
6690
- AK_DEBUG_MSG_OBJ ("array_from" , (PyObject * )array_from );
6650
+ // AK_DEBUG_MSG_OBJ("array_to", (PyObject*)array_to);
6651
+ // AK_DEBUG_MSG_OBJ("array_from", (PyObject*)array_from);
6691
6652
PyErr_SetString (PyExc_TypeError , "No handling for types" );
6692
6653
return -1 ;
6693
6654
}
@@ -6793,6 +6754,30 @@ AK_TM_fill_object(TriMapObject* tm,
6793
6754
return 0 ;
6794
6755
}
6795
6756
6757
+ // TODO: AK_TM_fill_flexible
6758
+ // this manually inserts string
6759
+ // if (t_is_flexible) {
6760
+ // // insert fill values
6761
+ // Py_UCS4* t = (Py_UCS4*)PyArray_DATA(array_to);
6762
+ // npy_intp t_cp = PyArray_DESCR(array_to)->elsize / UCS4_SIZE;
6763
+ // Py_ssize_t len = PyUnicode_GET_LENGTH(fill_value) * UCS4_SIZE; // code points
6764
+ // Py_ssize_t count = from_src ? tm->src_len : tm->dst_len;
6765
+ // // NOTE: matches do not tell where a fill is needed
6766
+ // npy_bool* d = from_src ? tm->src_match_data : tm->dst_match_data;
6767
+ // npy_bool* d_end = d + count;
6768
+ // while (d < d_end) {
6769
+ // if (*d == NPY_FALSE) {
6770
+ // if (PyUnicode_AsUCS4(fill_value, t, len, 0) == NULL) {
6771
+ // Py_DECREF((PyObject*)array_to);
6772
+ // return NULL;
6773
+ // }
6774
+ // }
6775
+ // t += t_cp;
6776
+ // d++;
6777
+ // }
6778
+ // }
6779
+
6780
+
6796
6781
// Returns NULL on error.
6797
6782
static inline PyObject *
6798
6783
AK_TM_map_no_fill (TriMapObject * tm ,
@@ -6920,6 +6905,7 @@ AK_TM_map_fill(TriMapObject* tm,
6920
6905
return NULL ;
6921
6906
}
6922
6907
}
6908
+ // TODO: add special handling for unicode/bytes
6923
6909
else {
6924
6910
// The simplest approach is to fill with a scalar, then overwrite values as needed; for object and flexible dtypes this is not efficient; for object dtypes, this obligates us to decref the filled value when assigning
6925
6911
if (PyArray_FillWithScalar (array_to , fill_value )) { // -1 on error
@@ -6939,28 +6925,6 @@ AK_TM_map_fill(TriMapObject* tm,
6939
6925
return (PyObject * )array_to ;
6940
6926
}
6941
6927
6942
- // this manually inserts string
6943
- // if (t_is_flexible) {
6944
- // // insert fill values
6945
- // Py_UCS4* t = (Py_UCS4*)PyArray_DATA(array_to);
6946
- // npy_intp t_cp = PyArray_DESCR(array_to)->elsize / UCS4_SIZE;
6947
- // Py_ssize_t len = PyUnicode_GET_LENGTH(fill_value) * UCS4_SIZE; // code points
6948
- // Py_ssize_t count = from_src ? tm->src_len : tm->dst_len;
6949
- // // NOTE: matches do not tell where a fill is needed
6950
- // npy_bool* d = from_src ? tm->src_match_data : tm->dst_match_data;
6951
- // npy_bool* d_end = d + count;
6952
- // while (d < d_end) {
6953
- // if (*d == NPY_FALSE) {
6954
- // if (PyUnicode_AsUCS4(fill_value, t, len, 0) == NULL) {
6955
- // Py_DECREF((PyObject*)array_to);
6956
- // return NULL;
6957
- // }
6958
- // }
6959
- // t += t_cp;
6960
- // d++;
6961
- // }
6962
- // }
6963
-
6964
6928
static PyObject *
6965
6929
TriMap_map_src_fill (TriMapObject * self , PyObject * args ) {
6966
6930
PyArrayObject * array_from ;
0 commit comments