You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
lldiv_tsize_div=lldiv((long long)count_max, 8); // quot, rem
3575
3575
3576
3576
Py_ssize_tcount=0;
3577
-
// the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large c
3577
+
// the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size
// p += 8; // no true within this 8 byte roll region
3591
-
// continue;
3592
-
// }
3593
-
// if (*p) {indices[count++] = p - p_start;}
3594
-
// p++;
3595
-
// if (*p) {indices[count++] = p - p_start;}
3596
-
// p++;
3597
-
// if (*p) {indices[count++] = p - p_start;}
3598
-
// p++;
3599
-
// if (*p) {indices[count++] = p - p_start;}
3600
-
// p++;
3601
-
// if (*p) {indices[count++] = p - p_start;}
3602
-
// p++;
3603
-
// if (*p) {indices[count++] = p - p_start;}
3604
-
// p++;
3605
-
// if (*p) {indices[count++] = p - p_start;}
3606
-
// p++;
3607
-
// if (*p) {indices[count++] = p - p_start;}
3608
-
// p++;
3609
-
// }
3610
-
// // at most three more indices remain
3611
-
// while (p < p_end) {
3612
-
// if (*p) {indices[count++] = p - p_start;}
3613
-
// p++;
3614
-
// }
3615
3588
3616
-
// while (p < p_end_roll) {
3617
-
// npy_uint64 roll = *(npy_uint64*)p;
3618
-
// if (roll == 0) {
3619
-
// p += 8; // no true within this 8 byte roll region
3620
-
// continue;
3621
-
// }
3622
-
// if (roll >> 56 & 0xFF) {NONZERO_APPEND_OFFSET(0);}
3623
-
// if (roll >> 48 & 0xFF) {NONZERO_APPEND_OFFSET(1);}
3624
-
// if (roll >> 40 & 0xFF) {NONZERO_APPEND_OFFSET(2);}
3625
-
// if (roll >> 32 & 0xFF) {NONZERO_APPEND_OFFSET(3);}
3626
-
// if (roll >> 24 & 0xFF) {NONZERO_APPEND_OFFSET(4);}
3627
-
// if (roll >> 16 & 0xFF) {NONZERO_APPEND_OFFSET(5);}
3628
-
// if (roll >> 8 & 0xFF) {NONZERO_APPEND_OFFSET(6);}
3629
-
// if (roll >> 0 & 0xFF) {NONZERO_APPEND_OFFSET(7);}
3630
-
// while (p < p_end) {
3631
-
// if (*p) {NONZERO_APPEND_OFFSET(0);}
3632
-
// p++;
3633
-
// }
3589
+
// Through experimentation, it has been verified that doing full-size allocation of memory does not permit outperforming NumPy at 10_000_000 scale, but doing fewer optimizations does help.
3590
+
// Doing esoteric things with bit masks does not generally improve performance.
0 commit comments