diff --git a/.gitignore b/.gitignore index 9f3ef475..5fa5a182 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,8 @@ tests/config.h # CMake build +# clangd +.cache/clangd/ #############BEGIN VISUAL STUDIO############ diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h index daeac9de..e08b0319 100644 --- a/include/roaring/containers/array.h +++ b/include/roaring/containers/array.h @@ -466,6 +466,20 @@ inline int array_container_index_equalorlarger(const array_container_t *arr, } } +/** + * Reads values from the array container into a boolean buffer. + * + * @param ac The array container to read from + * @param it Iterator state (index into the array) + * @param buf Boolean buffer to write to + * @param max_value Stop reading when it->current_value > this value. + * @param value_out Output parameter for the next value + * @return true if there are more values to read, false otherwise + */ +bool array_container_iterator_read_into_bool( + const array_container_t *ac, struct roaring_container_iterator_s *it, + bool *buf, uint16_t max_value, uint16_t *value_out); + /* * Adds all values in range [min,max] using hint: * nvals_less is the number of array values less than $min diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h index e60518fc..9ea2e479 100644 --- a/include/roaring/containers/bitset.h +++ b/include/roaring/containers/bitset.h @@ -505,6 +505,20 @@ int bitset_container_get_index(const bitset_container_t *container, uint16_t x); int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); +/** + * Reads values from the bitset container into a boolean buffer. + * + * @param bc The bitset container to read from + * @param it Iterator state (index into the bitset) + * @param buf Boolean buffer to write to + * @param max_value Stop reading when it->current_value > this value. 
+ * @param value_out Output parameter for the next value + * @return true if there are more values to read, false otherwise + */ +bool bitset_container_iterator_read_into_bool( + const bitset_container_t *bc, struct roaring_container_iterator_s *it, + bool *buf, uint16_t max_value, uint16_t *value_out); + #ifdef __cplusplus } } diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h index 5f7c7890..6bc61244 100644 --- a/include/roaring/containers/containers.h +++ b/include/roaring/containers/containers.h @@ -2477,6 +2477,24 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode, uint32_t count, uint32_t *consumed, uint16_t *value_out); +/** + * Iterate all entries within [it->current_value, max_value], and sets + * corresponding positions in `buf` to true. + * + * The `buf` array is filled starting from index 0, which corresponds to the + * initial iterator position `it`. For subsequent iterator positions `it_new`, + * set `buf[it_new->current_value - it->current_value]` to true. + * + * Returns true and sets `value_out` if a value is present after reading the + * entries. + * + * The initial `it` should have a value. + */ +bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + bool *buf, uint16_t max_value, + uint16_t *value_out); + /** * Skips the next `skip_count` entries in the container iterator. Returns true * and sets `value_out` if a value is present after skipping. Returns false if diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h index 4f286968..de7018b9 100644 --- a/include/roaring/containers/run.h +++ b/include/roaring/containers/run.h @@ -709,6 +709,20 @@ static inline void run_container_remove_range(run_container_t *run, } } +/** + * Reads values from the run container into a boolean buffer. 
+ * + * @param rc The run container to read from + * @param it Iterator state (index into the runs array) + * @param buf Boolean buffer to write to + * @param max_value Stop reading when it->current_value > this value. + * @param value_out Output parameter for the current/next value + * @return true if there are more values to read, false otherwise + */ +bool run_container_iterator_read_into_bool( + const run_container_t *rc, struct roaring_container_iterator_s *it, + bool *buf, uint16_t max_value, uint16_t *value_out); + #ifdef __cplusplus } } diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index 9823a408..5ebb03ac 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -555,6 +555,33 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); */ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset); +/** + * Convert the bitmap within the range [range_start, range_end] to a dense bool + * array and output in `ans`. + * + * For each value at position `i` (where i ranges from 0 to + * range_end-range_start) in the output array, `ans[i]` is set to true if the + * value range_start + i is present in the bitmap. + * + * The caller is responsible for ensuring that enough memory is allocated and + * that the memory is initialized to zero, e.g. + * + * ans = calloc(range_end - range_start + 1, sizeof(bool)); + * + * For more control, see `roaring_uint32_iterator_move_equalorlarger` and + * `roaring_uint32_iterator_read_into_bool`. + */ +void roaring_bitmap_to_bool_array_range_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end, bool *ans); + +/** + * Same as `roaring_bitmap_to_bool_array_range_closed`, but the range is + * [range_start, range_end). + */ +void roaring_bitmap_to_bool_array_range(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end, bool *ans); /** * Convert the bitmap to a sorted array from `offset` by `limit`, output in * `ans`. 
@@ -1238,6 +1265,35 @@ uint32_t roaring_uint32_iterator_skip(roaring_uint32_iterator_t *it, uint32_t roaring_uint32_iterator_skip_backward(roaring_uint32_iterator_t *it, uint32_t count); +/** + * Iterate over `it` in range [it->current_value, max_value] and fill bool array + * `buf` from its beginning. + * + * This function follows the usual iterator semantics and can be combined with + * other iterator functions. + * + * Let `init_it` be the initial iterator, which must have a value. For every + * visited `it`, buf[it.current_value - init_it.current_value] is set to true; + * other positions are left untouched. The final `it` will be invalid or point + * to the first value > max_value. + * + * The caller must ensure that `buf` is large enough to hold the bool values. + * + * Here is an example: + * max_value(8) + * init_it(4) │ final_it(9) + * │ │ │ + * ▼ ▼ ▼ + * Values: 1 2 3 4 5 6 7 8 9 + * Roaring: x x x x x + * The result bool array: [1 0 0 1 1] + * Size of the bool array: 5 ▲ + * │ + * Beginning of the bool array + */ +void roaring_uint32_iterator_read_into_bool(roaring_uint32_iterator_t *it, + bool *buf, uint32_t max_value); + #ifdef __cplusplus } } diff --git a/microbenchmarks/bench.cpp b/microbenchmarks/bench.cpp index 1bfb0d0d..6724ee79 100644 --- a/microbenchmarks/bench.cpp +++ b/microbenchmarks/bench.cpp @@ -237,6 +237,21 @@ struct to_array64 { auto ToArray64 = BasicBench; BENCHMARK(ToArray64); +struct to_array_bool { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i < count; ++i) { + uint64_t card = roaring_bitmap_get_cardinality(bitmaps[i]); + roaring_bitmap_to_bool_array_range(bitmaps[i], 0, card, + array_buffer_bool); + marker += array_buffer_bool[0]; + } + return marker; + } +}; +auto ToArrayBool = BasicBench; +BENCHMARK(ToArrayBool); + struct iterate_all { static uint64_t run() { uint64_t marker = 0; diff --git a/microbenchmarks/bench.h b/microbenchmarks/bench.h index a408ffe0..9d4c89d5 100644 --- 
a/microbenchmarks/bench.h +++ b/microbenchmarks/bench.h @@ -42,6 +42,7 @@ roaring64_bitmap_t **bitmaps64 = NULL; Roaring64Map **bitmaps64cpp = NULL; uint32_t *array_buffer; uint64_t *array_buffer64; +bool *array_buffer_bool; uint32_t maxvalue = 0; uint32_t maxcard = 0; @@ -200,6 +201,7 @@ static roaring_bitmap_t **create_all_bitmaps(size_t *howmany, } array_buffer = (uint32_t *)malloc(maxcard * sizeof(uint32_t)); array_buffer64 = (uint64_t *)malloc(maxcard * sizeof(uint64_t)); + array_buffer_bool = (bool *)calloc(maxvalue + 1, sizeof(bool)); return answer; } diff --git a/src/containers/array.c b/src/containers/array.c index b632324d..fc416a65 100644 --- a/src/containers/array.c +++ b/src/containers/array.c @@ -562,6 +562,33 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base, return true; } +CROARING_ALLOW_UNALIGNED +bool array_container_iterator_read_into_bool(const array_container_t *ac, + roaring_container_iterator_t *it, + bool *buf, uint16_t max_value, + uint16_t *value_out) { + int32_t initial_index = it->index; + + if (max_value == UINT16_MAX) { + // TODO: SIMD optimization + while (it->index < ac->cardinality) { + buf[ac->array[it->index] - ac->array[initial_index]] = true; + it->index++; + } + return false; + } + + while (it->index < ac->cardinality && ac->array[it->index] <= max_value) { + buf[ac->array[it->index] - ac->array[initial_index]] = true; + it->index++; + } + if (it->index < ac->cardinality) { + *value_out = ac->array[it->index]; + return true; + } + return false; +} + #ifdef __cplusplus } } diff --git a/src/containers/bitset.c b/src/containers/bitset.c index 4b1076ee..9ef94f93 100644 --- a/src/containers/bitset.c +++ b/src/containers/bitset.c @@ -245,6 +245,54 @@ bool bitset_container_intersect(const bitset_container_t *src_1, return false; } +CROARING_ALLOW_UNALIGNED +bool bitset_container_iterator_read_into_bool(const bitset_container_t *bc, + roaring_container_iterator_t *it, + bool *buf, uint16_t max_value, + 
uint16_t *value_out) { + uint16_t max_wordindex = max_value / 64; + uint16_t wordindex = it->index / 64; + uint64_t word = bc->words[wordindex] & (UINT64_MAX << (it->index % 64)); + uint16_t initial_value = it->index; + // Remain the last word to process out of loop for reducing `if` branches + while (wordindex < max_wordindex) { + // TODO: SIMD optimization + while (word != 0) { + it->index = wordindex * 64 + roaring_trailing_zeroes(word); + buf[it->index - initial_value] = true; + word = word & (word - 1); + } + wordindex++; + if (wordindex < BITSET_CONTAINER_SIZE_IN_WORDS) { + word = bc->words[wordindex]; + } + } + // Process the last word (which is at max_wordindex) + while (word != 0) { + it->index = wordindex * 64 + roaring_trailing_zeroes(word); + if ((uint16_t)it->index > max_value) { + *value_out = it->index; + return true; + } + buf[it->index - initial_value] = true; + word = word & (word - 1); + } + wordindex++; + /// If the bitset is not drained, iterate to the next set bit. 
+ while (wordindex < BITSET_CONTAINER_SIZE_IN_WORDS && + bc->words[wordindex] == 0) { + wordindex++; + } + if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) return false; + word = bc->words[wordindex]; + if (word != 0) { + it->index = wordindex * 64 + roaring_trailing_zeroes(word); + *value_out = it->index; + return true; + } + return false; +} + #if CROARING_IS_X64 #ifndef CROARING_WORDS_IN_AVX2_REG #define CROARING_WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) diff --git a/src/containers/containers.c b/src/containers/containers.c index 8a235ad3..387c3924 100644 --- a/src/containers/containers.c +++ b/src/containers/containers.c @@ -706,6 +706,28 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode, } } +bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + bool *buf, uint16_t max_value, + uint16_t *value_out) { + c = container_unwrap_shared(c, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE: + return bitset_container_iterator_read_into_bool( + const_CAST_bitset(c), it, buf, max_value, value_out); + case ARRAY_CONTAINER_TYPE: + return array_container_iterator_read_into_bool( + const_CAST_array(c), it, buf, max_value, value_out); + case RUN_CONTAINER_TYPE: + return run_container_iterator_read_into_bool( + const_CAST_run(c), it, buf, max_value, value_out); + default: + assert(false); + roaring_unreachable; + return false; + } +} + bool container_iterator_skip(const container_t *c, uint8_t typecode, roaring_container_iterator_t *it, uint32_t skip_count, uint32_t *consumed_count, diff --git a/src/containers/run.c b/src/containers/run.c index 47e65b3f..50615a92 100644 --- a/src/containers/run.c +++ b/src/containers/run.c @@ -1127,6 +1127,52 @@ int run_container_to_uint32_array(void *vout, const run_container_t *cont, #endif +CROARING_ALLOW_UNALIGNED +bool run_container_iterator_read_into_bool(const run_container_t *rc, + roaring_container_iterator_t *it, + 
bool *buf, uint16_t max_value, + uint16_t *value_out) { + uint16_t initial_value = *value_out; + + // TODO: SIMD optimization + if (max_value == UINT16_MAX) { + while (it->index < rc->n_runs) { + uint16_t run_start = rc->runs[it->index].value; + uint16_t run_end = run_start + rc->runs[it->index].length; + // Start from current value if we're in the middle of a run + run_start = (*value_out >= run_start) ? *value_out : run_start; + memset(buf + run_start - initial_value, true, + run_end - run_start + 1); + it->index++; + } + return false; + } + + while (it->index < rc->n_runs) { + uint16_t run_start = rc->runs[it->index].value; + uint16_t run_end = run_start + rc->runs[it->index].length; + + // Start from current value if we're in the middle of a run + uint16_t start = (*value_out >= run_start) ? *value_out : run_start; + // max_value .. [start .. run_end] + if (max_value < start) { + *value_out = start; + return true; + } + // [start .. max_value .. run_end] + if (max_value < run_end) { + memset(buf + start - initial_value, true, max_value - start + 1); + *value_out = max_value + 1; + return true; + } + // [start .. run_end] .. 
max_value + memset(buf + start - initial_value, true, run_end - start + 1); + *value_out = run_end; + it->index++; + } + return false; +} + #ifdef __cplusplus } } diff --git a/src/roaring.c b/src/roaring.c index 0c220a29..d0d1a013 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -1880,6 +1880,33 @@ uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, return ret; } +void roaring_uint32_iterator_read_into_bool(roaring_uint32_iterator_t *it, + bool *buf, uint32_t max_value) { + uint32_t initial_value = it->current_value; + uint32_t highbits_of_max_value = (max_value & 0xFFFF0000); + uint16_t lowbits_of_max_value = (uint16_t)max_value; + bool *pos = buf; + while (it->has_value && it->current_value <= max_value) { + pos = buf + it->current_value - initial_value; + uint16_t low16 = (uint16_t)it->current_value; + uint16_t max_value_for_container = it->highbits == highbits_of_max_value + ? lowbits_of_max_value + : UINT16_MAX; + bool has_value = container_iterator_read_into_bool( + it->container, it->typecode, &it->container_it, pos, + max_value_for_container, &low16); + if (has_value) { + it->has_value = true; + it->current_value = it->highbits | low16; + // If the container still has values, we must have stopped because + // we read enough values. 
+ return; + } + it->container_index++; + it->has_value = loadfirstvalue(it); + } +} + uint32_t roaring_uint32_iterator_skip(roaring_uint32_iterator_t *it, uint32_t count) { uint32_t ret = 0; @@ -3483,6 +3510,34 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { return true; } +/* Sets ans[v - range_start] to true for every v in [range_start, range_end] + * that is present in r; `ans` must be zero-initialized by the caller. */ +void roaring_bitmap_to_bool_array_range_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end, bool *ans) { + if (range_start > range_end) return; + roaring_uint32_iterator_t it; + roaring_iterator_init(r, &it); + if (!roaring_uint32_iterator_move_equalorlarger(&it, range_start)) return; + /* Nothing in range: bail out before forming a pointer past `ans`. */ + if (it.current_value > range_end) return; + roaring_uint32_iterator_read_into_bool( + &it, ans + (it.current_value - range_start), range_end); +} + +/* Half-open variant: covers [range_start, range_end). */ +void roaring_bitmap_to_bool_array_range(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end, bool *ans) { + /* Empty range, or range entirely above the 32-bit universe. */ + if (range_start >= range_end || range_start > UINT32_MAX) return; + /* Clamp instead of letting the cast below truncate range_end. */ + if (range_end > (uint64_t)UINT32_MAX + 1) + range_end = (uint64_t)UINT32_MAX + 1; + roaring_bitmap_to_bool_array_range_closed(r, (uint32_t)range_start, + (uint32_t)(range_end - 1), ans); +} + #ifdef __cplusplus } } diff --git a/tests/array_container_unit.c b/tests/array_container_unit.c index a55e612c..ebff9d14 100644 --- a/tests/array_container_unit.c +++ b/tests/array_container_unit.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -193,6 +194,82 @@ DEFINE_TEST(capacity_test) { array_container_free(array); } +DEFINE_TEST(iterator_read_into_bool_test) { + array_container_t* A = array_container_create(); + assert_non_null(A); + + // Variables to use. 
+ uint16_t initial_value = 0; + uint16_t value_out = 0; + uint16_t max_value = 0; + roaring_container_iterator_t it; + const uint16_t max_elements = 600; + bool* ans_array = (bool*)calloc(max_elements, sizeof(bool)); + bool* bool_array; + + // Add values with gaps + for (uint16_t i = 100; i < 200; i += 5) { + array_container_add(A, i); + ans_array[i] = true; + } + for (uint16_t i = 500; i < max_elements; i += 3) { + array_container_add(A, i); + ans_array[i] = true; + } + + // Test 1: Read without max_value (read all) + it = container_init_iterator(A, ARRAY_CONTAINER_TYPE, &initial_value); + size_t res_size = max_elements - initial_value; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + bool has_more = array_container_iterator_read_into_bool( + A, &it, bool_array, UINT16_MAX, &value_out); + assert_false(has_more); // Should read all values + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + // Test 2: Read with max_value + it = container_init_iterator(A, ARRAY_CONTAINER_TYPE, &initial_value); + max_value = 300; + res_size = max_value - initial_value + 1; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + has_more = array_container_iterator_read_into_bool(A, &it, bool_array, + max_value, &value_out); + assert_true(has_more); + assert_true(value_out == 500); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + // Continue to read to the end. 
+ initial_value = value_out; + res_size = max_elements - initial_value; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + has_more = array_container_iterator_read_into_bool(A, &it, bool_array, + UINT16_MAX, &value_out); + assert_false(has_more); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + // Test 3: Read from middle with max_value + uint32_t consumed; + it = container_init_iterator(A, ARRAY_CONTAINER_TYPE, &initial_value); + container_iterator_skip(A, ARRAY_CONTAINER_TYPE, &it, 10, &consumed, + &initial_value); + max_value = 548; + res_size = max_value - initial_value + 1; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + has_more = array_container_iterator_read_into_bool(A, &it, bool_array, + max_value, &value_out); + assert_true(has_more); + assert_true(value_out == 551); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + array_container_free(A); + free(ans_array); +} + /* This is a fixed-increment version of Java 8's SplittableRandom generator See http://dx.doi.org/10.1145/2714064.2660195 and http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html */ @@ -374,7 +451,8 @@ int main() { cmocka_unit_test(and_or_test), cmocka_unit_test(to_uint32_array_test), cmocka_unit_test(select_test), - cmocka_unit_test(capacity_test)}; + cmocka_unit_test(capacity_test), + cmocka_unit_test(iterator_read_into_bool_test)}; return cmocka_run_group_tests(tests, NULL, NULL); } diff --git a/tests/bitset_container_unit.c b/tests/bitset_container_unit.c index 2991023e..4e6e0194 100644 --- a/tests/bitset_container_unit.c +++ b/tests/bitset_container_unit.c @@ -6,9 +6,11 @@ #include #include #include +#include #include #include +#include #include #ifdef __cplusplus // stronger type checking errors if C built in C++ mode @@ -314,6 +316,83 @@ DEFINE_TEST(select_test) { bitset_container_free(B); } 
+DEFINE_TEST(iterator_read_into_bool_test) { + bitset_container_t* B = bitset_container_create(); + assert_non_null(B); + + // Variables to use. + uint16_t initial_value = 0; + uint16_t value_out = 0; + uint16_t max_value = 0; + roaring_container_iterator_t it; + const uint16_t max_elements = 600; + bool* ans_array = (bool*)calloc(max_elements, sizeof(bool)); + bool* bool_array; + + // Add values with gaps + for (uint16_t i = 100; i < 200; i += 3) { + bitset_container_set(B, i); + ans_array[i] = true; + } + for (uint16_t i = 500; i < max_elements; i += 2) { + bitset_container_set(B, i); + ans_array[i] = true; + } + B->cardinality = bitset_container_compute_cardinality(B); + + // Test 1: Read without max_value (read all) + it = container_init_iterator(B, BITSET_CONTAINER_TYPE, &initial_value); + value_out = initial_value; + size_t res_size = max_elements - initial_value; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + bool has_more = bitset_container_iterator_read_into_bool( + B, &it, bool_array, UINT16_MAX, &value_out); + assert_false(has_more); // Should read all values + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + // Test 2: Read with max_value + it = container_init_iterator(B, BITSET_CONTAINER_TYPE, &initial_value); + max_value = 300; + res_size = max_value - initial_value + 1; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + has_more = bitset_container_iterator_read_into_bool(B, &it, bool_array, + max_value, &value_out); + assert_true(has_more); + assert_true(value_out == 500); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + // Continue to read to the end. 
+ initial_value = value_out; + res_size = max_elements - initial_value; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + has_more = bitset_container_iterator_read_into_bool(B, &it, bool_array, + UINT16_MAX, &value_out); + assert_false(has_more); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + // Test 3: Read from middle with max_value + uint32_t consumed; + it = container_init_iterator(B, BITSET_CONTAINER_TYPE, &initial_value); + container_iterator_skip(B, BITSET_CONTAINER_TYPE, &it, 10, &consumed, + &initial_value); + max_value = 550; + res_size = max_value - initial_value + 1; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + has_more = bitset_container_iterator_read_into_bool(B, &it, bool_array, + max_value, &value_out); + assert_true(has_more); + assert_true(value_out == 552); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + bitset_container_free(B); + free(ans_array); +} + int main() { const struct CMUnitTest tests[] = { cmocka_unit_test(hamming_test), @@ -326,6 +405,7 @@ int main() { cmocka_unit_test(to_uint32_array_test), cmocka_unit_test(select_test), cmocka_unit_test(test_bitset_compute_cardinality), + cmocka_unit_test(iterator_read_into_bool_test), }; return cmocka_run_group_tests(tests, NULL, NULL); diff --git a/tests/run_container_unit.c b/tests/run_container_unit.c index 15bdbf9c..544c0776 100644 --- a/tests/run_container_unit.c +++ b/tests/run_container_unit.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -216,13 +217,93 @@ DEFINE_TEST(remove_range_test) { run_container_free(run); } +DEFINE_TEST(iterator_read_into_bool_test) { + run_container_t* R = run_container_create(); + assert_non_null(R); + + // Variables to use. 
+ uint16_t initial_value = 0; + uint16_t value_out = 0; + uint16_t max_value = 0; + roaring_container_iterator_t it; + const uint16_t max_elements = 600; + bool* ans_array = (bool*)calloc(max_elements, sizeof(bool)); + bool* bool_array; + + // Add runs with gaps + for (uint16_t i = 100; i < 200; i++) { + run_container_add(R, i); + ans_array[i] = true; + } + for (uint16_t i = 500; i < max_elements; i++) { + run_container_add(R, i); + ans_array[i] = true; + } + + // Test 1: Read without max_value (read all) + it = container_init_iterator(R, RUN_CONTAINER_TYPE, &initial_value); + size_t res_size = max_elements - initial_value; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + bool has_more = run_container_iterator_read_into_bool( + R, &it, bool_array, UINT16_MAX, &value_out); + assert_false(has_more); // Should read all values + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + // Test 2: Read with max_value + it = container_init_iterator(R, RUN_CONTAINER_TYPE, &initial_value); + max_value = 300; + res_size = max_value - initial_value + 1; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + has_more = run_container_iterator_read_into_bool(R, &it, bool_array, + max_value, &value_out); + assert_true(has_more); + assert_true(value_out == 500); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + // Continue to read to the end. 
+ initial_value = value_out; + res_size = max_elements - initial_value; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + has_more = run_container_iterator_read_into_bool(R, &it, bool_array, + UINT16_MAX, &value_out); + assert_false(has_more); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + // Test 3: Read from middle with max_value + uint32_t consumed; + it = container_init_iterator(R, RUN_CONTAINER_TYPE, &initial_value); + container_iterator_skip(R, RUN_CONTAINER_TYPE, &it, 10, &consumed, + &initial_value); + max_value = 550; + res_size = max_value - initial_value + 1; + bool_array = (bool*)calloc(res_size, sizeof(bool)); + value_out = initial_value; + has_more = run_container_iterator_read_into_bool(R, &it, bool_array, + max_value, &value_out); + assert_true(has_more); + assert_true(value_out == 551); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + run_container_free(R); + free(ans_array); +} + int main() { tellmeall(); const struct CMUnitTest tests[] = { - cmocka_unit_test(printf_test), cmocka_unit_test(add_contains_test), - cmocka_unit_test(and_or_test), cmocka_unit_test(to_uint32_array_test), - cmocka_unit_test(select_test), cmocka_unit_test(remove_range_test), + cmocka_unit_test(printf_test), + cmocka_unit_test(add_contains_test), + cmocka_unit_test(and_or_test), + cmocka_unit_test(to_uint32_array_test), + cmocka_unit_test(select_test), + cmocka_unit_test(remove_range_test), + cmocka_unit_test(iterator_read_into_bool_test), }; return cmocka_run_group_tests(tests, NULL, NULL); diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index f3ef5a0e..1fb87ca9 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -4996,6 +4996,130 @@ DEFINE_TEST(issue538b) { roaring_bitmap_free(expected); } +DEFINE_TEST(test_roaring_bitmap_range_bool_array) { + roaring_bitmap_t *r = roaring_bitmap_create(); + + const uint32_t max_elements = 600; + bool 
*ans_array = (bool *)calloc(max_elements, sizeof(bool)); + + // Add some values with gaps + for (uint32_t i = 100; i < 200; i += 3) { + ans_array[i] = true; + roaring_bitmap_add(r, i); + } + for (uint32_t i = 500; i < max_elements; i += 2) { + ans_array[i] = true; + roaring_bitmap_add(r, i); + } + + // Test 1: Read a range that includes values + uint32_t range_start = 100; + uint32_t range_end = 300; + bool *bool_array = (bool *)calloc(range_end - range_start, sizeof(bool)); + roaring_bitmap_to_bool_array_range(r, range_start, range_end, bool_array); + + // Verify the bool array + assert_true(memcmp(ans_array + range_start, bool_array, + range_end - range_start) == 0); + free(bool_array); + + // Test 2: Read a range with no values + range_start = 300; + range_end = 400; + bool_array = (bool *)calloc(range_end - range_start, sizeof(bool)); + roaring_bitmap_to_bool_array_range(r, range_start, range_end, bool_array); + + for (size_t i = 0; i < range_end - range_start; i++) { + assert_false(bool_array[i]); + } + assert_true(memcmp(ans_array + range_start, bool_array, + range_end - range_start) == 0); + free(bool_array); + + // Test 3: Read a range that spans multiple containers + for (range_start = 0; range_start < max_elements; range_start += 100) { + for (uint32_t num_values = 100; range_start + num_values < max_elements; + num_values += 100) { + range_end = range_start + num_values; + bool_array = (bool *)calloc(range_end - range_start, sizeof(bool)); + roaring_bitmap_to_bool_array_range(r, range_start, range_end, + bool_array); + assert_true(memcmp(ans_array + range_start, bool_array, + range_end - range_start) == 0); + free(bool_array); + } + } + + for (range_start = 0; range_start < max_elements; range_start += 100) { + for (uint32_t num_values = 100; range_start + num_values < max_elements; + num_values += 100) { + range_end = range_start + num_values; + bool_array = + (bool *)calloc(range_end - range_start + 1, sizeof(bool)); + 
roaring_bitmap_to_bool_array_range_closed(r, range_start, range_end, + bool_array); + assert_true(memcmp(ans_array + range_start, bool_array, + range_end - range_start + 1) == 0); + free(bool_array); + } + } + + free(ans_array); + roaring_bitmap_free(r); +} + +DEFINE_TEST(test_roaring_uint32_iterator_read_into_bool) { + roaring_bitmap_t *r = roaring_bitmap_create(); + + const uint32_t max_elements = 4 * 70000; + bool *ans_array = (bool *)calloc(max_elements, sizeof(bool)); + // Add values in different containers + for (uint32_t i = 100; i < 200; i += 5) { + roaring_bitmap_add(r, i); + ans_array[i] = true; + } + // Construct array + for (uint32_t i = 500; i < 1000; i += 4) { + roaring_bitmap_add(r, i); + ans_array[i] = true; + } + // Construct bitset + for (uint32_t i = 70000; i < max_elements; i += 3) { + roaring_bitmap_add(r, i); + ans_array[i] = true; + } + + // Test 1: Read with max_value in the middle of values + roaring_uint32_iterator_t it; + roaring_iterator_init(r, &it); + uint32_t initial_value = it.current_value; + + uint32_t max_value = 150; + size_t res_size = max_value - initial_value + 1; + bool *bool_array = (bool *)calloc(res_size, sizeof(bool)); + roaring_uint32_iterator_read_into_bool(&it, bool_array, max_value); + assert_true(it.has_value && it.current_value == 155); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == 0); + free(bool_array); + + // Test 2: Read all remaining values + while (it.has_value) { + /// Check 200 values each time. 
+ initial_value = it.current_value; + max_value = initial_value + 200; + if (max_value >= max_elements) max_value = max_elements - 1; + res_size = max_value - initial_value + 1; + bool_array = (bool *)calloc(res_size, sizeof(bool)); + roaring_uint32_iterator_read_into_bool(&it, bool_array, max_value); + assert_true(memcmp(ans_array + initial_value, bool_array, res_size) == + 0); + free(bool_array); + } + + free(ans_array); + roaring_bitmap_free(r); +} + DEFINE_TEST(issue_15jan2024) { roaring_bitmap_t *r1 = roaring_bitmap_create(); roaring_bitmap_add(r1, 1); @@ -5182,6 +5306,8 @@ int main() { #endif // ROARING_UNSAFE_FROZEN_TESTS cmocka_unit_test(issue_15jan2024), #endif + cmocka_unit_test(test_roaring_bitmap_range_bool_array), + cmocka_unit_test(test_roaring_uint32_iterator_read_into_bool), }; return cmocka_run_group_tests(tests, NULL, NULL);