Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ tests/config.h
# CMake
build

# clangd
.cache/clangd/


#############BEGIN VISUAL STUDIO############
Expand Down
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,39 @@ You should be aware that a convention bitset (`bitset_t *`) may use much more
memory than a Roaring bitmap in some cases. You should run benchmarks to determine
whether the conversion to a bitset has performance benefits in your case.


# Convert to boolean array (C)

This example shows how to convert a range of a Roaring bitmap to a boolean array using `roaring_bitmap_range_bool_array`:

```c
// Populate the bitmap: irregular small steps, then every 100th value, then a
// full contiguous range.
roaring_bitmap_t *r1 = roaring_bitmap_create();
for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
roaring_bitmap_add(r1, i);
}
for (uint32_t i = 100000; i < 500000; i+= 100) {
roaring_bitmap_add(r1, i);
}
roaring_bitmap_add_range(r1, 500000, 600000);

// Choose the range of values [range_start, range_end) to convert
uint32_t range_start = 50; // First value covered by the array (included)
uint32_t range_end = 1000; // End of the range (not included)
bool *bool_array = malloc((range_end - range_start) * sizeof(bool));

// Convert the range to a boolean array
roaring_bitmap_range_bool_array(r1, range_start, range_end, bool_array);

// bool_array now holds one true/false entry per value in [range_start, range_end):
// bool_array[i] is true if the value (range_start + i) is in the bitmap

// you must free the memory:
free(bool_array);
roaring_bitmap_free(r1);
```

This function stores each element's presence in a single byte as a boolean value, which can be useful when you need to work with boolean arrays directly for a specific range of the bitmap.

# Example (C++)


Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bitset_container_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "benchmark.h"
#include "random.h"

#define DIV_CEIL_64K(denom) (((1 << 16) + ((denom)-1)) / (denom))
#define DIV_CEIL_64K(denom) (((1 << 16) + ((denom) - 1)) / (denom))

const int repeat = 500;

Expand Down
14 changes: 14 additions & 0 deletions include/roaring/containers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,20 @@ inline int array_container_index_equalorlarger(const array_container_t *arr,
}
}

/**
 * Marks the values of an array container in a boolean buffer, starting at the
 * iterator position.
 *
 * Each value `v` that is read sets `buf[v - first]` to true, where `first` is
 * the value at the iterator position on entry. Other entries of `buf` are not
 * written.
 *
 * @param ac The array container to read from
 * @param it Iterator state (index into the array); advanced past every value
 *           that is read
 * @param buf Boolean buffer to write to
 * @param max_value Reading stops at the first value >= max_value. If
 *                  max_value is greater than UINT16_MAX, all remaining values
 *                  are read.
 * @param value_out Set to the value at the new iterator position; written
 *                  only when the function returns true
 * @return true if the iterator still points at a value after reading, false
 *         if the container was exhausted
 */
bool array_container_iterator_read_into_bool(
const array_container_t *ac, struct roaring_container_iterator_s *it,
bool *buf, uint32_t max_value, uint16_t *value_out);

/*
* Adds all values in range [min,max] using hint:
* nvals_less is the number of array values less than $min
Expand Down
14 changes: 14 additions & 0 deletions include/roaring/containers/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,20 @@ int bitset_container_get_index(const bitset_container_t *container, uint16_t x);
int bitset_container_index_equalorlarger(const bitset_container_t *container,
uint16_t x);

/**
 * Marks the set bits of a bitset container in a boolean buffer, starting at
 * the iterator position.
 *
 * Each set bit `v` that is read sets `buf[v - first]` to true, where `first`
 * is the value of `it->index` on entry. Other entries of `buf` are not
 * written.
 *
 * @param bc The bitset container to read from
 * @param it Iterator state (bit position within the bitset); updated as bits
 *           are read
 * @param buf Boolean buffer to write to
 * @param max_value Reading stops at the first set bit >= max_value
 * @param value_out Set to the new iterator position; written only when the
 *                  function returns true
 * @return true if a set bit >= max_value was found (`it` then points at it),
 *         false if no such bit remains. When false is returned, `it->index`
 *         is not advanced past the last bit that was read.
 */
bool bitset_container_iterator_read_into_bool(
const bitset_container_t *bc, struct roaring_container_iterator_s *it,
bool *buf, uint32_t max_value, uint16_t *value_out);

#ifdef __cplusplus
}
}
Expand Down
18 changes: 18 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2477,6 +2477,24 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
uint32_t count, uint32_t *consumed,
uint16_t *value_out);

/**
 * Iterates over all entries from the value `it` points at up to, but not
 * including, `max_value`, and sets the corresponding positions in `buf` to
 * true.
 *
 * `buf` is filled starting from index 0, which corresponds to the value at the
 * initial iterator position. Every later value `v` that is read sets
 * `buf[v - initial_value]` to true.
 *
 * Returns true and sets `value_out` if a value is present after reading the
 * entries.
 *
 * The initial `it` must point at a value.
 */
bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode,
roaring_container_iterator_t *it,
bool *buf, uint32_t max_value,
uint16_t *value_out);

/**
* Skips the next `skip_count` entries in the container iterator. Returns true
* and sets `value_out` if a value is present after skipping. Returns false if
Expand Down
16 changes: 15 additions & 1 deletion include/roaring/containers/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ typedef struct rle16_s rle16_t;

#ifdef __cplusplus
#define CROARING_MAKE_RLE16(val, len) \
{ (uint16_t)(val), (uint16_t)(len) } // no tagged structs until c++20
{(uint16_t)(val), (uint16_t)(len)} // no tagged structs until c++20
#else
#define CROARING_MAKE_RLE16(val, len) \
(rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
Expand Down Expand Up @@ -709,6 +709,20 @@ static inline void run_container_remove_range(run_container_t *run,
}
}

/**
 * Reads values from the run container into a boolean buffer.
 *
 * NOTE(review): presumably `buf` is indexed relative to the value at the
 * initial iterator position, as documented for
 * `container_iterator_read_into_bool` - confirm against the implementation.
 *
 * @param rc The run container to read from
 * @param it Iterator state (index into the runs array)
 * @param buf Boolean buffer to write to
 * @param max_value Stop reading at the first value >= this value.
 * @param value_out Output parameter for the current/next value
 * @return true if there are more values to read, false otherwise
 */
bool run_container_iterator_read_into_bool(
const run_container_t *rc, struct roaring_container_iterator_s *it,
bool *buf, uint32_t max_value, uint16_t *value_out);

#ifdef __cplusplus
}
}
Expand Down
47 changes: 47 additions & 0 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,24 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
*/
bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset);

/**
 * Convert the values of the bitmap within the range [range_start, range_end)
 * to a dense bool array, written to `ans`.
 *
 * For each index `i` in [0, range_end - range_start), `ans[i]` is set to true
 * if the value (range_start + i) is present in the bitmap, and false
 * otherwise.
 *
 * The caller is responsible for ensuring that enough memory is allocated, e.g.
 *
 *     ans = malloc((range_end - range_start) * sizeof(bool));
 *
 * For more control, see `roaring_uint32_iterator_move_equalorlarger` and
 * `roaring_uint32_iterator_read_into_bool`.
 */
void roaring_bitmap_range_bool_array(const roaring_bitmap_t *r,
uint32_t range_start, uint32_t range_end,
bool *ans);
/**
* Convert the bitmap to a sorted array from `offset` by `limit`, output in
* `ans`.
Expand Down Expand Up @@ -1208,6 +1226,35 @@ CROARING_DEPRECATED static inline void roaring_free_uint32_iterator(
uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
uint32_t *buf, uint32_t count);

/**
* Iterate over `it` in range [it->current_value, max_value) and fill bool array
* `buf` from its beginning.
*
* This function satisfies semantics of iteration and can be used together with
* other iterator functions.
*
* Let `init_it` be the initial iterator, which must have a value. For every
* value visited by `it`, buf[it.current_value - init_it.current_value] is set
* to true; other positions remain false. On return, `it` either has no value
* or points to the first value >= max_value.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will it be invalid, or just point one past the end?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If no element meets the requirement, It will point one past the end.

*
* User should ensure that `buf` has enough space for holding the bool values.
*
* Here is an example:
*                                      final_it(8)
*                         init_it(4)  max_value(8)
*                                │       │
*                                ▼       ▼
*                Values:   1 2 3 4 5 6 7 8 9
*                Roaring:    x   x     x x x
*  The result bool array:       [1 0 0 1]
* Size of the bool array: 4      ▲
*                                │
*                   Beginning of the bool array
*/
void roaring_uint32_iterator_read_into_bool(roaring_uint32_iterator_t *it,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function roaring_bitmap_range_bool_array(r1, range_start, range_end, bool_array) is fine and easy to understand, but I don't understand the use case here, and I don't understand from the description what it does.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I presume that the user is expected to have an iterator that points at some initial value, but I don't understand how they would do it cleanly and what the purpose is.

Copy link
Author

@RinChanNOWWW RinChanNOWWW Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is used to iterate the giving it until max_value. When the function returns, the status of final it will be:

  • it->has_value == false
  • or
  • it->has_value == true && it->current_value >= max_value && prev(it)->current_value < max_value

There is a diagram to show what this function does:

                                     final_it(8)
                             it(4)  max_value(8)
                               │       │        
                               ▼       ▼        
               Values:   1 2 3 4 5 6 7 8 9      
               Roaring:    x   x     x x x      
 The result bool array:       [1 0 0 1]         
Size of the bool array: 4      ▲                
                               │                
                      Start of the bool array   

I will improve the comments and make it more clear.

bool *buf, uint32_t max_value);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The use of max_value feels odd, I would expect an API like this to take a count of booleans, e.g. to be consistent with roaring_uint32_iterator_read. Is there a reason to prefer taking a max_value here? It makes it less obvious for the caller to ensure they provide the right amount of space for the buffer.

Copy link
Author

@RinChanNOWWW RinChanNOWWW Jan 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I use max_value here because I think the meaning is a little different with the meaning of count of roaring_uint32_iterator_read. roaring_uint32_iterator_read will actually iterate count elements in the roaring bitmap while how many elements should be iterated is unknown, and we need to iterate the iterator until it->value >= max_value.


/** DEPRECATED, use `roaring_uint32_iterator_read`. */
CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator(
roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count) {
Expand Down
15 changes: 15 additions & 0 deletions microbenchmarks/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,21 @@ struct to_array64 {
auto ToArray64 = BasicBench<to_array64>;
BENCHMARK(ToArray64);

struct to_array_bool {
static uint64_t run() {
uint64_t marker = 0;
for (size_t i = 0; i < count; ++i) {
uint64_t card = roaring_bitmap_get_cardinality(bitmaps[i]);
roaring_bitmap_range_bool_array(bitmaps[i], 0, card,
array_buffer_bool);
marker += array_buffer_bool[0];
}
return marker;
}
};
auto ToArrayBool = BasicBench<to_array_bool>;
BENCHMARK(ToArrayBool);

struct iterate_all {
static uint64_t run() {
uint64_t marker = 0;
Expand Down
2 changes: 2 additions & 0 deletions microbenchmarks/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ roaring64_bitmap_t **bitmaps64 = NULL;
Roaring64Map **bitmaps64cpp = NULL;
uint32_t *array_buffer;
uint64_t *array_buffer64;
bool *array_buffer_bool;
uint32_t maxvalue = 0;
uint32_t maxcard = 0;

Expand Down Expand Up @@ -200,6 +201,7 @@ static roaring_bitmap_t **create_all_bitmaps(size_t *howmany,
}
array_buffer = (uint32_t *)malloc(maxcard * sizeof(uint32_t));
array_buffer64 = (uint64_t *)malloc(maxcard * sizeof(uint64_t));
array_buffer_bool = (bool *)malloc(maxvalue + 1);
return answer;
}

Expand Down
28 changes: 28 additions & 0 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,34 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base,
return true;
}

/*
 * Marks values of an array container in `buf`, starting at `it->index`.
 *
 * Every value read sets buf[value - first] to true, where `first` is the
 * value stored at the iterator index on entry. Reading stops at the first
 * value >= max_value or at the end of the container. Returns true and writes
 * that value to `value_out` when the iterator still points at a value;
 * returns false when the container is exhausted.
 */
CROARING_ALLOW_UNALIGNED
bool array_container_iterator_read_into_bool(const array_container_t *ac,
                                             roaring_container_iterator_t *it,
                                             bool *buf, uint32_t max_value,
                                             uint16_t *value_out) {
    const int32_t start = it->index;

    if (max_value > UINT16_MAX) {
        // No 16-bit value can reach the bound: drain the whole container.
        // TODO: SIMD optimization
        for (; it->index < ac->cardinality; it->index++) {
            buf[ac->array[it->index] - ac->array[start]] = true;
        }
        return false;
    }

    for (;;) {
        if (it->index >= ac->cardinality) {
            // Ran off the end of the container.
            return false;
        }
        const uint16_t value = ac->array[it->index];
        if ((uint32_t)value >= max_value) {
            // First value outside the requested range: report it and stop.
            *value_out = value;
            return true;
        }
        buf[value - ac->array[start]] = true;
        it->index++;
    }
}

#ifdef __cplusplus
}
}
Expand Down
49 changes: 49 additions & 0 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,55 @@ bool bitset_container_intersect(const bitset_container_t *src_1,
return false;
}

/*
 * Marks set bits of a bitset container in `buf`, starting at `it->index`.
 *
 * Every bit read sets buf[bit - origin] to true, where `origin` is the value
 * of `it->index` on entry. Reading stops at the first set bit >= max_value.
 * Returns true, with `it->index` and `*value_out` set to that bit, when such
 * a bit exists; returns false otherwise.
 */
CROARING_ALLOW_UNALIGNED
bool bitset_container_iterator_read_into_bool(const bitset_container_t *bc,
                                              roaring_container_iterator_t *it,
                                              bool *buf, uint32_t max_value,
                                              uint16_t *value_out) {
    // Word that contains `max_value`, clamped to the last word.
    uint32_t stop_word = max_value / 64;
    if (stop_word >= BITSET_CONTAINER_SIZE_IN_WORDS) {
        stop_word = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
    }

    const uint16_t origin = it->index;
    uint32_t w = it->index / 64;
    // Drop the bits below the iterator position in the first word.
    uint64_t bits = bc->words[w] & (UINT64_MAX << (it->index % 64));

    // Every bit in a word before `stop_word` is below max_value, so this loop
    // needs no per-bit bound check; the final word is handled separately.
    while (w < stop_word) {
        // TODO: SIMD optimization
        for (; bits != 0; bits &= bits - 1) {
            it->index = w * 64 + roaring_trailing_zeroes(bits);
            buf[it->index - origin] = true;
        }
        w++;
        if (w < BITSET_CONTAINER_SIZE_IN_WORDS) {
            bits = bc->words[w];
        }
    }

    // Final word (index `stop_word`): bits here may reach max_value.
    for (; bits != 0; bits &= bits - 1) {
        it->index = w * 64 + roaring_trailing_zeroes(bits);
        if ((uint32_t)it->index >= max_value) {
            *value_out = it->index;
            return true;
        }
        buf[it->index - origin] = true;
    }

    // The final word is used up; look for the next set bit in later words.
    while (bits == 0 && (w + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
        bits = bc->words[++w];
    }
    if (bits == 0) {
        return false;
    }
    it->index = w * 64 + roaring_trailing_zeroes(bits);
    *value_out = it->index;
    return true;
}

#if CROARING_IS_X64
#ifndef CROARING_WORDS_IN_AVX2_REG
#define CROARING_WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
Expand Down
22 changes: 22 additions & 0 deletions src/containers/containers.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,28 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
}
}

/*
 * Dispatches to the container-specific `*_iterator_read_into_bool` routine
 * matching `typecode`, after passing `c` and `typecode` through
 * container_unwrap_shared. All remaining arguments are forwarded unchanged.
 */
bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode,
                                       roaring_container_iterator_t *it,
                                       bool *buf, uint32_t max_value,
                                       uint16_t *value_out) {
    c = container_unwrap_shared(c, &typecode);

    if (typecode == BITSET_CONTAINER_TYPE) {
        return bitset_container_iterator_read_into_bool(
            const_CAST_bitset(c), it, buf, max_value, value_out);
    }
    if (typecode == ARRAY_CONTAINER_TYPE) {
        return array_container_iterator_read_into_bool(
            const_CAST_array(c), it, buf, max_value, value_out);
    }
    if (typecode == RUN_CONTAINER_TYPE) {
        return run_container_iterator_read_into_bool(
            const_CAST_run(c), it, buf, max_value, value_out);
    }

    // Any other typecode is not expected here.
    assert(false);
    roaring_unreachable;
    return false;
}

bool container_iterator_skip(const container_t *c, uint8_t typecode,
roaring_container_iterator_t *it,
uint32_t skip_count, uint32_t *consumed_count,
Expand Down
Loading