@@ -248,6 +248,68 @@ TEST(ArrayIterator, StdMerge) {
248248 ASSERT_EQ (values, expected);
249249}
250250
251+ // Custom ValueAccessor for DictionaryArray that decodes values
252+ struct TestDictionaryValueAccessor {
253+ using ValueType = std::string_view;
254+
255+ inline ValueType operator ()(const DictionaryArray& array, int64_t index) {
256+ // Get the dictionary index for this position
257+ int64_t dict_index = array.GetValueIndex (index);
258+
259+ // Get the dictionary and cast it to StringArray
260+ auto dict = checked_pointer_cast<StringArray>(array.dictionary ());
261+
262+ // Return the decoded string value
263+ return dict->GetView (dict_index);
264+ }
265+ };
266+
267+ TEST (ArrayIterator, CustomValueAccessorDictionary) {
268+ // Create a dictionary array with string values
269+ auto dict = ArrayFromJSON (utf8 (), R"( ["apple", "banana", "cherry", "date"])" );
270+ auto indices = ArrayFromJSON (int32 (), " [0, 1, 2, 3, 2, 1, 0, null, 3]" );
271+
272+ auto dict_type = dictionary (int32 (), utf8 ());
273+ auto dict_array = std::make_shared<DictionaryArray>(dict_type, indices, dict);
274+
275+ // Use custom accessor to iterate over decoded values
276+ ArrayIterator<DictionaryArray, TestDictionaryValueAccessor> it (*dict_array);
277+
278+ // Test basic access
279+ ASSERT_EQ (*it, " apple" );
280+ ASSERT_EQ (it[1 ], " banana" );
281+ ASSERT_EQ (it[2 ], " cherry" );
282+ ASSERT_EQ (it[3 ], " date" );
283+ ASSERT_EQ (it[4 ], " cherry" );
284+ ASSERT_EQ (it[5 ], " banana" );
285+ ASSERT_EQ (it[6 ], " apple" );
286+ ASSERT_EQ (it[7 ], nullopt ); // null index
287+ ASSERT_EQ (it[8 ], " date" );
288+
289+ // Test iteration
290+ std::vector<optional<std::string_view>> values;
291+ for (auto end = it + 9 ; it != end; ++it) {
292+ values.push_back (*it);
293+ }
294+
295+ std::vector<optional<std::string_view>> expected{
296+ " apple" , " banana" , " cherry" , " date" , " cherry" , " banana" , " apple" , nullopt , " date" };
297+ ASSERT_EQ (values, expected);
298+
299+ // Test with algorithms - find a specific value
300+ ArrayIterator<DictionaryArray, TestDictionaryValueAccessor> begin (*dict_array);
301+ ArrayIterator<DictionaryArray, TestDictionaryValueAccessor> end (*dict_array,
302+ dict_array->length ());
303+
304+ auto found = std::find (begin, end, " cherry" );
305+ ASSERT_NE (found, end);
306+ ASSERT_EQ (found.index (), 2 ); // First occurrence of "cherry"
307+
308+ // Count occurrences of "banana"
309+ auto count = std::count (begin, end, " banana" );
310+ ASSERT_EQ (count, 2 );
311+ }
312+
251313TEST (ChunkedArrayIterator, Basics) {
252314 auto result = ChunkedArrayFromJSON (int32 (), {R"( [4, 5, null])" , R"( [6])" });
253315 auto it = Begin<Int32Type>(*result);
@@ -545,5 +607,64 @@ TEST(ChunkedArrayIterator, ForEachIterator) {
545607 ASSERT_EQ (values, expected);
546608}
547609
610+ TEST (ChunkedArrayIterator, CustomValueAccessorDictionary) {
611+ // Create multiple dictionary arrays with the same dictionary
612+ auto dict = ArrayFromJSON (utf8 (), R"( ["red", "green", "blue", "yellow"])" );
613+
614+ auto indices1 = ArrayFromJSON (int32 (), " [0, 1, 2]" );
615+ auto indices2 = ArrayFromJSON (int32 (), " [3, 2, null]" );
616+ auto indices3 = ArrayFromJSON (int32 (), " [1, 0, 3, 2]" );
617+
618+ auto dict_type = dictionary (int32 (), utf8 ());
619+ auto dict_array1 = std::make_shared<DictionaryArray>(dict_type, indices1, dict);
620+ auto dict_array2 = std::make_shared<DictionaryArray>(dict_type, indices2, dict);
621+ auto dict_array3 = std::make_shared<DictionaryArray>(dict_type, indices3, dict);
622+
623+ // Create chunked array from dictionary arrays
624+ auto chunked_array = std::make_shared<ChunkedArray>(
625+ std::vector<std::shared_ptr<Array>>{dict_array1, dict_array2, dict_array3},
626+ dict_type);
627+
628+ // Use custom accessor to iterate over decoded values across chunks
629+ auto it =
630+ Begin<DictionaryType, DictionaryArray, TestDictionaryValueAccessor>(*chunked_array);
631+ auto end =
632+ End<DictionaryType, DictionaryArray, TestDictionaryValueAccessor>(*chunked_array);
633+
634+ // Test sequential access across chunks
635+ ASSERT_EQ (*it, " red" ); // chunk 0, index 0
636+ ASSERT_EQ (*(it + 1 ), " green" ); // chunk 0, index 1
637+ ASSERT_EQ (*(it + 2 ), " blue" ); // chunk 0, index 2
638+ ASSERT_EQ (*(it + 3 ), " yellow" ); // chunk 1, index 0
639+ ASSERT_EQ (*(it + 4 ), " blue" ); // chunk 1, index 1
640+ ASSERT_EQ (*(it + 5 ), nullopt ); // chunk 1, index 2 (null)
641+ ASSERT_EQ (*(it + 6 ), " green" ); // chunk 2, index 0
642+ ASSERT_EQ (*(it + 7 ), " red" ); // chunk 2, index 1
643+ ASSERT_EQ (*(it + 8 ), " yellow" ); // chunk 2, index 2
644+ ASSERT_EQ (*(it + 9 ), " blue" ); // chunk 2, index 3
645+
646+ // Collect all values
647+ std::vector<optional<std::string_view>> values;
648+
649+ for (auto elem : Iterate<DictionaryType, DictionaryArray, TestDictionaryValueAccessor>(
650+ *chunked_array)) {
651+ values.push_back (elem);
652+ }
653+
654+ std::vector<optional<std::string_view>> expected{" red" , " green" , " blue" , " yellow" ,
655+ " blue" , nullopt , " green" , " red" ,
656+ " yellow" , " blue" };
657+ ASSERT_EQ (values, expected);
658+
659+ // Test with algorithms - count occurrences of "blue"
660+ auto count = std::count (it, end, " blue" );
661+ ASSERT_EQ (count, 3 );
662+
663+ // Find first occurrence of "yellow"
664+ auto found = std::find (it, end, " yellow" );
665+ ASSERT_NE (found, end);
666+ ASSERT_EQ (found.index (), 3 );
667+ }
668+
548669} // namespace stl
549670} // namespace arrow
0 commit comments