@@ -53,7 +53,9 @@ use futures::{ready, Stream, StreamExt, TryStreamExt};
5353use itertools:: Itertools ;
5454use log:: debug;
5555use parquet:: arrow:: arrow_reader:: metrics:: ArrowReaderMetrics ;
56- use parquet:: arrow:: arrow_reader:: { ArrowReaderMetadata , ArrowReaderOptions } ;
56+ use parquet:: arrow:: arrow_reader:: {
57+ ArrowReaderMetadata , ArrowReaderOptions , RowSelectionPolicy ,
58+ } ;
5759use parquet:: arrow:: async_reader:: AsyncFileReader ;
5860use parquet:: arrow:: { ParquetRecordBatchStreamBuilder , ProjectionMask } ;
5961use parquet:: file:: metadata:: { PageIndexPolicy , ParquetMetaDataReader } ;
@@ -87,6 +89,8 @@ pub(super) struct ParquetOpener {
8789 pub pushdown_filters : bool ,
8890 /// Should the filters be reordered to optimize the scan?
8991 pub reorder_filters : bool ,
92+ /// Should we force the reader to use RowSelections for filtering?
93+ pub force_filter_selections : bool ,
9094 /// Should the page index be read from parquet files, if present, to skip
9195 /// data pages
9296 pub enable_page_index : bool ,
@@ -147,6 +151,7 @@ impl FileOpener for ParquetOpener {
147151 let partition_fields = self . partition_fields . clone ( ) ;
148152 let reorder_predicates = self . reorder_filters ;
149153 let pushdown_filters = self . pushdown_filters ;
154+ let force_filter_selections = self . force_filter_selections ;
150155 let coerce_int96 = self . coerce_int96 ;
151156 let enable_bloom_filter = self . enable_bloom_filter ;
152157 let enable_row_group_stats_pruning = self . enable_row_group_stats_pruning ;
@@ -347,6 +352,10 @@ impl FileOpener for ParquetOpener {
347352 }
348353 } ;
349354 } ;
355+ if force_filter_selections {
356+ builder =
357+ builder. with_row_selection_policy ( RowSelectionPolicy :: Selectors ) ;
358+ }
350359
351360 // Determine which row groups to actually read. The idea is to skip
352361 // as many row groups as possible based on the metadata and query
@@ -887,6 +896,7 @@ mod test {
887896 partition_fields : vec ! [ ] ,
888897 pushdown_filters : false , // note that this is false!
889898 reorder_filters : false ,
899+ force_filter_selections : false ,
890900 enable_page_index : false ,
891901 enable_bloom_filter : false ,
892902 schema_adapter_factory : Arc :: new ( DefaultSchemaAdapterFactory ) ,
@@ -960,6 +970,7 @@ mod test {
960970 ) ) ] ,
961971 pushdown_filters : false , // note that this is false!
962972 reorder_filters : false ,
973+ force_filter_selections : false ,
963974 enable_page_index : false ,
964975 enable_bloom_filter : false ,
965976 schema_adapter_factory : Arc :: new ( DefaultSchemaAdapterFactory ) ,
@@ -1049,6 +1060,7 @@ mod test {
10491060 ) ) ] ,
10501061 pushdown_filters : false , // note that this is false!
10511062 reorder_filters : false ,
1063+ force_filter_selections : false ,
10521064 enable_page_index : false ,
10531065 enable_bloom_filter : false ,
10541066 schema_adapter_factory : Arc :: new ( DefaultSchemaAdapterFactory ) ,
@@ -1141,6 +1153,7 @@ mod test {
11411153 ) ) ] ,
11421154 pushdown_filters : true , // note that this is true!
11431155 reorder_filters : true ,
1156+ force_filter_selections : false ,
11441157 enable_page_index : false ,
11451158 enable_bloom_filter : false ,
11461159 schema_adapter_factory : Arc :: new ( DefaultSchemaAdapterFactory ) ,
@@ -1233,6 +1246,7 @@ mod test {
12331246 ) ) ] ,
12341247 pushdown_filters : false , // note that this is false!
12351248 reorder_filters : false ,
1249+ force_filter_selections : false ,
12361250 enable_page_index : false ,
12371251 enable_bloom_filter : false ,
12381252 schema_adapter_factory : Arc :: new ( DefaultSchemaAdapterFactory ) ,
@@ -1383,6 +1397,7 @@ mod test {
13831397 partition_fields : vec ! [ ] ,
13841398 pushdown_filters : true ,
13851399 reorder_filters : false ,
1400+ force_filter_selections : false ,
13861401 enable_page_index : false ,
13871402 enable_bloom_filter : false ,
13881403 schema_adapter_factory : Arc :: new ( CustomSchemaAdapterFactory ) ,
0 commit comments