@@ -111,16 +111,21 @@ def read_parquet(
     engine_kwargs = engine_kwargs or {}
     filesystem = validate_coerce_filesystem(path, filesystem, storage_options)
 
-    # Load pandas parquet metadata
-    metadata = _load_parquet_pandas_metadata(
+    # Load using pyarrow to handle parquet files and directories across filesystems
+    dataset = pq.ParquetDataset(
         path,
         filesystem=filesystem,
-        storage_options=storage_options,
-        engine_kwargs=engine_kwargs,
+        #validate_schema=False,
+        use_legacy_dataset=False,
+        **engine_kwargs,
+        **kwargs,
     )
 
+    metadata = dataset.schema.pandas_metadata
+
     # If columns specified, prepend index columns to it
     if columns is not None:
+        all_columns = set(column['name'] for column in metadata.get('columns', []))
         index_col_metadata = metadata.get('index_columns', [])
         extra_index_columns = []
         for idx_metadata in index_col_metadata:
@@ -130,20 +135,12 @@ def read_parquet(
                 name = idx_metadata.get('name', None)
             else:
                 name = None
-
-            if name is not None and name not in columns:
+            if name is not None and name not in columns and name in all_columns:
                 extra_index_columns.append(name)
 
         columns = extra_index_columns + list(columns)
 
-    # Load using pyarrow to handle parquet files and directories across filesystems
-    df = pq.ParquetDataset(
-        path,
-        filesystem=filesystem,
-        validate_schema=False,
-        **engine_kwargs,
-        **kwargs,
-    ).read(columns=columns).to_pandas()
+    df = dataset.read(columns=columns).to_pandas()
 
     # Return result
     return GeoDataFrame(df)
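Note: a minimal sketch of the pattern the new code adopts, with a hypothetical path and column name. The dataset is built once and reused for both the pandas-metadata lookup and the column read; use_legacy_dataset is omitted here because recent pyarrow releases no longer accept that keyword.

import pyarrow.parquet as pq

# Hypothetical path; ParquetDataset handles a single file or a directory of parts
dataset = pq.ParquetDataset('data/example.parquet')

# pandas metadata that pyarrow stores on the Arrow schema (None when absent)
metadata = dataset.schema.pandas_metadata or {}

# Index entries may be column names (str) or descriptors (dict, e.g. a RangeIndex),
# which is why the diff's loop checks the type before using a name
index_cols = [c for c in metadata.get('index_columns', []) if isinstance(c, str)]

# The same dataset object serves the read; no second ParquetDataset is constructed
df = dataset.read(columns=index_cols + ['geometry']).to_pandas()

Reusing one dataset object is the substance of the change: the old code parsed the metadata through _load_parquet_pandas_metadata and then constructed a second ParquetDataset just to read the data.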