Skip to content

Commit d102aa0

Browse files
authored
Remove all use of parquet's validate_schema (#110)
1 parent 64a573c commit d102aa0

File tree

1 file changed

+11
-14
lines changed

1 file changed

+11
-14
lines changed

spatialpandas/io/parquet.py

Lines changed: 11 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -111,16 +111,21 @@ def read_parquet(
111111
engine_kwargs = engine_kwargs or {}
112112
filesystem = validate_coerce_filesystem(path, filesystem, storage_options)
113113

114-
# Load pandas parquet metadata
115-
metadata = _load_parquet_pandas_metadata(
114+
# Load using pyarrow to handle parquet files and directories across filesystems
115+
dataset = pq.ParquetDataset(
116116
path,
117117
filesystem=filesystem,
118-
storage_options=storage_options,
119-
engine_kwargs=engine_kwargs,
118+
#validate_schema=False,
119+
use_legacy_dataset=False,
120+
**engine_kwargs,
121+
**kwargs,
120122
)
121123

124+
metadata = dataset.schema.pandas_metadata
125+
122126
# If columns specified, prepend index columns to it
123127
if columns is not None:
128+
all_columns = set(column['name'] for column in metadata.get('columns', []))
124129
index_col_metadata = metadata.get('index_columns', [])
125130
extra_index_columns = []
126131
for idx_metadata in index_col_metadata:
@@ -130,20 +135,12 @@ def read_parquet(
130135
name = idx_metadata.get('name', None)
131136
else:
132137
name = None
133-
134-
if name is not None and name not in columns:
138+
if name is not None and name not in columns and name in all_columns:
135139
extra_index_columns.append(name)
136140

137141
columns = extra_index_columns + list(columns)
138142

139-
# Load using pyarrow to handle parquet files and directories across filesystems
140-
df = pq.ParquetDataset(
141-
path,
142-
filesystem=filesystem,
143-
validate_schema=False,
144-
**engine_kwargs,
145-
**kwargs,
146-
).read(columns=columns).to_pandas()
143+
df = dataset.read(columns=columns).to_pandas()
147144

148145
# Return result
149146
return GeoDataFrame(df)

0 commit comments

Comments (0)