From 313c3b766dbefcae132c27dc8ba0bd7c4fe4e332 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Mon, 10 Mar 2025 08:43:52 -0600 Subject: [PATCH] fix: handle proj:geometry too --- crates/core/data/proj-geometry.json | 219 ++++++++++++++++++++++++++++ crates/core/src/geoarrow/mod.rs | 109 ++++++++------ crates/duckdb/src/lib.rs | 4 +- 3 files changed, 283 insertions(+), 49 deletions(-) create mode 100644 crates/core/data/proj-geometry.json diff --git a/crates/core/data/proj-geometry.json b/crates/core/data/proj-geometry.json new file mode 100644 index 00000000..eeb39f6e --- /dev/null +++ b/crates/core/data/proj-geometry.json @@ -0,0 +1,219 @@ +{ + "id": "13_031131113310_10200100BCB1A500", + "bbox": [ + -105.11886301297739, + 39.921164, + -105.08955732614571, + 39.93148147273625 + ], + "type": "Feature", + "links": [ + { + "rel": "collection", + "type": "application/json", + "href": "https://stac.eoapi.dev/collections/MAXAR_Marshall_Fire_21_Update" + }, + { + "rel": "parent", + "type": "application/json", + "href": "https://stac.eoapi.dev/collections/MAXAR_Marshall_Fire_21_Update" + }, + { + "rel": "root", + "type": "application/json", + "href": "https://stac.eoapi.dev/" + }, + { + "rel": "self", + "type": "application/geo+json", + "href": "https://stac.eoapi.dev/collections/MAXAR_Marshall_Fire_21_Update/items/13_031131113310_10200100BCB1A500" + } + ], + "assets": { + "visual": { + "href": "s3://maxar-opendata/events/Marshall-Fire-21-Update/ard/13/031131113310/2021-12-30/10200100BCB1A500-visual.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "roles": [ + "visual" + ], + "title": "Visual Image", + "eo:bands": [ + { + "name": "BAND_P_V", + "description": "Pan Visual" + } + ], + "alternate": { + "public": { + "href": "https://maxar-opendata.s3.amazonaws.com/events/Marshall-Fire-21-Update/ard/13/031131113310/2021-12-30/10200100BCB1A500-visual.tif", + "title": "Public Access" + } + }, + "proj:bbox": [ + 489843.75, + 4414843.75, + 495156.25, + 
4420156.25 + ], + "proj:shape": [ + 17408, + 17408 + ], + "proj:transform": [ + 0.30517578125, + 0.0, + 489843.75, + 0.0, + -0.30517578125, + 4420156.25, + 0.0, + 0.0, + 1.0 + ] + }, + "data-mask": { + "href": "s3://maxar-opendata/events/Marshall-Fire-21-Update/ard/13/031131113310/2021-12-30/10200100BCB1A500-data-mask.gpkg", + "type": "application/geopackage+sqlite3", + "roles": [ + "data-mask" + ], + "title": "Data Mask", + "alternate": { + "public": { + "href": "https://maxar-opendata.s3.amazonaws.com/events/Marshall-Fire-21-Update/ard/13/031131113310/2021-12-30/10200100BCB1A500-data-mask.gpkg", + "title": "Public Access" + } + } + }, + "pan_analytic": { + "href": "s3://maxar-opendata/events/Marshall-Fire-21-Update/ard/13/031131113310/2021-12-30/10200100BCB1A500-pan.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "roles": [ + "data" + ], + "title": "Panchromatic Image", + "eo:bands": [ + { + "name": "BAND_P", + "description": "Pan" + } + ], + "alternate": { + "public": { + "href": "https://maxar-opendata.s3.amazonaws.com/events/Marshall-Fire-21-Update/ard/13/031131113310/2021-12-30/10200100BCB1A500-pan.tif", + "title": "Public Access" + } + }, + "proj:bbox": [ + 489843.75, + 4414843.75, + 495156.25, + 4420156.25 + ], + "proj:shape": [ + 9963, + 9963 + ], + "proj:transform": [ + 0.5332229248218409, + 0.0, + 489843.75, + 0.0, + -0.5332229248218409, + 4420156.25, + 0.0, + 0.0, + 1.0 + ] + } + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -105.11886301297739, + 39.931455438601695 + ], + [ + -105.08955732614571, + 39.93148147273625 + ], + [ + -105.089584, + 39.921164 + ], + [ + -105.1188435853324, + 39.922036287398555 + ], + [ + -105.11886301297739, + 39.931455438601695 + ] + ] + ] + }, + "collection": "MAXAR_Marshall_Fire_21_Update", + "properties": { + "gsd": 0.54, + "quadkey": "031131113310", + "datetime": "2021-12-30T21:00:35Z", + "platform": "WV01", + "utm_zone": 13, + "grid:code": "MXRA-Z13-031131113310", + 
"proj:bbox": [ + 489843.75, + 4419011.094275919, + 492347.77631417435, + 4420156.25 + ], + "proj:epsg": 32613, + "catalog_id": "10200100BCB1A500", + "view:azimuth": 307.1, + "proj:geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 489843.75, + 4420156.25 + ], + [ + 492347.77631417435, + 4420156.25 + ], + [ + 492344.3494406195, + 4419011.094275919 + ], + [ + 489844.01777441875, + 4419113.00086445 + ], + [ + 489843.75, + 4420156.25 + ] + ] + ] + }, + "tile:data_area": 2.7, + "view:off_nadir": 17.7, + "tile:clouds_area": 0.0, + "view:sun_azimuth": 208.9, + "view:sun_elevation": 21.5, + "tile:clouds_percent": 0, + "ard_metadata_version": "0.0.1", + "view:incidence_angle": 70.9 + }, + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/view/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.0.0/schema.json", + "https://stac-extensions.github.io/eo/v1.0.0/schema.json", + "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + "https://stac-extensions.github.io/grid/v1.0.0/schema.json", + "https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json" + ] +} \ No newline at end of file diff --git a/crates/core/src/geoarrow/mod.rs b/crates/core/src/geoarrow/mod.rs index 021b749b..d1de5ea4 100644 --- a/crates/core/src/geoarrow/mod.rs +++ b/crates/core/src/geoarrow/mod.rs @@ -29,6 +29,8 @@ const DATETIME_COLUMNS: [&str; 8] = [ "unpublished", ]; +const GEOMETRY_COLUMNS: [&str; 2] = ["geometry", "proj:geometry"]; + /// Converts an [ItemCollection] to a [Table]. /// /// Any invalid attributes in the items (e.g. top-level attributes that conflict @@ -146,62 +148,65 @@ pub fn from_table(table: Table) -> Result { } /// Converts a geometry column to geoarrow native type. 
-pub fn with_native_geometry( - mut record_batch: RecordBatch, - column_name: &str, -) -> Result<RecordBatch> { - if let Some((index, _)) = record_batch.schema().column_with_name(column_name) { - let geometry_column = record_batch.remove_column(index); - let binary_array: GenericByteArray<GenericBinaryType<i32>> = - geometry_column.as_binary::<i32>().clone(); - let wkb_array = WKBArray::new(binary_array, Default::default()); - let geometry_array = geoarrow::io::wkb::from_wkb( - &wkb_array, - NativeType::Geometry(CoordType::Interleaved), - false, - )?; - let mut columns = record_batch.columns().to_vec(); - let mut schema_builder = SchemaBuilder::from(&*record_batch.schema()); - schema_builder.push(geometry_array.extension_field()); - let schema = schema_builder.finish(); - columns.push(geometry_array.to_array_ref()); - record_batch = RecordBatch::try_new(schema.into(), columns)?; +pub fn with_native_geometries(mut record_batch: RecordBatch) -> Result<RecordBatch> { + for column_name in GEOMETRY_COLUMNS { + if let Some((index, _)) = record_batch.schema().column_with_name(column_name) { + let geometry_column = record_batch.remove_column(index); + let binary_array: GenericByteArray<GenericBinaryType<i32>> = + geometry_column.as_binary::<i32>().clone(); + let wkb_array = WKBArray::new(binary_array, Default::default()); + let geometry_array = geoarrow::io::wkb::from_wkb( + &wkb_array, + NativeType::Geometry(CoordType::Interleaved), + false, + )?; + let mut columns = record_batch.columns().to_vec(); + let mut schema_builder = SchemaBuilder::from(&*record_batch.schema()); + schema_builder.push(geometry_array.extension_field()); + let schema = schema_builder.finish(); + columns.push(geometry_array.to_array_ref()); + record_batch = RecordBatch::try_new(schema.into(), columns)?; + } } Ok(record_batch) } /// Converts a geometry column to geoarrow.wkb. 
-pub fn with_wkb_geometry(mut record_batch: RecordBatch, column_name: &str) -> Result<RecordBatch> { - if let Some((index, field)) = record_batch.schema().column_with_name(column_name) { - let geometry_column = record_batch.remove_column(index); - let wkb_array = geoarrow::io::wkb::to_wkb::<i32>(&NativeArrayDyn::from_arrow_array( - &geometry_column, - field, - )?); - let mut columns = record_batch.columns().to_vec(); - let mut schema_builder = SchemaBuilder::from(&*record_batch.schema()); - schema_builder.push(wkb_array.extension_field()); - let schema = schema_builder.finish(); - columns.push(wkb_array.to_array_ref()); - record_batch = RecordBatch::try_new(schema.into(), columns)?; +pub fn with_wkb_geometries(mut record_batch: RecordBatch) -> Result<RecordBatch> { + for column_name in GEOMETRY_COLUMNS { + if let Some((index, field)) = record_batch.schema().column_with_name(column_name) { + let geometry_column = record_batch.remove_column(index); + let wkb_array = geoarrow::io::wkb::to_wkb::<i32>(&NativeArrayDyn::from_arrow_array( + &geometry_column, + field, + )?); + let mut columns = record_batch.columns().to_vec(); + let mut schema_builder = SchemaBuilder::from(&*record_batch.schema()); + schema_builder.push(wkb_array.extension_field()); + let schema = schema_builder.finish(); + columns.push(wkb_array.to_array_ref()); + record_batch = RecordBatch::try_new(schema.into(), columns)?; + } } Ok(record_batch) } /// Adds geoarrow wkb metadata to a geometry column. 
-pub fn add_wkb_metadata(mut record_batch: RecordBatch, column_name: &str) -> Result<RecordBatch> { - if let Some((index, field)) = record_batch.schema().column_with_name(column_name) { - let mut metadata = field.metadata().clone(); - let _ = metadata.insert( - "ARROW:extension:name".to_string(), - "geoarrow.wkb".to_string(), - ); - let field = field.clone().with_metadata(metadata); - let mut schema_builder = SchemaBuilder::from(&*record_batch.schema()); - let field_ref = schema_builder.field_mut(index); - *field_ref = field.into(); - let schema = schema_builder.finish(); - record_batch = record_batch.with_schema(schema.into())?; +pub fn add_wkb_metadata(mut record_batch: RecordBatch) -> Result<RecordBatch> { + for column_name in GEOMETRY_COLUMNS { + if let Some((index, field)) = record_batch.schema().column_with_name(column_name) { + let mut metadata = field.metadata().clone(); + let _ = metadata.insert( + "ARROW:extension:name".to_string(), + "geoarrow.wkb".to_string(), + ); + let field = field.clone().with_metadata(metadata); + let mut schema_builder = SchemaBuilder::from(&*record_batch.schema()); + let field_ref = schema_builder.field_mut(index); + *field_ref = field.into(); + let schema = schema_builder.finish(); + record_batch = record_batch.with_schema(schema.into())?; + } } Ok(record_batch) } @@ -253,6 +258,16 @@ mod tests { let (mut record_batches, _) = table.into_inner(); assert_eq!(record_batches.len(), 1); let record_batch = record_batches.pop().unwrap(); - let _ = super::with_wkb_geometry(record_batch, "geometry").unwrap(); + let _ = super::with_wkb_geometries(record_batch).unwrap(); + } + + #[test] + fn with_wkb_geometries() { + let item: Item = crate::read("data/proj-geometry.json").unwrap(); + let table = super::to_table(vec![item]).unwrap(); + let (mut record_batches, _) = table.into_inner(); + assert_eq!(record_batches.len(), 1); + let record_batch = record_batches.pop().unwrap(); + let _ = super::with_wkb_geometries(record_batch).unwrap(); } } diff --git 
a/crates/duckdb/src/lib.rs b/crates/duckdb/src/lib.rs index 25d6684c..51247c4b 100644 --- a/crates/duckdb/src/lib.rs +++ b/crates/duckdb/src/lib.rs @@ -342,9 +342,9 @@ impl Client { .query_arrow(duckdb::params_from_iter(query.params))? .map(|record_batch| { let record_batch = if self.config.convert_wkb { - stac::geoarrow::with_native_geometry(record_batch, "geometry")? + stac::geoarrow::with_native_geometries(record_batch)? } else { - stac::geoarrow::add_wkb_metadata(record_batch, "geometry")? + stac::geoarrow::add_wkb_metadata(record_batch)? }; Ok(record_batch) })