# polyglot-sql: parse and generate BigQuery `CREATE EXTERNAL TABLE` clauses.
#
# What this patch does:
#   * expressions.rs - adds `with_partition_columns` and `with_connection` to `CreateTable`.
#   * builder.rs, dialects/mod.rs, parser.rs - initialise both fields (empty / None) at the
#     `CreateTable` construction sites touched below.
#   * generator.rs - emits `WITH PARTITION COLUMNS (...)` when the list is non-empty, then
#     `WITH CONNECTION <table ref>` when set (always in that order).
#   * parser.rs - when the dialect is BigQuery and the table modifier is EXTERNAL, parses an
#     optional column list, then WITH PARTITION COLUMNS / WITH CONNECTION in any order, then
#     OPTIONS (...), and returns early.
#
# NOTE(review): a bare `WITH PARTITION COLUMNS` (no parenthesised list) parses to an empty
#   list, and the generator skips an empty list, so regenerated SQL omits the clause.
#   Confirm whether the clause has to survive a round-trip.
# NOTE(review): hunk -11371 of parser.rs also adds one blank line; confirm it is intended.
# NOTE(review): `ColumnDef` / `TableRef` on the new fields follow from the
#   generate_column_def / generate_table / parse_table_ref calls in this patch; the
#   `RollupProperty` argument on the `rollup` context line is an assumption - confirm
#   against expressions.rs. Context-line indentation follows rustfmt conventions.
diff --git a/crates/polyglot-sql/src/builder.rs b/crates/polyglot-sql/src/builder.rs
index ab3f815..8c64335 100644
--- a/crates/polyglot-sql/src/builder.rs
+++ b/crates/polyglot-sql/src/builder.rs
@@ -1617,6 +1617,8 @@ impl SelectBuilder {
             copy_grants: false,
             using_template: None,
             rollup: None,
+            with_partition_columns: vec![],
+            with_connection: None,
         }))
     }
 
diff --git a/crates/polyglot-sql/src/dialects/mod.rs b/crates/polyglot-sql/src/dialects/mod.rs
index 1ffe050..64b0188 100644
--- a/crates/polyglot-sql/src/dialects/mod.rs
+++ b/crates/polyglot-sql/src/dialects/mod.rs
@@ -26107,6 +26107,8 @@ impl Dialect {
                         copy_grants: false,
                         using_template: None,
                         rollup: None,
+                        with_partition_columns: Vec::new(),
+                        with_connection: None,
                     };
                     return Expression::CreateTable(Box::new(ct));
                 }
diff --git a/crates/polyglot-sql/src/expressions.rs b/crates/polyglot-sql/src/expressions.rs
index 950ed95..5b76fc3 100644
--- a/crates/polyglot-sql/src/expressions.rs
+++ b/crates/polyglot-sql/src/expressions.rs
@@ -5841,6 +5841,15 @@ pub struct CreateTable {
     /// StarRocks: ROLLUP (r1(col1, col2), r2(col1))
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub rollup: Option<RollupProperty>,
+    /// WITH PARTITION COLUMNS (col_name col_type, ...) — currently used by BigQuery
+    /// for hive-partitioned external tables. Not dialect-prefixed since the syntax
+    /// could appear in other engines.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub with_partition_columns: Vec<ColumnDef>,
+    /// WITH CONNECTION `project.region.connection` — currently used by BigQuery
+    /// for external tables that reference a Cloud Resource connection.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub with_connection: Option<TableRef>,
 }
 
 /// Teradata index specification for CREATE TABLE
@@ -5907,6 +5916,8 @@ impl CreateTable {
             copy_grants: false,
             using_template: None,
             rollup: None,
+            with_partition_columns: Vec::new(),
+            with_connection: None,
         }
     }
 }
diff --git a/crates/polyglot-sql/src/generator.rs b/crates/polyglot-sql/src/generator.rs
index 2ab3033..219fa65 100644
--- a/crates/polyglot-sql/src/generator.rs
+++ b/crates/polyglot-sql/src/generator.rs
@@ -7493,6 +7493,51 @@ impl Generator {
             self.generate_expression(&on_prop.this)?;
         }
 
+        // BigQuery: WITH PARTITION COLUMNS (col_name col_type, ...)
+        if !ct.with_partition_columns.is_empty() {
+            if self.config.pretty {
+                self.write_newline();
+            } else {
+                self.write_space();
+            }
+            self.write_keyword("WITH PARTITION COLUMNS");
+            self.write(" (");
+            if self.config.pretty {
+                self.write_newline();
+                self.indent_level += 1;
+                for (i, col) in ct.with_partition_columns.iter().enumerate() {
+                    if i > 0 {
+                        self.write(",");
+                        self.write_newline();
+                    }
+                    self.write_indent();
+                    self.generate_column_def(col)?;
+                }
+                self.indent_level -= 1;
+                self.write_newline();
+            } else {
+                for (i, col) in ct.with_partition_columns.iter().enumerate() {
+                    if i > 0 {
+                        self.write(", ");
+                    }
+                    self.generate_column_def(col)?;
+                }
+            }
+            self.write(")");
+        }
+
+        // BigQuery: WITH CONNECTION `project.region.connection`
+        if let Some(ref conn) = ct.with_connection {
+            if self.config.pretty {
+                self.write_newline();
+            } else {
+                self.write_space();
+            }
+            self.write_keyword("WITH CONNECTION");
+            self.write_space();
+            self.generate_table(conn)?;
+        }
+
         // Output SchemaCommentProperty BEFORE WITH properties (Presto/Hive/Spark style)
         // For ClickHouse, SchemaCommentProperty goes after AS SELECT, handled later
         if !is_clickhouse {
diff --git a/crates/polyglot-sql/src/parser.rs b/crates/polyglot-sql/src/parser.rs
index b5cd1f7..f156eb5 100644
--- a/crates/polyglot-sql/src/parser.rs
+++ b/crates/polyglot-sql/src/parser.rs
@@ -10950,10 +10950,123 @@ impl Parser {
                 copy_grants: false,
                 using_template: None,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
-        // Handle WITH properties before columns/AS (e.g., CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1)
+        // Handle WITH properties before columns/AS.
+        // BigQuery EXTERNAL tables use WITH differently (WITH PARTITION COLUMNS,
+        // WITH CONNECTION), so handle that complete flow here and early-return.
+        let is_bigquery_external = is_special_modifier
+            && table_modifier == Some("EXTERNAL")
+            && matches!(
+                self.config.dialect,
+                Some(crate::dialects::DialectType::BigQuery)
+            );
+
+        if is_bigquery_external {
+            // BigQuery: CREATE EXTERNAL TABLE [IF NOT EXISTS] name
+            //   [(col_name col_type, ...)]
+            //   [WITH PARTITION COLUMNS (col_name col_type, ...)]
+            //   [WITH CONNECTION `project.region.connection`]
+            //   OPTIONS (key = value, ...)
+
+            // Parse optional column definitions
+            let (columns, constraints) = if self.check(TokenType::LParen) {
+                self.advance(); // consume (
+                let result = self.parse_column_definitions()?;
+                self.expect(TokenType::RParen)?;
+                result
+            } else {
+                (Vec::new(), Vec::new())
+            };
+
+            let mut with_partition_columns = Vec::new();
+            let mut with_connection = None;
+            let mut properties = Vec::new();
+
+            // Parse WITH PARTITION COLUMNS / WITH CONNECTION in any order.
+            // Note: duplicate WITH PARTITION COLUMNS clauses silently overwrite;
+            // this isn't a real-world concern so we don't error on it.
+            while self.check(TokenType::With) {
+                let save = self.current;
+                self.advance(); // consume WITH
+
+                if self.check(TokenType::Partition) {
+                    // WITH PARTITION COLUMNS (col_name col_type, ...)
+                    self.advance(); // consume PARTITION
+                    if !self.match_identifier("COLUMNS") {
+                        return Err(self.parse_error(
+                            "Expected COLUMNS after WITH PARTITION",
+                        ));
+                    }
+                    if self.check(TokenType::LParen) {
+                        self.advance(); // consume (
+                        let (part_cols, _) = self.parse_column_definitions()?;
+                        self.expect(TokenType::RParen)?;
+                        with_partition_columns = part_cols;
+                    }
+                } else if self.match_identifier("CONNECTION") {
+                    // WITH CONNECTION `project.region.connection`
+                    with_connection = Some(self.parse_table_ref()?);
+                } else {
+                    // Not a BQ clause - revert and break
+                    self.current = save;
+                    break;
+                }
+            }
+
+            // Parse OPTIONS (...)
+            if let Some(opts) = self.parse_bigquery_options_property()? {
+                properties.push(opts);
+            }
+
+            return Ok(Expression::CreateTable(Box::new(CreateTable {
+                name,
+                on_cluster: on_cluster.clone(),
+                columns,
+                constraints,
+                if_not_exists,
+                temporary,
+                or_replace,
+                table_modifier: Some("EXTERNAL".to_string()),
+                // BigQuery EXTERNAL tables don't support AS SELECT
+                as_select: None,
+                as_select_parenthesized: false,
+                // Not applicable to BigQuery — Snowflake/Teradata/other-dialect features
+                on_commit: None,
+                clone_source: None,
+                clone_at_clause: None,
+                shallow_clone: false,
+                is_copy: false,
+                leading_comments,
+                // BigQuery EXTERNAL uses OPTIONS(), not generic WITH (key=value) properties
+                with_properties: Vec::new(),
+                teradata_post_name_options: teradata_post_name_options.clone(),
+                with_data: None,
+                with_statistics: None,
+                teradata_indexes: Vec::new(),
+                with_cte: None,
+                // BigQuery OPTIONS are stored here via parse_bigquery_options_property()
+                properties,
+                partition_of: None,
+                post_table_properties: Vec::new(),
+                mysql_table_options: Vec::new(),
+                inherits: Vec::new(),
+                on_property: None,
+                // COPY GRANTS is a Snowflake feature, not applicable to BigQuery
+                copy_grants: false,
+                // USING TEMPLATE is a Snowflake feature, not applicable to BigQuery
+                using_template: None,
+                rollup: None,
+                // BigQuery-specific fields parsed above
+                with_partition_columns,
+                with_connection,
+            })));
+        }
+
+        // Generic WITH properties (e.g., CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1)
         let with_properties = if self.match_token(TokenType::With) {
             self.parse_with_properties()?
         } else {
@@ -11004,6 +11117,8 @@ impl Parser {
                 copy_grants,
                 using_template,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
@@ -11169,6 +11284,8 @@ impl Parser {
                     copy_grants,
                     using_template: None,
                     rollup: None,
+                    with_partition_columns: Vec::new(),
+                    with_connection: None,
                 })));
             } else {
                 self.parse_table_ref()?
@@ -11208,6 +11325,8 @@ impl Parser {
                 copy_grants,
                 using_template: None,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
@@ -11312,6 +11431,8 @@ impl Parser {
                 copy_grants,
                 using_template: None,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
@@ -11371,10 +11492,13 @@ impl Parser {
                     copy_grants,
                     using_template: None,
                     rollup: None,
+                    with_partition_columns: Vec::new(),
+                    with_connection: None,
                 })));
             }
         }
 
+
         // For DYNAMIC/ICEBERG/EXTERNAL tables, columns might be optional (use AS SELECT or other syntax)
         // Check if we have a left paren for columns or if we're going straight to options
         if !self.check(TokenType::LParen) && is_special_modifier {
@@ -11453,6 +11577,8 @@ impl Parser {
                 copy_grants,
                 using_template: None,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
@@ -11495,6 +11621,8 @@ impl Parser {
                 copy_grants,
                 using_template: None,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
@@ -11535,6 +11663,8 @@ impl Parser {
                 copy_grants,
                 using_template: None,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
@@ -11591,6 +11721,8 @@ impl Parser {
                 copy_grants,
                 using_template: None,
                 rollup: None,
+                with_partition_columns: Vec::new(),
+                with_connection: None,
             })));
         }
 
@@ -11642,6 +11774,8 @@ impl Parser {
                     copy_grants,
                     using_template: None,
                     rollup: None,
+                    with_partition_columns: Vec::new(),
+                    with_connection: None,
                 })));
             }
         }
@@ -12703,6 +12837,8 @@ impl Parser {
             copy_grants,
             using_template: None,
             rollup,
+            with_partition_columns: Vec::new(),
+            with_connection: None,
         })))
     }
 
@@ -12850,6 +12986,8 @@ impl Parser {
             copy_grants: false,
             using_template: None,
             rollup: None,
+            with_partition_columns: Vec::new(),
+            with_connection: None,
         })))
     }
 
@@ -56127,6 +56265,338 @@ mod tests {
         assert_eq!(generated, expected);
     }
 
+    #[test]
+    fn test_bigquery_create_external_table_basic() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE t OPTIONS (format='CSV', uris=['gs://bucket/path/*'])";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!(
+                "Expected CreateTable, got {:?}",
+                std::mem::discriminant(other)
+            ),
+        };
+
+        assert_eq!(
+            create.table_modifier.as_deref(),
+            Some("EXTERNAL"),
+            "Expected EXTERNAL table modifier"
+        );
+        assert!(
+            create
+                .properties
+                .iter()
+                .any(|p| matches!(p, Expression::OptionsProperty(_))),
+            "Expected typed OPTIONS property"
+        );
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_with_columns() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE t (id INT64, name STRING) OPTIONS (format='CSV')";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!(
+                "Expected CreateTable, got {:?}",
+                std::mem::discriminant(other)
+            ),
+        };
+
+        assert_eq!(
+            create.table_modifier.as_deref(),
+            Some("EXTERNAL"),
+        );
+        assert_eq!(create.columns.len(), 2, "Expected 2 column definitions");
+        assert_eq!(create.columns[0].name.name, "id");
+        assert_eq!(create.columns[1].name.name, "name");
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_with_partition_columns() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS (dt DATE, region STRING) OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!(
+                "Expected CreateTable, got {:?}",
+                std::mem::discriminant(other)
+            ),
+        };
+
+        assert_eq!(
+            create.with_partition_columns.len(),
+            2,
+            "Expected 2 partition columns"
+        );
+        assert_eq!(create.with_partition_columns[0].name.name, "dt");
+        assert_eq!(create.with_partition_columns[1].name.name, "region");
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_with_connection() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE t WITH CONNECTION `project.us.my_connection` OPTIONS (format='CSV')";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!(
+                "Expected CreateTable, got {:?}",
+                std::mem::discriminant(other)
+            ),
+        };
+
+        assert!(
+            create.with_connection.is_some(),
+            "Expected WITH CONNECTION to be set"
+        );
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_full() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE `project.dataset.my_table` WITH PARTITION COLUMNS (dt DATE) WITH CONNECTION `project.us.my_conn` OPTIONS (format='PARQUET', uris=['gs://bucket/path/*'])";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!(
+                "Expected CreateTable, got {:?}",
+                std::mem::discriminant(other)
+            ),
+        };
+
+        assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
+        assert_eq!(create.with_partition_columns.len(), 1);
+        assert!(create.with_connection.is_some());
+        assert!(
+            create
+                .properties
+                .iter()
+                .any(|p| matches!(p, Expression::OptionsProperty(_))),
+        );
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_roundtrip() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE t OPTIONS (format='CSV', uris=['gs://bucket/*'])";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
+        assert_eq!(generated, sql);
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_with_partition_columns_roundtrip() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS (dt DATE, region STRING) OPTIONS (format='PARQUET')";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
+        assert_eq!(generated, sql);
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_with_connection_roundtrip() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE t WITH CONNECTION `project.us.my_connection` OPTIONS (format='CSV')";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
+        assert_eq!(generated, sql);
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_full_roundtrip() {
+        use crate::DialectType;
+
+        let sql = "CREATE EXTERNAL TABLE `project.dataset.my_table` WITH PARTITION COLUMNS (dt DATE) WITH CONNECTION `project.us.conn` OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
+        assert_eq!(generated, sql);
+    }
+
+    // === BigQuery WITH syntax compatibility tests ===
+    // These verify that the is_bigquery_external guard does NOT break other BigQuery syntaxes.
+
+    #[test]
+    fn test_bigquery_create_table_as_select_no_with() {
+        use crate::DialectType;
+        let sql = "CREATE TABLE my_table AS SELECT 1 AS id, 'hello' AS name";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
+        assert_eq!(generated, sql);
+    }
+
+    #[test]
+    fn test_bigquery_create_table_with_cte_in_as_select() {
+        use crate::DialectType;
+        // CTE (WITH ... AS) inside the AS SELECT clause — the WITH is part of the query, not table properties
+        let sql = "CREATE TABLE my_table AS WITH cte AS (SELECT 1 AS id) SELECT * FROM cte";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.as_select.is_some(), "Expected AS SELECT with CTE");
+        assert!(create.table_modifier.is_none(), "Should NOT have EXTERNAL modifier");
+    }
+
+    #[test]
+    fn test_bigquery_create_table_with_multiple_ctes() {
+        use crate::DialectType;
+        let sql = "CREATE TABLE result AS WITH cte1 AS (SELECT 1 AS a), cte2 AS (SELECT 2 AS b) SELECT * FROM cte1 CROSS JOIN cte2";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.as_select.is_some(), "Expected AS SELECT with multiple CTEs");
+    }
+
+    #[test]
+    fn test_bigquery_create_table_partition_cluster_options_roundtrip() {
+        use crate::DialectType;
+        let sql = "CREATE TABLE t1 PARTITION BY dt CLUSTER BY region OPTIONS (description='partitioned') AS SELECT CURRENT_DATE() AS dt, 'us' AS region";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.table_modifier.is_none());
+        assert!(
+            create.properties.iter().any(|p| matches!(p, Expression::PartitionByProperty(_))),
+            "Expected PARTITION BY"
+        );
+        assert!(
+            create.properties.iter().any(|p| matches!(p, Expression::ClusterByColumnsProperty(_))),
+            "Expected CLUSTER BY"
+        );
+        assert!(
+            create.properties.iter().any(|p| matches!(p, Expression::OptionsProperty(_))),
+            "Expected OPTIONS"
+        );
+    }
+
+    #[test]
+    fn test_bigquery_create_or_replace_table_as_select() {
+        use crate::DialectType;
+        let sql = "CREATE OR REPLACE TABLE my_table AS SELECT 1 AS id";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.or_replace);
+        assert!(create.table_modifier.is_none());
+    }
+
+    #[test]
+    fn test_bigquery_create_table_if_not_exists_with_columns() {
+        use crate::DialectType;
+        let sql = "CREATE TABLE IF NOT EXISTS my_table (id INT64, name STRING)";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.if_not_exists);
+        assert_eq!(create.columns.len(), 2);
+    }
+
+    #[test]
+    fn test_bigquery_external_table_if_not_exists() {
+        use crate::DialectType;
+        let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS ext_t OPTIONS (format='CSV')";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.if_not_exists);
+        assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
+    }
+
+    #[test]
+    fn test_bigquery_or_replace_external_table() {
+        use crate::DialectType;
+        let sql = "CREATE OR REPLACE EXTERNAL TABLE ext_t OPTIONS (format='CSV')";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.or_replace);
+        assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
+    }
+
+    #[test]
+    fn test_bigquery_external_table_pr_description_sql() {
+        use crate::DialectType;
+        // Exact SQL from the PR description — the query that was failing before this fix
+        let sql = r#"CREATE EXTERNAL TABLE IF NOT EXISTS `my_project.my_dataset.my_table`
+WITH PARTITION COLUMNS (
+  table_name STRING,
+  sync_date DATE,
+  start_date DATE,
+  end_date DATE,
+  sync_id STRING
+)
+OPTIONS (
+  format = 'PARQUET',
+  uris = ['gs://my-bucket/data/table_name=my_table/*'],
+  hive_partition_uri_prefix = 'gs://my-bucket/data',
+  require_hive_partition_filter = false
+)"#;
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.if_not_exists);
+        assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
+        assert_eq!(create.with_partition_columns.len(), 5, "Expected 5 partition columns");
+        assert!(!create.properties.is_empty(), "Expected OPTIONS properties");
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_reversed_clauses() {
+        use crate::DialectType;
+        let sql = "CREATE EXTERNAL TABLE t WITH CONNECTION `project.us.my_conn` WITH PARTITION COLUMNS (dt DATE) OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.with_connection.is_some(), "Expected WITH CONNECTION");
+        assert_eq!(create.with_partition_columns.len(), 1, "Expected 1 partition column");
+        // Roundtrip: generator always emits PARTITION COLUMNS before CONNECTION
+        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
+        let expected = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS (dt DATE) WITH CONNECTION `project.us.my_conn` OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
+        assert_eq!(generated, expected);
+    }
+
+    #[test]
+    fn test_bigquery_create_external_table_bare_partition_columns() {
+        use crate::DialectType;
+        let sql = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
+        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
+        let create = match &parsed[0] {
+            Expression::CreateTable(ct) => ct,
+            other => panic!("Expected CreateTable, got {:?}", std::mem::discriminant(other)),
+        };
+        assert!(create.with_partition_columns.is_empty(), "Bare WITH PARTITION COLUMNS should produce empty partition column list");
+        assert!(!create.properties.is_empty(), "Expected OPTIONS");
+    }
+
     #[test]
     fn test_parse_drop_table() {
         let result = Parser::parse_sql("DROP TABLE IF EXISTS users CASCADE").unwrap();