diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index 8c23b9b1e4..34b951320a 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -371,6 +371,7 @@ class BigQuery(Dialect): EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True QUERY_RESULTS_ARE_STRUCTS = True JSON_EXTRACT_SCALAR_SCALAR_ONLY = True + DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-' diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index f1ada85545..4dc15d99f1 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -703,6 +703,13 @@ class Dialect(metaclass=_Dialect): so we map the ExplodingGenerateSeries expression to "generate_series" string. """ + DEFAULT_NULL_TYPE = exp.DataType.Type.UNKNOWN + """ + The default type of NULL for producing the correct projection type. + + For example, in BigQuery the default type of the NULL value is INT64. + """ + # --- Autofilled --- tokenizer_class = Tokenizer diff --git a/sqlglot/optimizer/annotate_types.py b/sqlglot/optimizer/annotate_types.py index 950ea9acb1..39020155fb 100644 --- a/sqlglot/optimizer/annotate_types.py +++ b/sqlglot/optimizer/annotate_types.py @@ -261,15 +261,16 @@ def annotate(self, expression: E, annotate_scope: bool = True) -> E: # This takes care of non-traversable expressions self._annotate_expression(expression) - # Replace NULL type with UNKNOWN, since the former is not an actual type; + # Replace NULL type with the default type of the targeted dialect, since the former is not an actual type; # it is mostly used to aid type coercion, e.g. in query set operations. for expr in self._null_expressions.values(): - expr.type = exp.DataType.Type.UNKNOWN + expr.type = self.dialect.DEFAULT_NULL_TYPE return expression def annotate_scope(self, scope: Scope) -> None: selects = {} + for name, source in scope.sources.items(): if not isinstance(source, Scope): continue diff --git a/tests/fixtures/optimizer/annotate_types.sql b/tests/fixtures/optimizer/annotate_types.sql index a9fde2beea..6227f6f98a 100644 --- a/tests/fixtures/optimizer/annotate_types.sql +++ b/tests/fixtures/optimizer/annotate_types.sql @@ -122,3 +122,7 @@ DATETIME; # dialect: bigquery CASE WHEN TRUE THEN TIMESTAMP '2020-02-02 00:00:00' ELSE '2010-01-01' END; TIMESTAMP; + +# dialect: bigquery +NULL; +INT64; diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 05d0f79f6d..a1f6cbd445 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -1937,3 +1937,15 @@ def test_deep_ast_type_annotation(self): annotated = annotate_types(parse_one(binary_sql), schema={"t": {"a": "INT"}}) self.assertEqual(annotated.sql(), binary_sql) self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.INT) + + def test_null_coerce_annotation(self): + null_sql = "SELECT t.foo FROM (SELECT CAST(1 AS BIGDECIMAL) AS foo UNION ALL SELECT NULL AS foo) AS t" + annotated = parse_and_optimize(annotate_types, null_sql, "bigquery", dialect="bigquery") + + self.assertEqual(annotated.sql(), null_sql) + self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.BIGDECIMAL) + + null_sql = "SELECT t.foo FROM (SELECT NULL AS foo UNION ALL SELECT CAST(1 AS BIGDECIMAL) AS foo) AS t" + annotated = parse_and_optimize(annotate_types, null_sql, "bigquery", dialect="bigquery") + self.assertEqual(annotated.sql(), null_sql) + self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.BIGDECIMAL)