Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ class BigQuery(Dialect):
EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
QUERY_RESULTS_ARE_STRUCTS = True
JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT

# https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'
Expand Down
7 changes: 7 additions & 0 deletions sqlglot/dialects/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,13 @@ class Dialect(metaclass=_Dialect):
so we map the ExplodingGenerateSeries expression to "generate_series" string.
"""

DEFAULT_NULL_TYPE = exp.DataType.Type.UNKNOWN
"""
The default type of NULL for producing the correct projection type.

For example, in BigQuery the default type of the NULL value is INT64.
"""

# --- Autofilled ---

tokenizer_class = Tokenizer
Expand Down
5 changes: 3 additions & 2 deletions sqlglot/optimizer/annotate_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,15 +261,16 @@ def annotate(self, expression: E, annotate_scope: bool = True) -> E:
# This takes care of non-traversable expressions
self._annotate_expression(expression)

# Replace NULL type with UNKNOWN, since the former is not an actual type;
# Replace NULL type with the default type of the targeted dialect, since the former is not an actual type;
# it is mostly used to aid type coercion, e.g. in query set operations.
for expr in self._null_expressions.values():
expr.type = exp.DataType.Type.UNKNOWN
expr.type = self.dialect.DEFAULT_NULL_TYPE

return expression

def annotate_scope(self, scope: Scope) -> None:
selects = {}

for name, source in scope.sources.items():
if not isinstance(source, Scope):
continue
Expand Down
4 changes: 4 additions & 0 deletions tests/fixtures/optimizer/annotate_types.sql
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,7 @@ DATETIME;
# dialect: bigquery
CASE WHEN TRUE THEN TIMESTAMP '2020-02-02 00:00:00' ELSE '2010-01-01' END;
TIMESTAMP;

# dialect: bigquery
NULL;
INT64;
12 changes: 12 additions & 0 deletions tests/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1937,3 +1937,15 @@ def test_deep_ast_type_annotation(self):
annotated = annotate_types(parse_one(binary_sql), schema={"t": {"a": "INT"}})
self.assertEqual(annotated.sql(), binary_sql)
self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.INT)

def test_null_coerce_annotation(self):
    # A bare NULL branch must not dictate the projected type of a UNION ALL:
    # the outer column is expected to be BIGDECIMAL whichever branch comes first.
    union_queries = (
        "SELECT t.foo FROM (SELECT CAST(1 AS BIGDECIMAL) AS foo UNION ALL SELECT NULL AS foo) AS t",
        "SELECT t.foo FROM (SELECT NULL AS foo UNION ALL SELECT CAST(1 AS BIGDECIMAL) AS foo) AS t",
    )

    for null_sql in union_queries:
        annotated = parse_and_optimize(annotate_types, null_sql, "bigquery", dialect="bigquery")

        # Annotation must leave the query text untouched ...
        self.assertEqual(annotated.sql(), null_sql)
        # ... and resolve the projection to the non-NULL branch's type.
        self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.BIGDECIMAL)