Skip to content

Commit a7303e1

Browse files
committed
materialize-snowflake: use SQL nulls instead of JSON nulls for null variants
When querying data from staged JSON files, Snowflake interprets a present JSON `null` value as a JSON null rather than a SQL null, and we end up with values in variant columns that aren't SQL null, but are actually JSON null instead. The most expedient way to fix this, it seems, is to do some casting in the queries that interact with the staged JSON files, replacing JSON nulls with SQL nulls where needed.
1 parent cec508d commit a7303e1

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

materialize-snowflake/.snapshots/TestSQLGeneration

+2-2
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ ALTER TABLE "a-schema".key_value ALTER COLUMN
9595
--- Begin "a-schema".key_value mergeInto ---
9696
MERGE INTO "a-schema".key_value AS l
9797
USING (
98-
SELECT $1[0] AS key1, $1[1] AS key2, $1[2] AS "key!binary", $1[3] AS array, $1[4] AS binary, $1[5] AS boolean, $1[6] AS flow_published_at, $1[7] AS integer, $1[8] AS integerGt64Bit, $1[9] AS integerWithUserDDL, $1[10] AS multiple, $1[11] AS number, $1[12] AS numberCastToString, $1[13] AS object, $1[14] AS string, $1[15] AS stringInteger, $1[16] AS stringInteger39Chars, $1[17] AS stringInteger66Chars, $1[18] AS stringNumber, $1[19] AS flow_document
98+
SELECT $1[0] AS key1, $1[1] AS key2, $1[2] AS "key!binary", NULLIF($1[3], PARSE_JSON('null')) AS array, $1[4] AS binary, $1[5] AS boolean, $1[6] AS flow_published_at, $1[7] AS integer, $1[8] AS integerGt64Bit, $1[9] AS integerWithUserDDL, NULLIF($1[10], PARSE_JSON('null')) AS multiple, $1[11] AS number, $1[12] AS numberCastToString, NULLIF($1[13], PARSE_JSON('null')) AS object, $1[14] AS string, $1[15] AS stringInteger, $1[16] AS stringInteger39Chars, $1[17] AS stringInteger66Chars, $1[18] AS stringNumber, $1[19] AS flow_document
9999
FROM test-file
100100
) AS r
101101
ON
@@ -125,7 +125,7 @@ SELECT 0, "a-schema".key_value.flow_document
125125
COPY INTO "a-schema".key_value (
126126
key1, key2, "key!binary", array, binary, boolean, flow_published_at, integer, integerGt64Bit, integerWithUserDDL, multiple, number, numberCastToString, object, string, stringInteger, stringInteger39Chars, stringInteger66Chars, stringNumber, flow_document
127127
) FROM (
128-
SELECT $1[0] AS key1, $1[1] AS key2, $1[2] AS "key!binary", $1[3] AS array, $1[4] AS binary, $1[5] AS boolean, $1[6] AS flow_published_at, $1[7] AS integer, $1[8] AS integerGt64Bit, $1[9] AS integerWithUserDDL, $1[10] AS multiple, $1[11] AS number, $1[12] AS numberCastToString, $1[13] AS object, $1[14] AS string, $1[15] AS stringInteger, $1[16] AS stringInteger39Chars, $1[17] AS stringInteger66Chars, $1[18] AS stringNumber, $1[19] AS flow_document
128+
SELECT $1[0] AS key1, $1[1] AS key2, $1[2] AS "key!binary", NULLIF($1[3], PARSE_JSON('null')) AS array, $1[4] AS binary, $1[5] AS boolean, $1[6] AS flow_published_at, $1[7] AS integer, $1[8] AS integerGt64Bit, $1[9] AS integerWithUserDDL, NULLIF($1[10], PARSE_JSON('null')) AS multiple, $1[11] AS number, $1[12] AS numberCastToString, NULLIF($1[13], PARSE_JSON('null')) AS object, $1[14] AS string, $1[15] AS stringInteger, $1[16] AS stringInteger39Chars, $1[17] AS stringInteger66Chars, $1[18] AS stringNumber, $1[19] AS flow_document
129129
FROM test-file
130130
);
131131
--- End "a-schema".key_value copyInto ---

materialize-snowflake/sqlgen.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ CREATE PIPE {{ template "pipe_name" . }}
249249
) FROM (
250250
SELECT {{ range $ind, $key := $.Table.Columns }}
251251
{{- if $ind }}, {{ end -}}
252-
$1[{{$ind}}] AS {{$key.Identifier -}}
252+
{{ if eq $key.DDL "VARIANT" }}NULLIF($1[{{$ind}}], PARSE_JSON('null')){{ else }}$1[{{$ind}}]{{ end }} AS {{$key.Identifier -}}
253253
{{- end }}
254254
FROM @flow_v1
255255
);
@@ -264,7 +264,7 @@ COPY INTO {{ $.Table.Identifier }} (
264264
) FROM (
265265
SELECT {{ range $ind, $key := $.Table.Columns }}
266266
{{- if $ind }}, {{ end -}}
267-
$1[{{$ind}}] AS {{$key.Identifier -}}
267+
{{ if eq $key.DDL "VARIANT" }}NULLIF($1[{{$ind}}], PARSE_JSON('null')){{ else }}$1[{{$ind}}]{{ end }} AS {{$key.Identifier -}}
268268
{{- end }}
269269
FROM {{ $.File }}
270270
);
@@ -276,7 +276,7 @@ MERGE INTO {{ $.Table.Identifier }} AS l
276276
USING (
277277
SELECT {{ range $ind, $key := $.Table.Columns }}
278278
{{- if $ind }}, {{ end -}}
279-
$1[{{$ind}}] AS {{$key.Identifier -}}
279+
{{ if eq $key.DDL "VARIANT" }}NULLIF($1[{{$ind}}], PARSE_JSON('null')){{ else }}$1[{{$ind}}]{{ end }} AS {{$key.Identifier -}}
280280
{{- end }}
281281
FROM {{ $.File }}
282282
) AS r

tests/materialize/materialize-snowflake/snapshot.json

+6-6
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
},
5959
"multiple_types": {
6060
"Table": "multiple_types",
61-
"Query": "\nCOPY INTO multiple_types (\n\tid, array_int, binary_field, bool_field, float_field, flow_published_at, multiple, nested, nullable_int, str_field, flow_document\n) FROM (\n\tSELECT $1[0] AS id, $1[1] AS array_int, $1[2] AS binary_field, $1[3] AS bool_field, $1[4] AS float_field, $1[5] AS flow_published_at, $1[6] AS multiple, $1[7] AS nested, $1[8] AS nullable_int, $1[9] AS str_field, $1[10] AS flow_document\n\tFROM <uuid>\n);\n",
61+
"Query": "\nCOPY INTO multiple_types (\n\tid, array_int, binary_field, bool_field, float_field, flow_published_at, multiple, nested, nullable_int, str_field, flow_document\n) FROM (\n\tSELECT $1[0] AS id, NULLIF($1[1], PARSE_JSON('null')) AS array_int, $1[2] AS binary_field, $1[3] AS bool_field, $1[4] AS float_field, $1[5] AS flow_published_at, NULLIF($1[6], PARSE_JSON('null')) AS multiple, NULLIF($1[7], PARSE_JSON('null')) AS nested, $1[8] AS nullable_int, $1[9] AS str_field, $1[10] AS flow_document\n\tFROM <uuid>\n);\n",
6262
"StagedDir": "<uuid>",
6363
"PipeName": "",
6464
"PipeFiles": null,
@@ -148,7 +148,7 @@
148148
},
149149
"multiple_types": {
150150
"Table": "multiple_types",
151-
"Query": "\nMERGE INTO multiple_types AS l\nUSING (\n\tSELECT $1[0] AS id, $1[1] AS array_int, $1[2] AS binary_field, $1[3] AS bool_field, $1[4] AS float_field, $1[5] AS flow_published_at, $1[6] AS multiple, $1[7] AS nested, $1[8] AS nullable_int, $1[9] AS str_field, $1[10] AS flow_document\n\tFROM <uuid>\n) AS r\nON \n\tl.id = r.id AND l.id >= 1 AND l.id <= 10\nWHEN MATCHED AND r.flow_document='delete' THEN\n\tDELETE\nWHEN MATCHED THEN\n\tUPDATE SET l.array_int = r.array_int, l.binary_field = r.binary_field, l.bool_field = r.bool_field, l.float_field = r.float_field, l.flow_published_at = r.flow_published_at, l.multiple = r.multiple, l.nested = r.nested, l.nullable_int = r.nullable_int, l.str_field = r.str_field, l.flow_document = r.flow_document\nWHEN NOT MATCHED and r.flow_document!='delete' THEN\n\tINSERT (id, array_int, binary_field, bool_field, float_field, flow_published_at, multiple, nested, nullable_int, str_field, flow_document)\n\tVALUES (r.id, r.array_int, r.binary_field, r.bool_field, r.float_field, r.flow_published_at, r.multiple, r.nested, r.nullable_int, r.str_field, r.flow_document);\n",
151+
"Query": "\nMERGE INTO multiple_types AS l\nUSING (\n\tSELECT $1[0] AS id, NULLIF($1[1], PARSE_JSON('null')) AS array_int, $1[2] AS binary_field, $1[3] AS bool_field, $1[4] AS float_field, $1[5] AS flow_published_at, NULLIF($1[6], PARSE_JSON('null')) AS multiple, NULLIF($1[7], PARSE_JSON('null')) AS nested, $1[8] AS nullable_int, $1[9] AS str_field, $1[10] AS flow_document\n\tFROM <uuid>\n) AS r\nON \n\tl.id = r.id AND l.id >= 1 AND l.id <= 10\nWHEN MATCHED AND r.flow_document='delete' THEN\n\tDELETE\nWHEN MATCHED THEN\n\tUPDATE SET l.array_int = r.array_int, l.binary_field = r.binary_field, l.bool_field = r.bool_field, l.float_field = r.float_field, l.flow_published_at = r.flow_published_at, l.multiple = r.multiple, l.nested = r.nested, l.nullable_int = r.nullable_int, l.str_field = r.str_field, l.flow_document = r.flow_document\nWHEN NOT MATCHED and r.flow_document!='delete' THEN\n\tINSERT (id, array_int, binary_field, bool_field, float_field, flow_published_at, multiple, nested, nullable_int, str_field, flow_document)\n\tVALUES (r.id, r.array_int, r.binary_field, r.bool_field, r.float_field, r.flow_published_at, r.multiple, r.nested, r.nullable_int, r.str_field, r.flow_document);\n",
152152
"StagedDir": "<uuid>",
153153
"PipeName": "",
154154
"PipeFiles": null,
@@ -219,7 +219,7 @@
219219
},
220220
"multiple_types": {
221221
"PipeName": "",
222-
"Query": "\nMERGE INTO multiple_types AS l\nUSING (\n\tSELECT $1[0] AS id, $1[1] AS array_int, $1[2] AS binary_field, $1[3] AS bool_field, $1[4] AS float_field, $1[5] AS flow_published_at, $1[6] AS multiple, $1[7] AS nested, $1[8] AS nullable_int, $1[9] AS str_field, $1[10] AS flow_document\n\tFROM <uuid>\n) AS r\nON \n\tl.id = r.id AND l.id >= 1 AND l.id <= 10\nWHEN MATCHED AND r.flow_document='delete' THEN\n\tDELETE\nWHEN MATCHED THEN\n\tUPDATE SET l.array_int = r.array_int, l.binary_field = r.binary_field, l.bool_field = r.bool_field, l.float_field = r.float_field, l.flow_published_at = r.flow_published_at, l.multiple = r.multiple, l.nested = r.nested, l.nullable_int = r.nullable_int, l.str_field = r.str_field, l.flow_document = r.flow_document\nWHEN NOT MATCHED and r.flow_document!='delete' THEN\n\tINSERT (id, array_int, binary_field, bool_field, float_field, flow_published_at, multiple, nested, nullable_int, str_field, flow_document)\n\tVALUES (r.id, r.array_int, r.binary_field, r.bool_field, r.float_field, r.flow_published_at, r.multiple, r.nested, r.nullable_int, r.str_field, r.flow_document);\n",
222+
"Query": "\nMERGE INTO multiple_types AS l\nUSING (\n\tSELECT $1[0] AS id, NULLIF($1[1], PARSE_JSON('null')) AS array_int, $1[2] AS binary_field, $1[3] AS bool_field, $1[4] AS float_field, $1[5] AS flow_published_at, NULLIF($1[6], PARSE_JSON('null')) AS multiple, NULLIF($1[7], PARSE_JSON('null')) AS nested, $1[8] AS nullable_int, $1[9] AS str_field, $1[10] AS flow_document\n\tFROM <uuid>\n) AS r\nON \n\tl.id = r.id AND l.id >= 1 AND l.id <= 10\nWHEN MATCHED AND r.flow_document='delete' THEN\n\tDELETE\nWHEN MATCHED THEN\n\tUPDATE SET l.array_int = r.array_int, l.binary_field = r.binary_field, l.bool_field = r.bool_field, l.float_field = r.float_field, l.flow_published_at = r.flow_published_at, l.multiple = r.multiple, l.nested = r.nested, l.nullable_int = r.nullable_int, l.str_field = r.str_field, l.flow_document = r.flow_document\nWHEN NOT MATCHED and r.flow_document!='delete' THEN\n\tINSERT (id, array_int, binary_field, bool_field, float_field, flow_published_at, multiple, nested, nullable_int, str_field, flow_document)\n\tVALUES (r.id, r.array_int, r.binary_field, r.bool_field, r.float_field, r.flow_published_at, r.multiple, r.nested, r.nullable_int, r.str_field, r.flow_document);\n",
223223
"StagedDir": "<uuid>",
224224
"Table": "multiple_types",
225225
"Version": "ffffffffffffffff"
@@ -548,7 +548,7 @@
548548
"FLOAT_FIELD": 88.88,
549549
"FLOW_PUBLISHED_AT": "1970-01-01T01:00:21Z",
550550
"ID": "8",
551-
"MULTIPLE": "null",
551+
"MULTIPLE": null,
552552
"NESTED": "{\n \"id\": \"i8\"\n}",
553553
"NULLABLE_INT": "8",
554554
"STR_FIELD": "str8 v2"
@@ -560,7 +560,7 @@
560560
"FLOAT_FIELD": 99.99,
561561
"FLOW_PUBLISHED_AT": "1970-01-01T01:00:22Z",
562562
"ID": "9",
563-
"MULTIPLE": "null",
563+
"MULTIPLE": null,
564564
"NESTED": "{\n \"id\": \"i9\"\n}",
565565
"NULLABLE_INT": null,
566566
"STR_FIELD": "str9 v2"
@@ -572,7 +572,7 @@
572572
"FLOAT_FIELD": 1010.101,
573573
"FLOW_PUBLISHED_AT": "1970-01-01T01:00:23Z",
574574
"ID": "10",
575-
"MULTIPLE": "null",
575+
"MULTIPLE": null,
576576
"NESTED": "{\n \"id\": \"i10\"\n}",
577577
"NULLABLE_INT": "10",
578578
"STR_FIELD": "str10 v2"

0 commit comments

Comments
 (0)