From 8d3a25d437a8181917ec4563bc830aea10982f95 Mon Sep 17 00:00:00 2001 From: Winnie Date: Fri, 19 Sep 2025 17:06:38 -0700 Subject: [PATCH 1/9] Added mode last struct --- sql/mozfun/map/mode_last_struct/README.md | 0 sql/mozfun/map/mode_last_struct/metadata.yaml | 4 + sql/mozfun/map/mode_last_struct/udf.sql | 154 ++++++++++++++++++ 3 files changed, 158 insertions(+) create mode 100644 sql/mozfun/map/mode_last_struct/README.md create mode 100644 sql/mozfun/map/mode_last_struct/metadata.yaml create mode 100644 sql/mozfun/map/mode_last_struct/udf.sql diff --git a/sql/mozfun/map/mode_last_struct/README.md b/sql/mozfun/map/mode_last_struct/README.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sql/mozfun/map/mode_last_struct/metadata.yaml b/sql/mozfun/map/mode_last_struct/metadata.yaml new file mode 100644 index 00000000000..3aa3168c7d4 --- /dev/null +++ b/sql/mozfun/map/mode_last_struct/metadata.yaml @@ -0,0 +1,4 @@ +--- +description: Given an array of structs, return the most frequently occurring full struct; + break ties by the latest occurrence (mode-last). Equality is on the entire struct, not per-field. +friendly_name: Map Mode Last Struct diff --git a/sql/mozfun/map/mode_last_struct/udf.sql b/sql/mozfun/map/mode_last_struct/udf.sql new file mode 100644 index 00000000000..3e538a19f6e --- /dev/null +++ b/sql/mozfun/map/mode_last_struct/udf.sql @@ -0,0 +1,154 @@ +CREATE OR REPLACE FUNCTION map.mode_last_struct(entries ANY TYPE) AS ( + ( + SELECT AS STRUCT + s.* + FROM + ( + SELECT + s, + COUNT(*) AS freq, + MAX(pos) AS last_pos + FROM + UNNEST(entries) AS s + WITH OFFSET pos + GROUP BY + s + ) + ORDER BY + freq DESC, + last_pos DESC + LIMIT + 1 + ) +); + +-- Tests +SELECT + -- 1) Most frequent wins (Berlin appears twice) + assert.equals( + TO_JSON_STRING( + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + ), + TO_JSON_STRING( + map.mode_last_struct( + [ + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + ] + ) + ) + ), + -- 2) Tie -> latest wins (Berlin x2, Munich x2, last element is Munich) + assert.equals( + TO_JSON_STRING( + STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + ), + TO_JSON_STRING( + map.mode_last_struct( + [ + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + NULL AS subdivision2, + 'DE' AS country + ) -- latest among the tied + ] + ) + ) + ), + -- 3) FULL-struct equality: different subdivision2 means different value + assert.equals( + TO_JSON_STRING( + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country) + ), + TO_JSON_STRING( + map.mode_last_struct( + [ + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'B' AS subdivision2, 'DE' AS country), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country) + ] + ) + ) + ), + -- 4) Single element returns itself + assert.equals( + TO_JSON_STRING( + STRUCT('Cologne' AS city, 'NW' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + ), + TO_JSON_STRING( + map.mode_last_struct( + [STRUCT('Cologne' AS city, 'NW' AS subdivision1, NULL AS subdivision2, 'DE' AS country)] + ) + ) + ), +-- 5) Tie between NULL struct and a non-NULL struct; latest wins -> expect Berlin +SELECT + assert.equals( + TO_JSON_STRING( + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + ), + TO_JSON_STRING( + map.mode_last_struct( + [ + CAST( + NULL + AS + STRUCT + ), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + CAST( + NULL + AS + STRUCT + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + NULL AS subdivision2, + 'DE' AS country + ) -- latest among the tied + ] + ) + ) + ), +-- 6) NULL struct occurs most frequently -> expect NULL + assert.equals( + TO_JSON_STRING(NULL), + TO_JSON_STRING( + map.mode_last_struct( + [ + CAST( + NULL + AS + STRUCT + ), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + CAST( + NULL + AS + STRUCT + ) + ] + ) + ) + ), +-- 7) City is NULL but other fields present; that exact struct is most frequent -> expect that struct (with city = NULL) + assert.equals( + TO_JSON_STRING( + STRUCT(NULL AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + ), + TO_JSON_STRING( + map.mode_last_struct( + [ + STRUCT(NULL AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT(NULL AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + ] + ) + ) + ); From 601af8491b70de0edb599deecc60ef4a37edaeb1 Mon Sep 17 00:00:00 2001 From: Winnie Date: Fri, 19 Sep 2025 17:14:16 -0700 Subject: [PATCH 2/9] Added udf comment --- sql/mozfun/map/mode_last_struct/udf.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/mozfun/map/mode_last_struct/udf.sql b/sql/mozfun/map/mode_last_struct/udf.sql index 3e538a19f6e..5825fbb6511 100644 --- a/sql/mozfun/map/mode_last_struct/udf.sql +++ b/sql/mozfun/map/mode_last_struct/udf.sql @@ -1,3 +1,8 @@ +/* +Return the most frequent STRUCT from an array, breaking ties by latest occurrence +(i.e., mode_last over whole structs). Use to keep related fields aggregated together. +See also: `map.mode_last`, which determines each value using `stats.mode_last`. +*/ CREATE OR REPLACE FUNCTION map.mode_last_struct(entries ANY TYPE) AS ( ( SELECT AS STRUCT From 303f6aa9d16fb544ffbf348e7b40fc226518dece Mon Sep 17 00:00:00 2001 From: Winnie Date: Fri, 19 Sep 2025 17:16:35 -0700 Subject: [PATCH 3/9] Removed extra select --- sql/mozfun/map/mode_last_struct/udf.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/mozfun/map/mode_last_struct/udf.sql b/sql/mozfun/map/mode_last_struct/udf.sql index 5825fbb6511..0e043ca4450 100644 --- a/sql/mozfun/map/mode_last_struct/udf.sql +++ b/sql/mozfun/map/mode_last_struct/udf.sql @@ -92,7 +92,6 @@ SELECT ) ), -- 5) Tie between NULL struct and a non-NULL struct; latest wins -> expect Berlin -SELECT assert.equals( TO_JSON_STRING( STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country) From dd988a3c695a20f9c5a778ad6a594115abb608a6 Mon Sep 17 00:00:00 2001 From: Winnie Date: Mon, 22 Sep 2025 10:57:03 -0700 Subject: [PATCH 4/9] Fixed null in tests --- sql/mozfun/map/mode_last_struct/udf.sql | 30 ++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/sql/mozfun/map/mode_last_struct/udf.sql b/sql/mozfun/map/mode_last_struct/udf.sql index 0e043ca4450..dbae86be6dd 100644 --- a/sql/mozfun/map/mode_last_struct/udf.sql +++ b/sql/mozfun/map/mode_last_struct/udf.sql @@ -47,7 +47,12 @@ SELECT -- 2) Tie -> latest wins (Berlin x2, Munich x2, last element is Munich) assert.equals( TO_JSON_STRING( - STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) ), TO_JSON_STRING( map.mode_last_struct( @@ -83,7 +88,12 @@ SELECT -- 4) Single element returns itself assert.equals( TO_JSON_STRING( - STRUCT('Cologne' AS city, 'NW' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + STRUCT( + 'Cologne' AS city, + 'NW' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) ), TO_JSON_STRING( map.mode_last_struct( @@ -94,7 +104,12 @@ SELECT -- 5) Tie between NULL struct and a non-NULL struct; latest wins -> expect Berlin assert.equals( TO_JSON_STRING( - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) ), TO_JSON_STRING( map.mode_last_struct( @@ -122,7 +137,7 @@ SELECT ), -- 6) NULL struct occurs most frequently -> expect NULL assert.equals( - TO_JSON_STRING(NULL), + TO_JSON_STRING(CAST(NULL AS STRING)), TO_JSON_STRING( map.mode_last_struct( [ @@ -144,7 +159,12 @@ SELECT -- 7) City is NULL but other fields present; that exact struct is most frequent -> expect that struct (with city = NULL) assert.equals( TO_JSON_STRING( - STRUCT(NULL AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + STRUCT( + CAST(NULL AS STRING) AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) ), TO_JSON_STRING( map.mode_last_struct( From 9359eefe049496b64694ce3239bb9703c4c0af01 Mon Sep 17 00:00:00 2001 From: Winnie Date: Mon, 22 Sep 2025 11:18:18 -0700 Subject: [PATCH 5/9] Fixed tests with null values --- sql/mozfun/map/mode_last_struct/udf.sql | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/mozfun/map/mode_last_struct/udf.sql b/sql/mozfun/map/mode_last_struct/udf.sql index dbae86be6dd..de168d5c135 100644 --- a/sql/mozfun/map/mode_last_struct/udf.sql +++ b/sql/mozfun/map/mode_last_struct/udf.sql @@ -32,7 +32,12 @@ SELECT -- 1) Most frequent wins (Berlin appears twice) assert.equals( TO_JSON_STRING( - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) ), TO_JSON_STRING( map.mode_last_struct( From c6041229882f6b808cf03d058c9d2b6a8f627991 Mon Sep 17 00:00:00 2001 From: Winnie Date: Mon, 22 Sep 2025 11:42:12 -0700 Subject: [PATCH 6/9] Renamed folder --- .../README.md | 0 .../metadata.yaml | 3 ++- .../udf.sql | 14 +++++++------- 3 files changed, 9 insertions(+), 8 deletions(-) rename sql/mozfun/map/{mode_last_struct => mode_last_struct_retain_nulls}/README.md (100%) rename sql/mozfun/map/{mode_last_struct => mode_last_struct_retain_nulls}/metadata.yaml (73%) rename sql/mozfun/map/{mode_last_struct => mode_last_struct_retain_nulls}/udf.sql (93%) diff --git a/sql/mozfun/map/mode_last_struct/README.md b/sql/mozfun/map/mode_last_struct_retain_nulls/README.md similarity index 100% rename from sql/mozfun/map/mode_last_struct/README.md rename to sql/mozfun/map/mode_last_struct_retain_nulls/README.md diff --git a/sql/mozfun/map/mode_last_struct/metadata.yaml b/sql/mozfun/map/mode_last_struct_retain_nulls/metadata.yaml similarity index 73% rename from sql/mozfun/map/mode_last_struct/metadata.yaml rename to sql/mozfun/map/mode_last_struct_retain_nulls/metadata.yaml index 3aa3168c7d4..d79763c2b41 100644 --- a/sql/mozfun/map/mode_last_struct/metadata.yaml +++ b/sql/mozfun/map/mode_last_struct_retain_nulls/metadata.yaml @@ -1,4 +1,5 @@ --- description: Given an array of structs, return the most frequently occurring full struct; break ties by the latest occurrence (mode-last). Equality is on the entire struct, not per-field. -friendly_name: Map Mode Last Struct + Nulls are retained. +friendly_name: Map Mode Last Struct Retain Nulls diff --git a/sql/mozfun/map/mode_last_struct/udf.sql b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql similarity index 93% rename from sql/mozfun/map/mode_last_struct/udf.sql rename to sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql index de168d5c135..d2086a55243 100644 --- a/sql/mozfun/map/mode_last_struct/udf.sql +++ b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql @@ -3,7 +3,7 @@ Return the most frequent STRUCT from an array, breaking ties by latest occurrenc (i.e., mode_last over whole structs). Use to keep related fields aggregated together. See also: `map.mode_last`, which determines each value using `stats.mode_last`. */ -CREATE OR REPLACE FUNCTION map.mode_last_struct(entries ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION map.mode_last_struct_retain_nulls(entries ANY TYPE) AS ( ( SELECT AS STRUCT s.* @@ -40,7 +40,7 @@ SELECT ) ), TO_JSON_STRING( - map.mode_last_struct( + map.mode_last_struct_retain_nulls( [ STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), @@ -60,7 +60,7 @@ SELECT ) ), TO_JSON_STRING( - map.mode_last_struct( + map.mode_last_struct_retain_nulls( [ STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), @@ -81,7 +81,7 @@ SELECT STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country) ), TO_JSON_STRING( - map.mode_last_struct( + map.mode_last_struct_retain_nulls( [ STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country), STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'B' AS subdivision2, 'DE' AS country), @@ -101,7 +101,7 @@ SELECT ) ), TO_JSON_STRING( - map.mode_last_struct( + map.mode_last_struct_retain_nulls( [STRUCT('Cologne' AS city, 'NW' AS subdivision1, NULL AS subdivision2, 'DE' AS country)] ) ) @@ -117,7 +117,7 @@ SELECT ) ), TO_JSON_STRING( - map.mode_last_struct( + map.mode_last_struct_retain_nulls( [ CAST( NULL @@ -144,7 +144,7 @@ SELECT assert.equals( TO_JSON_STRING(CAST(NULL AS STRING)), TO_JSON_STRING( - map.mode_last_struct( + map.mode_last_struct_retain_nulls( [ CAST( NULL From adc278ebaa46c639fbd7ceb27589902ce4220275 Mon Sep 17 00:00:00 2001 From: Winnie Date: Mon, 22 Sep 2025 12:13:06 -0700 Subject: [PATCH 7/9] Fixed udf tests --- .../map/mode_last_struct_retain_nulls/udf.sql | 130 +++++++++++++----- 1 file changed, 99 insertions(+), 31 deletions(-) diff --git a/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql index d2086a55243..568ffd4d60d 100644 --- a/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql +++ b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql @@ -42,9 +42,24 @@ SELECT TO_JSON_STRING( map.mode_last_struct_retain_nulls( [ - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) ] ) ) @@ -62,13 +77,28 @@ SELECT TO_JSON_STRING( map.mode_last_struct_retain_nulls( [ - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - STRUCT('Munich' AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), STRUCT( 'Munich' AS city, 'BY' AS subdivision1, - NULL AS subdivision2, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, 'DE' AS country ) -- latest among the tied ] @@ -102,7 +132,14 @@ SELECT ), TO_JSON_STRING( map.mode_last_struct_retain_nulls( - [STRUCT('Cologne' AS city, 'NW' AS subdivision1, NULL AS subdivision2, 'DE' AS country)] + [ + STRUCT( + 'Cologne' AS city, + 'NW' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) + ] ) ) ), @@ -119,21 +156,28 @@ SELECT TO_JSON_STRING( map.mode_last_struct_retain_nulls( [ - CAST( - NULL - AS - STRUCT + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country ), - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - CAST( - NULL - AS - STRUCT + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country ), STRUCT( 'Berlin' AS city, 'BE' AS subdivision1, - NULL AS subdivision2, + CAST(NULL AS STRING) AS subdivision2, 'DE' AS country ) -- latest among the tied ] @@ -142,20 +186,34 @@ SELECT ), -- 6) NULL struct occurs most frequently -> expect NULL assert.equals( - TO_JSON_STRING(CAST(NULL AS STRING)), + TO_JSON_STRING( + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country + ) + ), TO_JSON_STRING( map.mode_last_struct_retain_nulls( [ - CAST( - NULL - AS - STRUCT + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country ), - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - CAST( - NULL - AS - STRUCT + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country ) ] ) @@ -172,11 +230,21 @@ SELECT ) ), TO_JSON_STRING( - map.mode_last_struct( + map.mode_last_struct_retain_nulls( [ - STRUCT(NULL AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT( + CAST(NULL AS STRING) AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - STRUCT(NULL AS city, 'BY' AS subdivision1, NULL AS subdivision2, 'DE' AS country) + STRUCT( + CAST(NULL AS STRING) AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) ] ) ) From 8dcabf60722aef6ba0a7464c7137fcac51ce1210 Mon Sep 17 00:00:00 2001 From: Winnie Date: Mon, 22 Sep 2025 12:18:02 -0700 Subject: [PATCH 8/9] Fixed comment --- sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql index 568ffd4d60d..9526ceb730f 100644 --- a/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql +++ b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql @@ -1,6 +1,6 @@ /* Return the most frequent STRUCT from an array, breaking ties by latest occurrence -(i.e., mode_last over whole structs). Use to keep related fields aggregated together. +(i.e., mode_last_retain_nulls over whole structs). Use to keep related fields aggregated together. See also: `map.mode_last`, which determines each value using `stats.mode_last`. */ CREATE OR REPLACE FUNCTION map.mode_last_struct_retain_nulls(entries ANY TYPE) AS ( From 46e732237a48322816c04a76b8157fe778d3ecaf Mon Sep 17 00:00:00 2001 From: Winnie Date: Mon, 22 Sep 2025 14:09:51 -0700 Subject: [PATCH 9/9] Use struct equals --- .../map/mode_last_struct_retain_nulls/udf.sql | 358 ++++++++---------- 1 file changed, 165 insertions(+), 193 deletions(-) diff --git a/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql index 9526ceb730f..7c2a51555d6 100644 --- a/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql +++ b/sql/mozfun/map/mode_last_struct_retain_nulls/udf.sql @@ -30,222 +30,194 @@ CREATE OR REPLACE FUNCTION map.mode_last_struct_retain_nulls(entries ANY TYPE) A -- Tests SELECT -- 1) Most frequent wins (Berlin appears twice) - assert.equals( - TO_JSON_STRING( - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) + assert.struct_equals( + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country ), - TO_JSON_STRING( - map.mode_last_struct_retain_nulls( - [ - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT( - 'Munich' AS city, - 'BY' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) - ] - ) + map.mode_last_struct_retain_nulls( + [ + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) + ] ) ), -- 2) Tie -> latest wins (Berlin x2, Munich x2, last element is Munich) - assert.equals( - TO_JSON_STRING( - STRUCT( - 'Munich' AS city, - 'BY' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) + assert.struct_equals( + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country ), - TO_JSON_STRING( - map.mode_last_struct_retain_nulls( - [ - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT( - 'Munich' AS city, - 'BY' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT( - 'Munich' AS city, - 'BY' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) -- latest among the tied - ] - ) + map.mode_last_struct_retain_nulls( + [ + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + 'Munich' AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) -- latest among the tied + ] ) ), -- 3) FULL-struct equality: different subdivision2 means different value - assert.equals( - TO_JSON_STRING( - STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country) - ), - TO_JSON_STRING( - map.mode_last_struct_retain_nulls( - [ - STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country), - STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'B' AS subdivision2, 'DE' AS country), - STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country) - ] - ) + assert.struct_equals( + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country), + map.mode_last_struct_retain_nulls( + [ + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'B' AS subdivision2, 'DE' AS country), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, 'A' AS subdivision2, 'DE' AS country) + ] ) ), -- 4) Single element returns itself - assert.equals( - TO_JSON_STRING( - STRUCT( - 'Cologne' AS city, - 'NW' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) + assert.struct_equals( + STRUCT( + 'Cologne' AS city, + 'NW' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country ), - TO_JSON_STRING( - map.mode_last_struct_retain_nulls( - [ - STRUCT( - 'Cologne' AS city, - 'NW' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) - ] - ) + map.mode_last_struct_retain_nulls( + [ + STRUCT( + 'Cologne' AS city, + 'NW' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) + ] ) ), -- 5) Tie between NULL struct and a non-NULL struct; latest wins -> expect Berlin - assert.equals( - TO_JSON_STRING( - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) + assert.struct_equals( + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country ), - TO_JSON_STRING( - map.mode_last_struct_retain_nulls( - [ - STRUCT( - CAST(NULL AS STRING) AS city, - CAST(NULL AS STRING) AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - CAST(NULL AS STRING) AS country - ), - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT( - CAST(NULL AS STRING) AS city, - CAST(NULL AS STRING) AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - CAST(NULL AS STRING) AS country - ), - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) -- latest among the tied - ] - ) + map.mode_last_struct_retain_nulls( + [ + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) -- latest among the tied + ] ) ), -- 6) NULL struct occurs most frequently -> expect NULL - assert.equals( - TO_JSON_STRING( - STRUCT( - CAST(NULL AS STRING) AS city, - CAST(NULL AS STRING) AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - CAST(NULL AS STRING) AS country - ) + assert.struct_equals( + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country ), - TO_JSON_STRING( - map.mode_last_struct_retain_nulls( - [ - STRUCT( - CAST(NULL AS STRING) AS city, - CAST(NULL AS STRING) AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - CAST(NULL AS STRING) AS country - ), - STRUCT( - 'Berlin' AS city, - 'BE' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT( - CAST(NULL AS STRING) AS city, - CAST(NULL AS STRING) AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - CAST(NULL AS STRING) AS country - ) - ] - ) + map.mode_last_struct_retain_nulls( + [ + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country + ), + STRUCT( + 'Berlin' AS city, + 'BE' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT( + CAST(NULL AS STRING) AS city, + CAST(NULL AS STRING) AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + CAST(NULL AS STRING) AS country + ) + ] ) ), -- 7) City is NULL but other fields present; that exact struct is most frequent -> expect that struct (with city = NULL) - assert.equals( - TO_JSON_STRING( - STRUCT( - CAST(NULL AS STRING) AS city, - 'BY' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) + assert.struct_equals( + STRUCT( + CAST(NULL AS STRING) AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country ), - TO_JSON_STRING( - map.mode_last_struct_retain_nulls( - [ - STRUCT( - CAST(NULL AS STRING) AS city, - 'BY' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ), - STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), - STRUCT( - CAST(NULL AS STRING) AS city, - 'BY' AS subdivision1, - CAST(NULL AS STRING) AS subdivision2, - 'DE' AS country - ) - ] - ) + map.mode_last_struct_retain_nulls( + [ + STRUCT( + CAST(NULL AS STRING) AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ), + STRUCT('Berlin' AS city, 'BE' AS subdivision1, NULL AS subdivision2, 'DE' AS country), + STRUCT( + CAST(NULL AS STRING) AS city, + 'BY' AS subdivision1, + CAST(NULL AS STRING) AS subdivision2, + 'DE' AS country + ) + ] ) );