From 7c2b77e6838b31aadeb1566edcb18a99ddd38273 Mon Sep 17 00:00:00 2001 From: Ilana Segall Date: Thu, 31 Jul 2025 08:49:05 -0700 Subject: [PATCH 1/5] metadata, query, schema for reported content in hnt --- .../report_content_v1/metadata.yaml | 28 ++++++++++++++ .../report_content_v1/query.sql | 24 ++++++++++++ .../report_content_v1/schema.yaml | 37 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml create mode 100644 sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql create mode 100644 sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml new file mode 100644 index 00000000000..85901c724f2 --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml @@ -0,0 +1,28 @@ +friendly_name: Reported Content +description: "Rerported organic content\nFor each report, we collect:\n submission\ + \ date\n card type\n corpus item id\n report reason\n section\n section position\n\ + \ title \n topic\n url" +owners: +- isegall@mozilla.com +labels: + application: newtab + incremental: true + schedule: daily + dag: bqetl_newtab + owner1: isegall + table_type: event_level +scheduling: + dag_name: bqetl_newtab +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - channel + - country +references: {} +require_column_descriptions: false diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql new file mode 100644 index 00000000000..1e014994467 --- /dev/null +++ 
b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql @@ -0,0 +1,24 @@ +WITH events AS ( + SELECT + DATE(submission_timestamp) AS submission_date, + mozfun.map.get_key(event.extra, 'card_type') AS card_type, + mozfun.map.get_key(event.extra, 'corpus_item_id') AS corpus_item_id, + mozfun.map.get_key(event.extra, 'report_reason') AS report_reason, + mozfun.map.get_key(event.extra, 'section') AS section, + mozfun.map.get_key(event.extra, 'section_position') AS section_position, + mozfun.map.get_key(event.extra, 'title') AS title, + mozfun.map.get_key(event.extra, 'topic') AS topic, + mozfun.map.get_key(event.extra, 'url') AS url + FROM + `moz-fx-data-shared-prod.firefox_desktop.newtab` AS e + CROSS JOIN + UNNEST(e.events) AS event + WHERE + DATE(submission_timestamp) = @submission_date + AND event.category = 'newtab' + AND event.name = 'report_content_submit' +) +SELECT + * +FROM + events diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml new file mode 100644 index 00000000000..00eb856a6d9 --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml @@ -0,0 +1,37 @@ +fields: +- name: submission_date + type: DATE + mode: NULLABLE + description: Day the newtab ping carrying the event was received +- name: card_type + type: STRING + mode: NULLABLE + description: The type of the content card (e.g., "spoc", "organic") +- name: corpus_item_id + type: STRING + mode: NULLABLE + description: content identifier +- name: section + type: STRING + mode: NULLABLE + description: If the reported content belongs to a section, the name of the section +- name: section_position + type: STRING + mode: NULLABLE + description: If the reported content belongs to a section, the numeric position of the section +- name: report_reason + type: STRING + mode: NULLABLE + description: The reason selected by the user when reporting
the content +- name: title + type: STRING + mode: NULLABLE + description: Title of the recommendation. +- name: topic + type: STRING + mode: NULLABLE + description: The topic of the recommendation. Like "entertainment". +- name: url + type: STRING + description: URL of the recommendation. + mode: NULLABLE From 4bd2436fab692b89595af6fc42666b9a27ea2edc Mon Sep 17 00:00:00 2001 From: ilanasegall Date: Thu, 31 Jul 2025 09:25:21 -0700 Subject: [PATCH 2/5] Update sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml updated newlines Co-authored-by: Jared Snyder --- .../report_content_v1/metadata.yaml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml index 85901c724f2..3f81cb86bd6 100644 --- a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml @@ -1,7 +1,17 @@ friendly_name: Reported Content -description: "Rerported organic content\nFor each report, we collect:\n submission\ - \ date\n card type\n corpus item id\n report reason\n section\n section position\n\ - \ title \n topic\n url" +description: |- + Reported organic content + + For each report, we collect + submission date + card type + corpus item id + report reason + section + section position + title + topic + url owners: - isegall@mozilla.com labels: From f0deb9e3a3a07b5833f1af957e6a5cdda58b5518 Mon Sep 17 00:00:00 2001 From: Ilana Segall Date: Thu, 31 Jul 2025 09:44:58 -0700 Subject: [PATCH 3/5] fix yaml formatting --- .../report_content_v1/metadata.yaml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml
b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml index 85901c724f2..fb50b25b4bb 100644 --- a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml @@ -1,7 +1,16 @@ friendly_name: Reported Content -description: "Rerported organic content\nFor each report, we collect:\n submission\ - \ date\n card type\n corpus item id\n report reason\n section\n section position\n\ - \ title \n topic\n url" +description: |- + Reported organic content + One row per report_content_submit event, with the following fields: + submission_date + card type + corpus item id + report reason + section + section position + title + topic + url owners: - isegall@mozilla.com labels: From ae2388fb8f867b4036e67130b5ea9a233581fd77 Mon Sep 17 00:00:00 2001 From: Ilana Segall Date: Thu, 31 Jul 2025 10:31:03 -0700 Subject: [PATCH 4/5] remove clustering fields --- .../firefox_desktop_derived/report_content_v1/metadata.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml index c1c253490db..caf0b318a93 100644 --- a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml @@ -30,9 +30,5 @@ bigquery: require_partition_filter: true expiration_days: null range_partitioning: null - clustering: - fields: - - channel - - country references: {} require_column_descriptions: false From 2230bbabd7d0803f58dbe5c57c2fc76c08fa5b49 Mon Sep 17 00:00:00 2001 From: Ilana Segall Date: Mon, 25 Aug 2025 13:44:40 -0700 Subject: [PATCH 5/5] add backfill for reported content --- .../report_content_v1/backfill.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644
sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml new file mode 100644 index 00000000000..31b3e1f5cea --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml @@ -0,0 +1,11 @@ +2025-08-25: + start_date: 2025-03-31 + end_date: 2025-07-31 + reason: backfill for request in DS-4333 + watchers: + - isegall@mozilla.com + status: Initiate + shredder_mitigation: false + override_retention_limit: false + override_depends_on_past_end_date: false + ignore_date_partition_offset: false