From 87da9b79569c30c82609897cb74b66201a34d57d Mon Sep 17 00:00:00 2001 From: Claas Augner <495429+caugner@users.noreply.github.com> Date: Mon, 1 Sep 2025 15:58:30 +0200 Subject: [PATCH] chore(mdn_popularities): remove mdn_yari Glean data Now that the popularities for August have been calculated, we no longer need the yari data. --- .../mdn_popularities_v1/query.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py b/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py index 50c9c0510b9..e28c1610618 100644 --- a/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py +++ b/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py @@ -10,24 +10,14 @@ from google.cloud import bigquery, storage QUERY_TEMPLATE = """\ -WITH events_stream AS - (SELECT JSON_VALUE(event_extra.url) AS url - FROM `moz-fx-data-shared-prod.mdn_fred.events_stream` - WHERE DATE(submission_timestamp) BETWEEN DATE_TRUNC(@submission_date, MONTH) AND LAST_DAY(@submission_date) - AND client_info.app_channel = 'prod' - AND event_name = 'page_load' - AND JSON_VALUE(event_extra.url) LIKE "https://developer.mozilla.org/%/docs/%" - AND JSON_VALUE(event_extra.title) != 'Page not found | MDN' - UNION ALL SELECT JSON_VALUE(event_extra.url) AS url - FROM `moz-fx-data-shared-prod.mdn_yari.events_stream` - WHERE DATE(submission_timestamp) BETWEEN DATE_TRUNC(@submission_date, MONTH) AND LAST_DAY(@submission_date) - AND client_info.app_channel = 'prod' - AND event_name = 'page_load' - AND JSON_VALUE(event_extra.url) LIKE "https://developer.mozilla.org/%/docs/%" - AND JSON_VALUE(event_extra.title) NOT LIKE '%Page not found | MDN' ) -SELECT REGEXP_EXTRACT(url, r'^https://developer.mozilla.org(/.+?/docs/[^?#]+)') AS Page, +SELECT REGEXP_EXTRACT(JSON_VALUE(event_extra.url), r'^https://developer.mozilla.org(/.+?/docs/[^?#]+)') AS Page, COUNT(*) AS Pageviews 
-FROM events_stream +FROM `moz-fx-data-shared-prod.mdn_fred.events_stream` +WHERE DATE(submission_timestamp) BETWEEN DATE_TRUNC(@submission_date, MONTH) AND LAST_DAY(@submission_date) + AND client_info.app_channel = 'prod' + AND event_name = 'page_load' + AND JSON_VALUE(event_extra.url) LIKE "https://developer.mozilla.org/%/docs/%" + AND JSON_VALUE(event_extra.title) != 'Page not found | MDN' GROUP BY Page ORDER BY Pageviews DESC """