diff --git a/sql/2024/media/animated_gif_bpp.sql b/sql/2024/media/animated_gif_bpp.sql
new file mode 100644
index 00000000000..7aa492a9bd3
--- /dev/null
+++ b/sql/2024/media/animated_gif_bpp.sql
@@ -0,0 +1,82 @@
+#standardSQL
+# what is the bpp of animated vs non-animated GIFs?
+# animated_gif_bpp.sql
+
+
+# ImageMagick reports big images as having, e.g., "1.29097M" pixels. This means ~1.2 million pixels, but BigQuery doesn't know that.
+CREATE TEMPORARY FUNCTION magickPixels(imageMagickNumberString STRING)
+RETURNS INT64
+LANGUAGE js AS r'''
+
+if (!imageMagickNumberString) { return null; }
+const matched = imageMagickNumberString.match(/([\d\.]+)(\w+)?$/);
+const multiples = {
+ 'K': 1e3,
+ 'M': 1e6,
+ 'G': 1e9,
+ 'T': 1e12
+}
+if ( matched && matched[1] ) {
+ return Math.round(
+ parseFloat( matched[1] ) * ( multiples[ matched[2] ] || 1 )
+ );
+} else {
+ return null;
+}
+
+''';
+
+# ImageMagick reports bytesizes in a friendly, human readable format. Just return bytes
+CREATE TEMPORARY FUNCTION magickBytes(imageMagickNumberString STRING)
+RETURNS INT64
+LANGUAGE js AS r'''
+
+if (!imageMagickNumberString) { return 0; }
+const matched = imageMagickNumberString.match(/([\d\.]+)(\w+)$/);
+const multiples = {
+ 'B': 1,
+ 'KB': 1e3,
+ 'MB': 1e6,
+ 'GB': 1e9,
+ 'TB': 1e12
+}
+if ( matched && matched[1] && matched[2] ) {
+ return Math.round(
+ parseFloat( matched[1] ) * multiples[ matched[2] ]
+ );
+} else {
+ return null;
+}
+
+''';
+
+WITH gifs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ CAST(JSON_VALUE(payload, '$._image_details.animated') AS BOOL) AS is_animated,
+ (magickBytes(JSON_VALUE(payload, '$._image_details.magick.filesize')) * 8) /
+ magickPixels(JSON_VALUE(payload, '$._image_details.magick.numberPixels')) AS bits_per_pixel
+ FROM
+ `requests.2024_06_01_*`
+ WHERE
+ JSON_VALUE(payload, '$._image_details.detected_type') = 'gif' AND
+ magickPixels(JSON_VALUE(payload, '$._image_details.magick.numberPixels')) > 1 AND
+ JSON_VALUE(payload, '$._image_details.animated') IS NOT NULL
+)
+
+SELECT
+ percentile,
+ client,
+ is_animated,
+ APPROX_QUANTILES(bits_per_pixel, 1000)[OFFSET(percentile * 10)] AS bpp
+FROM
+ gifs,
+ UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+ percentile,
+ client,
+ is_animated
+ORDER BY
+ percentile,
+ client,
+ is_animated
diff --git a/sql/2024/media/animated_gif_count.sql b/sql/2024/media/animated_gif_count.sql
new file mode 100644
index 00000000000..01af1506452
--- /dev/null
+++ b/sql/2024/media/animated_gif_count.sql
@@ -0,0 +1,39 @@
+#standardSQL
+# what percent of gifs are animated?
+# animated_gif_count.sql
+
+# ImageMagick reports big images as having, e.g., "1.29097M" pixels. This means ~1.2 million pixels, but BigQuery doesn't know that.
+CREATE TEMPORARY FUNCTION magickPixels(imageMagickNumberString STRING)
+RETURNS INT64
+LANGUAGE js AS r'''
+
+if (!imageMagickNumberString) { return null; }
+const matched = imageMagickNumberString.match(/([\d\.]+)(\w+)?$/);
+const multiples = {
+ 'K': 1e3,
+ 'M': 1e6,
+ 'G': 1e9,
+ 'T': 1e12
+}
+if ( matched && matched[1] ) {
+ return Math.round(
+ parseFloat( matched[1] ) * ( multiples[ matched[2] ] || 1 )
+ );
+} else {
+ return null;
+}
+
+''';
+
+SELECT
+ _TABLE_SUFFIX AS client,
+ COUNT(0) AS total_gifs,
+ COUNTIF(CAST(JSON_VALUE(payload, '$._image_details.animated') AS BOOL)) AS total_animated_gifs,
+ COUNTIF(CAST(JSON_VALUE(payload, '$._image_details.animated') AS BOOL)) / COUNT(0) AS pct_animated_gifs
+FROM
+ `requests.2024_06_01_*`
+WHERE
+ JSON_VALUE(payload, '$._image_details.detected_type') = 'gif' AND
+ magickPixels(JSON_VALUE(payload, '$._image_details.magick.numberPixels')) > 1
+GROUP BY
+ client
diff --git a/sql/2024/media/animated_gif_framecount.sql b/sql/2024/media/animated_gif_framecount.sql
new file mode 100644
index 00000000000..531052b462b
--- /dev/null
+++ b/sql/2024/media/animated_gif_framecount.sql
@@ -0,0 +1,50 @@
+#standardSQL
+# distribution of animated GIF framecounts
+# animated_gif_framecount.sql
+
+# ImageMagick reports big images as having, e.g., "1.29097M" pixels. This means ~1.2 million pixels, but BigQuery doesn't know that.
+CREATE TEMPORARY FUNCTION magickPixels(imageMagickNumberString STRING)
+RETURNS INT64
+LANGUAGE js AS r'''
+
+if (!imageMagickNumberString) { return null; }
+const matched = imageMagickNumberString.match(/([\d\.]+)(\w+)?$/);
+const multiples = {
+ 'K': 1e3,
+ 'M': 1e6,
+ 'G': 1e9,
+ 'T': 1e12
+}
+if ( matched && matched[1] ) {
+ return Math.round(
+ parseFloat( matched[1] ) * ( multiples[ matched[2] ] || 1 )
+ );
+} else {
+ return null;
+}
+
+''';
+
+WITH framecounts AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ CAST(JSON_VALUE(payload, '$._image_details.metadata.GIF.FrameCount') AS NUMERIC) AS framecount
+ FROM `requests.2024_06_01_*`
+ WHERE
+ JSON_VALUE(payload, '$._image_details.metadata.GIF.FrameCount') IS NOT NULL AND
+ magickPixels(JSON_VALUE(payload, '$._image_details.magick.numberPixels')) > 1
+)
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(framecount, 1000)[OFFSET(percentile * 10)] AS framecount
+FROM
+ framecounts,
+ UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/at_least_one_image_request.sql b/sql/2024/media/at_least_one_image_request.sql
new file mode 100644
index 00000000000..9d514b663b9
--- /dev/null
+++ b/sql/2024/media/at_least_one_image_request.sql
@@ -0,0 +1,13 @@
+#standardSQL
+# What % of pages load at least one image?
+# ✅ at_least_one_image_request.sql
+
+SELECT
+ _TABLE_SUFFIX AS client,
+ COUNTIF(reqImg > 0) AS atLeastOneImgReqCount,
+ COUNT(0) AS total,
+ SAFE_DIVIDE(COUNTIF(reqImg > 0), COUNT(0)) AS atLeastOneImgReqPct
+FROM
+ `httparchive.summary_pages.2024_06_01_*`
+GROUP BY
+ client
diff --git a/sql/2024/media/at_least_one_img.sql b/sql/2024/media/at_least_one_img.sql
new file mode 100644
index 00000000000..420af6df10f
--- /dev/null
+++ b/sql/2024/media/at_least_one_img.sql
@@ -0,0 +1,31 @@
+#standardSQL
+# What % of pages have at least one ?
+# at_least_one_img.sql
+
+CREATE TEMPORARY FUNCTION numberOfImages(images_string STRING)
+RETURNS INT64
+LANGUAGE js AS '''
+try {
+ return JSON.parse(images_string).filter( i => parseInt(i.approximateResourceWidth) > 1 && parseInt(i.approximateResourceWidth) > 1 ).length;
+} catch {
+ return 0;
+}
+''';
+
+WITH numImgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ numberOfImages(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')) AS numberOfImages
+ FROM
+ `httparchive.pages.2024_06_01_*`
+)
+
+SELECT
+ client,
+ COUNTIF(numberOfImages > 0) AS atLeastOneCount,
+ COUNT(0) AS total,
+ SAFE_DIVIDE(COUNTIF(numberOfImages > 0), COUNT(0)) AS atLeastOnePct
+FROM
+ numImgs
+GROUP BY
+ client
diff --git a/sql/2024/media/bytes_and_dimensions.sql b/sql/2024/media/bytes_and_dimensions.sql
new file mode 100644
index 00000000000..9461f755995
--- /dev/null
+++ b/sql/2024/media/bytes_and_dimensions.sql
@@ -0,0 +1,174 @@
+#standardSQL
+# Measuring img loaded bytes and dimensions
+# bytes_and_dimensions.sql
+
+CREATE TEMPORARY FUNCTION getSrcsetInfo(responsiveImagesJsonString STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+
+function pithyType( { contentType, url } ) {
+ const subtypeMap = {
+ 'svg+xml': 'svg',
+ 'svgz': 'svg',
+ 'jpeg': 'jpg',
+ 'jfif': 'jpg',
+ 'x-png': 'png',
+ 'vnd.microsoft.icon': 'ico',
+ 'x-icon': 'ico',
+ 'jxr': 'jxr',
+ 'vnd.ms-photo': 'jxr',
+ 'hdp': 'jxr',
+ 'wdp': 'jxr',
+ 'jpf': 'jp2',
+ 'jpx': 'jp2',
+ 'jpm': 'jp2',
+ 'mj2': 'jp2',
+ 'x-jp2-container': 'jp2',
+ 'x-jp2-codestream': 'jp2',
+ 'x-jpeg2000-image': 'jp2',
+ 'heic': 'heif',
+ 'x-ms-bmp': 'bmp',
+ 'x-pict': 'pict',
+ 'tif': 'tiff',
+ 'x-tif': 'tiff',
+ 'x-tiff': 'tiff',
+ 'vnd.mozilla.apng': 'apng',
+ // identities
+ 'apng': 'apng',
+ 'jpg': 'jpg',
+ 'jp2': 'jp2',
+ 'png': 'png',
+ 'gif': 'gif',
+ 'ico': 'ico',
+ 'webp': 'webp',
+ 'avif': 'avif',
+ 'tiff': 'tiff',
+ 'flif': 'flif',
+ 'heif': 'heif',
+ 'jxl': 'jxl',
+ 'avif-sequence': 'avif-sequence', // keep separate from single frames...
+ 'heic-sequence': 'heic-sequence',
+ 'bmp': 'bmp',
+ 'pict': 'pict'
+ };
+
+ function normalizeSubtype( subtype ) {
+ if ( subtypeMap[ subtype ] ) {
+ return subtypeMap[ subtype ];
+ }
+ return 'unknown'; // switch between:
+ // `subtype`
+ // to see everything, check if there's anything else worth capturing
+ // `'unknown'`
+ // to make results manageable
+ }
+
+ // if it's a data url, take the mime type from there, done.
+ if ( url &&
+ typeof url === "string" ) {
+ const match = url.toLowerCase().match( /^data:image\\/([\\w\\-\\.\\+]+)/ );
+ if ( match && match[ 1 ] ) {
+ return normalizeSubtype( match[ 1 ] );
+ }
+ }
+
+ // if we get a content-type header, use it!
+ if ( contentType &&
+ typeof contentType === "string" ) {
+ const match = contentType.toLowerCase().match( /image\\/([\\w\\-\\.\\+]+)/ );
+ if ( match && match[ 1 ] ) {
+ return normalizeSubtype( match[ 1 ] );
+ }
+ }
+
+ // otherwise fall back to extension in the URL
+ if ( url &&
+ typeof url === "string" ) {
+ const splitOnSlashes = url.split("/");
+ if ( splitOnSlashes.length > 1 ) {
+ const afterLastSlash = splitOnSlashes[ splitOnSlashes.length - 1 ],
+ splitOnDots = afterLastSlash.split(".");
+ if ( splitOnDots.length > 1 ) {
+ return normalizeSubtype(
+ splitOnDots[ splitOnDots.length - 1 ]
+ .toLowerCase()
+ .replace( /^(\\w+)[\\?\\&\\#].*/, '$1' ) // strip query params
+ );
+ }
+ }
+ }
+
+ // otherwise throw up our hands
+ return 'unknown';
+ }
+
+ const parsed = JSON.parse( responsiveImagesJsonString );
+ if ( parsed && parsed.map ) {
+ const dataRegEx = new RegExp('^data');
+ return parsed.map( d => ({
+ imgURL: d.url,
+ approximateResourceWidth: Math.floor( d.approximateResourceWidth || 0 ),
+ approximateResourceHeight: Math.floor( d.approximateResourceHeight || 0 ),
+ byteSize: Math.floor( d.byteSize || 0 ),
+ bitsPerPixel: parseFloat( d.bitsPerPixel || 0 ),
+ isPixel: d.approximateResourceWidth == 1 && d.approximateResourceHeight == 1,
+ isDataURL: dataRegEx.test(d.url),
+ resourceFormat: pithyType({ contentType: d.mimeType, url: d.url })
+ }) );
+ }
+''';
+
+WITH imgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url AS pageURL,
+ imgURL,
+ approximateResourceWidth,
+ approximateResourceHeight,
+ byteSize,
+ bitsPerPixel,
+ isPixel,
+ isDataURL,
+ (approximateResourceWidth * approximateResourceHeight) / 1000000 AS megapixels,
+ (approximateResourceWidth / approximateResourceHeight) AS aspectRatio,
+ resourceFormat
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getSrcsetInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
+),
+
+percentiles AS (
+ SELECT
+ client,
+ APPROX_QUANTILES(approximateResourceWidth, 1000) AS resourceWidthPercentiles,
+ APPROX_QUANTILES(approximateResourceHeight, 1000) AS resourceHeightPercentiles,
+ APPROX_QUANTILES(aspectRatio, 1000) AS aspectRatioPercentiles,
+ APPROX_QUANTILES(megapixels, 1000) AS megapixelsPercentiles,
+ APPROX_QUANTILES(byteSize, 1000) AS byteSizePercentiles,
+ APPROX_QUANTILES(bitsPerPixel, 1000) AS bitsPerPixelPercentiles,
+ COUNT(0) AS imgCount
+ FROM
+ imgs
+ WHERE
+ approximateResourceWidth > 1 AND
+ approximateResourceHeight > 1
+ GROUP BY
+ client
+)
+
+SELECT
+ percentile,
+ client,
+ imgCount,
+ resourceWidthPercentiles[OFFSET(percentile * 10)] AS resourceWidth,
+ resourceHeightPercentiles[OFFSET(percentile * 10)] AS resourceHeight,
+ aspectRatioPercentiles[OFFSET(percentile * 10)] AS aspectRatio,
+ megapixelsPercentiles[OFFSET(percentile * 10)] AS megapixels,
+ byteSizePercentiles[OFFSET(percentile * 10)] AS byteSize,
+ bitsPerPixelPercentiles[OFFSET(percentile * 10)] AS bitsPerPixel
+FROM
+ percentiles,
+ UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
+ORDER BY
+ imgCount DESC,
+ percentile
diff --git a/sql/2024/media/bytes_and_dimensions_by_format.sql b/sql/2024/media/bytes_and_dimensions_by_format.sql
new file mode 100644
index 00000000000..8bca387cbf4
--- /dev/null
+++ b/sql/2024/media/bytes_and_dimensions_by_format.sql
@@ -0,0 +1,174 @@
+#standardSQL
+# Measuring img loaded bytes and dimensions, broken down by detected format
+# bytes_and_dimensions_by_format.sql
+
+CREATE TEMPORARY FUNCTION getSrcsetInfo(responsiveImagesJsonString STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+
+function pithyType( { contentType, url } ) {
+ const subtypeMap = {
+ 'svg+xml': 'svg',
+ 'svgz': 'svg',
+ 'jpeg': 'jpg',
+ 'jfif': 'jpg',
+ 'x-png': 'png',
+ 'vnd.microsoft.icon': 'ico',
+ 'x-icon': 'ico',
+ 'jxr': 'jxr',
+ 'vnd.ms-photo': 'jxr',
+ 'hdp': 'jxr',
+ 'wdp': 'jxr',
+ 'jpf': 'jp2',
+ 'jpx': 'jp2',
+ 'jpm': 'jp2',
+ 'mj2': 'jp2',
+ 'x-jp2-container': 'jp2',
+ 'x-jp2-codestream': 'jp2',
+ 'x-jpeg2000-image': 'jp2',
+ 'heic': 'heif',
+ 'x-ms-bmp': 'bmp',
+ 'x-pict': 'pict',
+ 'tif': 'tiff',
+ 'x-tif': 'tiff',
+ 'x-tiff': 'tiff',
+ 'vnd.mozilla.apng': 'apng',
+ // identities
+ 'apng': 'apng',
+ 'jpg': 'jpg',
+ 'jp2': 'jp2',
+ 'png': 'png',
+ 'gif': 'gif',
+ 'ico': 'ico',
+ 'webp': 'webp',
+ 'avif': 'avif',
+ 'tiff': 'tiff',
+ 'flif': 'flif',
+ 'heif': 'heif',
+ 'jxl': 'jxl',
+ 'avif-sequence': 'avif-sequence', // keep separate from single frames...
+ 'heic-sequence': 'heic-sequence',
+ 'bmp': 'bmp',
+ 'pict': 'pict'
+ };
+
+ function normalizeSubtype( subtype ) {
+ if ( subtypeMap[ subtype ] ) {
+ return subtypeMap[ subtype ];
+ }
+ return 'unknown'; // switch between:
+ // `subtype`
+ // to see everything, check if there's anything else worth capturing
+ // `'unknown'`
+ // to make results manageable
+ }
+
+ // if it's a data url, take the mime type from there, done.
+ if ( url && typeof url === "string" ) {
+ const match = url.toLowerCase().match( /^data:image\\/([\\w\\-\\.\\+]+)/ );
+ if ( match && match[ 1 ] ) {
+ return normalizeSubtype( match[ 1 ] );
+ }
+ }
+
+ // if we get a content-type header, use it!
+ if ( contentType && typeof contentType === "string" ) {
+ const match = contentType.toLowerCase().match( /image\\/([\\w\\-\\.\\+]+)/ );
+ if ( match && match[ 1 ] ) {
+ return normalizeSubtype( match[ 1 ] );
+ }
+ }
+
+ // otherwise fall back to extension in the URL
+ if ( url && typeof url === "string" ) {
+ const splitOnSlashes = url.split("/");
+ if ( splitOnSlashes.length > 1 ) {
+ const afterLastSlash = splitOnSlashes[ splitOnSlashes.length - 1 ],
+ splitOnDots = afterLastSlash.split(".");
+ if ( splitOnDots.length > 1 ) {
+ return normalizeSubtype(
+ splitOnDots[ splitOnDots.length - 1 ]
+ .toLowerCase()
+ .replace( /^(\\w+)[\\?\\&\\#].*/, '$1' ) // strip query params
+ );
+ }
+ }
+ }
+
+ // otherwise throw up our hands
+ return 'unknown';
+ }
+
+ const parsed = JSON.parse( responsiveImagesJsonString );
+ if ( parsed && parsed.map ) {
+ const dataRegEx = new RegExp('^data');
+ return parsed.map( d => ({
+ imgURL: d.url,
+ approximateResourceWidth: Math.floor( d.approximateResourceWidth || 0 ),
+ approximateResourceHeight: Math.floor( d.approximateResourceHeight || 0 ),
+ byteSize: Math.floor( d.byteSize || 0 ),
+ bitsPerPixel: parseFloat( d.bitsPerPixel || 0 ),
+ isPixel: d.approximateResourceWidth == 1 && d.approximateResourceHeight == 1,
+ isDataURL: dataRegEx.test(d.url),
+ resourceFormat: pithyType({ contentType: d.mimeType, url: d.url })
+ }) );
+ }
+''';
+
+WITH imgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url AS pageURL,
+ imgURL,
+ approximateResourceWidth,
+ approximateResourceHeight,
+ byteSize,
+ bitsPerPixel,
+ isPixel,
+ isDataURL,
+ (approximateResourceWidth * approximateResourceHeight) / 1000000 AS megapixels,
+ (approximateResourceWidth / approximateResourceHeight) AS aspectRatio,
+ resourceFormat
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getSrcsetInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
+),
+
+percentiles AS (
+ SELECT
+ client,
+ resourceFormat,
+ APPROX_QUANTILES(approximateResourceWidth, 1000) AS resourceWidthPercentiles,
+ APPROX_QUANTILES(approximateResourceHeight, 1000) AS resourceHeightPercentiles,
+ APPROX_QUANTILES(aspectRatio, 1000) AS aspectRatioPercentiles,
+ APPROX_QUANTILES(megapixels, 1000) AS megapixelsPercentiles,
+ APPROX_QUANTILES(byteSize, 1000) AS byteSizePercentiles,
+ APPROX_QUANTILES(bitsPerPixel, 1000) AS bitsPerPixelPercentiles,
+ COUNT(0) AS imgCount
+ FROM
+ imgs
+ WHERE
+ approximateResourceWidth > 1 AND
+ approximateResourceHeight > 1
+ GROUP BY
+ client,
+ resourceFormat
+)
+
+SELECT
+ percentile,
+ client,
+ resourceFormat,
+ imgCount,
+ resourceWidthPercentiles[OFFSET(percentile * 10)] AS resourceWidth,
+ resourceHeightPercentiles[OFFSET(percentile * 10)] AS resourceHeight,
+ aspectRatioPercentiles[OFFSET(percentile * 10)] AS aspectRatio,
+ megapixelsPercentiles[OFFSET(percentile * 10)] AS megapixels,
+ byteSizePercentiles[OFFSET(percentile * 10)] AS byteSize,
+ bitsPerPixelPercentiles[OFFSET(percentile * 10)] AS bitsPerPixel
+FROM
+ percentiles,
+ UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
+ORDER BY
+ imgCount DESC,
+ percentile
diff --git a/sql/2024/media/color_spaces_and_depth.sql b/sql/2024/media/color_spaces_and_depth.sql
new file mode 100644
index 00000000000..3ba42cbac07
--- /dev/null
+++ b/sql/2024/media/color_spaces_and_depth.sql
@@ -0,0 +1,74 @@
+#standardSQL
+# info on color depth and spaces – will need to be further filtered/aggregated, but this is everything
+# color_spaces_and_depth.sql
+
+CREATE TEMPORARY FUNCTION magickMillions(imageMagickNumberString STRING)
+RETURNS FLOAT64
+LANGUAGE js AS r'''
+
+if (!imageMagickNumberString) { return 0; }
+const matched = imageMagickNumberString.match( /(\d+)\.?(\d+)?M$/ );
+if ( matched && matched[1] ) {
+ if ( matched[2] ) {
+ // input had a decimal (e.g. "1.23456M")
+ return `${ matched[1] }.${ matched[2] }e6`;
+ } else {
+ // input did not have a decimal (e.g. "1M")
+ return `${ matched[1] }e6`;
+ }
+} else {
+ return imageMagickNumberString;
+}
+
+''';
+
+WITH color_info AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ JSON_EXTRACT(payload, '$._image_details.magick.depth') AS color_depth,
+ JSON_EXTRACT(payload, '$._image_details.magick.colorspace') AS color_space,
+ JSON_EXTRACT(payload, '$._image_details.magick.properties.icc:description') AS description
+ FROM
+ `requests.2024_06_01_*`
+ WHERE
+ JSON_QUERY(payload, '$._image_details.magick.colorspace') IS NOT NULL AND
+ magickMillions(JSON_VALUE(payload, '$._image_details.magick.numberPixels')) > 1
+),
+
+totals AS (
+ SELECT
+ client,
+ COUNT(0) AS total
+ FROM
+ color_info
+ GROUP BY
+ client
+),
+
+counts AS (
+ SELECT
+ client,
+ color_depth,
+ color_space,
+ description,
+ COUNT(0) AS count
+ FROM
+ color_info
+ GROUP BY
+ client,
+ color_depth,
+ color_space,
+ description
+)
+
+SELECT
+ *,
+ count / total AS pct
+FROM
+ counts
+JOIN
+ totals
+USING
+ (client)
+ORDER BY
+ count DESC
diff --git a/sql/2024/media/image_0x0.sql b/sql/2024/media/image_0x0.sql
new file mode 100644
index 00000000000..f23f2c2e0a8
--- /dev/null
+++ b/sql/2024/media/image_0x0.sql
@@ -0,0 +1,48 @@
+#standardSQL
+# s whose sources have no dimensions (mostly not loaded because loading=lazy)
+# image_0x0.sql
+
+CREATE TEMPORARY FUNCTION getPixelInfo(responsiveImagesJsonString STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+const parsed = JSON.parse(responsiveImagesJsonString);
+if (parsed && parsed.map) {
+ const dataRegEx = new RegExp('^data');
+ return parsed.map(d => ({
+ isPixel: d.approximateResourceWidth == 0 && d.approximateResourceHeight == 0,
+ isDataURL: dataRegEx.test(d.url)
+ }));
+}
+''';
+
+WITH imgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ isPixel,
+ isDataURL
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getPixelInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
+),
+
+counts AS (
+ SELECT
+ client,
+ COUNT(0) AS total_imgs,
+ COUNTIF(isPixel) AS zero_pixel_imgs,
+ COUNTIF(isPixel AND isDataURL) AS zero_pixel_data_urls
+ FROM
+ imgs
+ GROUP BY
+ client
+)
+
+SELECT
+ client,
+ total_imgs,
+ zero_pixel_imgs,
+ zero_pixel_data_urls,
+ SAFE_DIVIDE(zero_pixel_imgs, total_imgs) AS pct_zero_pixel_imgs,
+ SAFE_DIVIDE(zero_pixel_data_urls, total_imgs) AS pct_zero_pixel_data_urls
+FROM
+ counts
diff --git a/sql/2024/media/image_1x1.sql b/sql/2024/media/image_1x1.sql
new file mode 100644
index 00000000000..9c78abd2649
--- /dev/null
+++ b/sql/2024/media/image_1x1.sql
@@ -0,0 +1,48 @@
+#standardSQL
+# s whose sources are 1x1 (tracking beacons, mostly)
+# image_1x1.sql
+
+CREATE TEMPORARY FUNCTION getPixelInfo(responsiveImagesJsonString STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+const parsed = JSON.parse(responsiveImagesJsonString);
+if (parsed && parsed.map) {
+ const dataRegEx = new RegExp('^data');
+ return parsed.map(d => ({
+ isPixel: d.approximateResourceWidth == 1 && d.approximateResourceHeight == 1,
+ isDataURL: dataRegEx.test(d.url)
+ }));
+}
+''';
+
+WITH imgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ isPixel,
+ isDataURL
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getPixelInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
+),
+
+counts AS (
+ SELECT
+ client,
+ COUNT(0) AS total_imgs,
+ COUNTIF(isPixel) AS one_pixel_imgs,
+ COUNTIF(isPixel AND isDataURL) AS one_pixel_data_urls
+ FROM
+ imgs
+ GROUP BY
+ client
+)
+
+SELECT
+ client,
+ total_imgs,
+ one_pixel_imgs,
+ one_pixel_data_urls,
+ SAFE_DIVIDE(one_pixel_imgs, total_imgs) AS pct_one_pixel_imgs,
+ SAFE_DIVIDE(one_pixel_data_urls, total_imgs) AS pct_one_pixel_data_urls
+FROM
+ counts
diff --git a/sql/2024/media/image_alt.sql b/sql/2024/media/image_alt.sql
new file mode 100644
index 00000000000..7f722189b7b
--- /dev/null
+++ b/sql/2024/media/image_alt.sql
@@ -0,0 +1,49 @@
+#standardSQL
+# usage of alt text in images
+# image_alt.sql
+
+CREATE TEMPORARY FUNCTION get_markup_info(markup_string STRING)
+RETURNS STRUCT<
+ total INT64,
+ alt_missing INT64,
+ alt_blank INT64,
+ alt_present INT64,
+ decode_lazy INT64
+> LANGUAGE js AS '''
+var result = {};
+try {
+ var markup = JSON.parse(markup_string);
+
+ if (Array.isArray(markup) || typeof markup != 'object') return result;
+
+ result.total = markup.images.img.total;
+ result.alt_missing = markup.images.img.alt.missing;
+ result.alt_blank = markup.images.img.alt.blank;
+ result.alt_present = markup.images.img.alt.present;
+ result.decode_lazy = markup.images.img.decoding || 0;
+
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ SAFE_DIVIDE(COUNTIF(markup_info.total > 0), COUNT(0)) AS pages_with_img_pct,
+ SAFE_DIVIDE(COUNTIF(markup_info.alt_missing > 0), COUNT(0)) AS pages_with_alt_missing_pct,
+ SAFE_DIVIDE(COUNTIF(markup_info.alt_blank > 0), COUNT(0)) AS pages_with_alt_blank_pct,
+ SAFE_DIVIDE(COUNTIF(markup_info.alt_present > 0), COUNT(0)) AS pages_with_alt_present_pct,
+ SUM(markup_info.total) AS img_total,
+ SAFE_DIVIDE(SUM(markup_info.alt_missing), SUM(markup_info.total)) AS imgs_alt_missing_pct,
+ SAFE_DIVIDE(SUM(markup_info.alt_blank), SUM(markup_info.total)) AS img_alt_blank_pct,
+ SAFE_DIVIDE(SUM(markup_info.alt_present), SUM(markup_info.total)) AS img_alt_present_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url,
+ get_markup_info(JSON_EXTRACT_SCALAR(payload, '$._markup')) AS markup_info
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/image_cdn_usage.sql b/sql/2024/media/image_cdn_usage.sql
new file mode 100644
index 00000000000..ef4540b7b5e
--- /dev/null
+++ b/sql/2024/media/image_cdn_usage.sql
@@ -0,0 +1,37 @@
+#standardSQL
+# Looking for some obvious signs in the URL that the image is dynamically resized
+# image_cdn_usage.sql
+
+CREATE TEMPORARY FUNCTION get_images(images_string STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+var result = [];
+try {
+ var images = JSON.parse(images_string);
+ for (const img of images){
+ result.push({
+ url: img.url
+ })
+ }
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ COUNT(DISTINCT pageUrl) AS pages,
+ COUNT(0) AS images,
+ SAFE_DIVIDE(COUNTIF(imgcdn1), COUNT(0)) AS img_with_cdn1_pct,
+ SAFE_DIVIDE(COUNTIF(imgcdn2), COUNT(0)) AS img_with_cdn2_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ a.url AS pageUrl,
+ imageurl.url,
+ REGEXP_CONTAINS(imageurl.url, r'.*[,\/]w_\d+.*') AS imgcdn1,
+ REGEXP_CONTAINS(imageurl.url, r'\?.*w=.*') AS imgcdn2
+ FROM
+ `httparchive.pages.2024_06_01_*` AS a,
+ UNNEST(get_images(JSON_EXTRACT_SCALAR(payload, '$._Images'))) AS imageurl)
+GROUP BY
+ client
diff --git a/sql/2024/media/image_decode_usage.sql b/sql/2024/media/image_decode_usage.sql
new file mode 100644
index 00000000000..e793b6285a2
--- /dev/null
+++ b/sql/2024/media/image_decode_usage.sql
@@ -0,0 +1,48 @@
+#standardSQL
+# What percent of s have decode=async?
+# image_decode_usage.sql
+
+CREATE TEMPORARY FUNCTION get_decode_info(images_string STRING)
+RETURNS STRUCT<
+ total INT64,
+ decode_async INT64
+> LANGUAGE js AS '''
+let result = {};
+try {
+ let images = JSON.parse(images_string);
+ if (!Array.isArray(images)) {
+ return {};
+ }
+
+ result.total = images.length;
+ result.decode_async = 0;
+
+ for(let img of images) {
+ if(img.decoding === 'async'){
+ result.decode_async += 1
+ }
+ }
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ COUNT(0) AS pages_total,
+ SAFE_DIVIDE(COUNTIF(images_info.total > 0), COUNT(0)) AS pages_with_img_pct,
+ COUNTIF(images_info.decode_async > 0) AS pages_with_decode_async,
+ SAFE_DIVIDE(COUNTIF(images_info.decode_async > 0), COUNT(0)) AS pages_with_decode_async_pct,
+ SUM(images_info.total) AS img_total,
+ SUM(images_info.decode_async) AS imgs_with_decode_async,
+ SAFE_DIVIDE(SUM(images_info.decode_async), SUM(images_info.total)) AS imgs_with_decode_async_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url,
+ get_decode_info(JSON_EXTRACT_SCALAR(payload, '$._Images')) AS images_info
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/image_layout_widths.sql b/sql/2024/media/image_layout_widths.sql
new file mode 100644
index 00000000000..8b81c957f21
--- /dev/null
+++ b/sql/2024/media/image_layout_widths.sql
@@ -0,0 +1,42 @@
+#standardSQL
+# Distribution of image layout widths
+# image_layout_widths.sql
+
+CREATE TEMPORARY FUNCTION layoutDimensions(payload STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+try {
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.map(({clientWidth, clientHeight}) => ({
+ clientWidth,
+ clientHeight
+ }));
+} catch (e) {
+ return [];
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(image.clientWidth, 1000)[OFFSET(percentile * 10)] AS clientWidth
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ image
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(layoutDimensions(payload)) AS image
+ WHERE
+ image.clientWidth > 1
+),
+UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/image_sizes_attribute_strings.sql b/sql/2024/media/image_sizes_attribute_strings.sql
new file mode 100644
index 00000000000..ea4df6040a0
--- /dev/null
+++ b/sql/2024/media/image_sizes_attribute_strings.sql
@@ -0,0 +1,24 @@
+#standardSQL
+# What are the most common values for sizes attributes (lots of auto!)
+# ❕ Updated in 2024 to use all.requests instead of almanac.summary_response_bodies
+# image_sizes_attribute_strings.sql
+
+SELECT
+ client,
+ sizes,
+ COUNT(0) AS freq,
+ SUM(COUNT(0)) OVER (PARTITION BY 0) AS total,
+ COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY 0) AS pct
+FROM
+ `httparchive.all.requests`,
+ UNNEST(REGEXP_EXTRACT_ALL(response_body, r'(?im)<(?:source|img)[^>]*sizes=[\'"]?([^\'"]*)')) AS sizes
+WHERE
+ date = '2024-06-01' AND
+ is_main_document
+GROUP BY
+ client,
+ sizes
+ORDER BY
+ freq DESC
+LIMIT
+ 100
diff --git a/sql/2024/media/image_sizes_per_page_accuracy_impact.sql b/sql/2024/media/image_sizes_per_page_accuracy_impact.sql
new file mode 100644
index 00000000000..0c5c08f9df3
--- /dev/null
+++ b/sql/2024/media/image_sizes_per_page_accuracy_impact.sql
@@ -0,0 +1,89 @@
+#standardSQL
+# measure the impact of innacurate sizes attributes per page
+# image_sizes_per_page_accuracy_impact.sql
+
+CREATE TEMPORARY FUNCTION pageUsesWDescriptors(payload STRING)
+RETURNS BOOL
+LANGUAGE js AS '''
+try {
+
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.reduce( ( acc, cv ) => {
+ if ( cv.srcsetHasWDescriptors ) {
+ acc = true;
+ }
+ return acc;
+ }, false );
+
+} catch (e) {
+ return false;
+}
+''';
+
+CREATE TEMPORARY FUNCTION getTotalEstimatedWastedLoadedPixels(payload STRING)
+RETURNS INT64
+LANGUAGE js AS '''
+try {
+
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.reduce( ( acc, cv ) => {
+ if ( cv.actualSizesEstimatedWastedLoadedPixels > 0 ) {
+ acc += cv.actualSizesEstimatedWastedLoadedPixels;
+ }
+ return acc;
+ }, 0 );
+
+} catch (e) {
+ return null;
+}
+''';
+
+CREATE TEMPORARY FUNCTION getTotalEstimatedWastedLoadedBytes(payload STRING)
+RETURNS FLOAT64
+LANGUAGE js AS '''
+try {
+
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.reduce( ( acc, cv ) => {
+ if ( cv.actualSizesEstimatedWastedLoadedBytes > 0 ) {
+ acc += cv.actualSizesEstimatedWastedLoadedBytes;
+ }
+ return acc;
+ }, 0 );
+
+} catch (e) {
+ return null;
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(totalEstimatedWastedLoadedPixels, 1000)[OFFSET(percentile * 10)] AS totalEstimatedWastedLoadedPixels,
+ APPROX_QUANTILES(totalEstimatedWastedLoadedBytes, 1000)[OFFSET(percentile * 10)] AS totalEstimatedWastedLoadedBytes
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ getTotalEstimatedWastedLoadedPixels(payload) AS totalEstimatedWastedLoadedPixels,
+ getTotalEstimatedWastedLoadedBytes(payload) AS totalEstimatedWastedLoadedBytes
+ FROM
+ `httparchive.pages.2024_06_01_*`
+ WHERE
+ pageUsesWDescriptors(payload) = TRUE
+),
+UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/image_sizing_extrinsic_intrinsic.sql b/sql/2024/media/image_sizing_extrinsic_intrinsic.sql
new file mode 100644
index 00000000000..41d75a1e0eb
--- /dev/null
+++ b/sql/2024/media/image_sizing_extrinsic_intrinsic.sql
@@ -0,0 +1,37 @@
+#standardSQL
+# Are s sized extrinsically or intrinsically (both height and width)?
+# image_sizing_extrinsic_intrinsic.sql
+
+CREATE TEMPORARY FUNCTION getImageSizing(payload STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+try {
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.flatMap(({intrinsicOrExtrinsicSizing}) => ([
+ {property: 'width', value: intrinsicOrExtrinsicSizing.width},
+ {property: 'height', value: intrinsicOrExtrinsicSizing.height}
+ ]));
+} catch (e) {
+ return [];
+}
+''';
+
+SELECT
+ _TABLE_SUFFIX AS client,
+ image.property,
+ image.value,
+ COUNT(0) AS freq,
+ SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, image.property) AS total,
+ COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, image.property) AS pct
+FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getImageSizing(payload)) AS image
+GROUP BY
+ client,
+ property,
+ value
+ORDER BY
+ pct DESC
diff --git a/sql/2024/media/image_srcset_candidates.sql b/sql/2024/media/image_srcset_candidates.sql
new file mode 100644
index 00000000000..58ed7a7b558
--- /dev/null
+++ b/sql/2024/media/image_srcset_candidates.sql
@@ -0,0 +1,39 @@
+#standardSQL
+# distribution of number of srcset candidates
+# image_srcset_candidates.sql
+
+CREATE TEMPORARY FUNCTION getNumberOfSrcsetCandidates(payload STRING)
+RETURNS ARRAY
+LANGUAGE js AS '''
+try {
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.map(({srcsetCandidateDensities}) =>
+ (srcsetCandidateDensities && srcsetCandidateDensities.length) ? srcsetCandidateDensities.length : null
+ );
+} catch (e) {
+ return [];
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(numberOfCandidates, 1000)[OFFSET(percentile * 10)] AS numberOfCandidates
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ numberOfCandidates
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getNumberOfSrcsetCandidates(payload)) AS numberOfCandidates
+),
+UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/image_srcset_densities.sql b/sql/2024/media/image_srcset_densities.sql
new file mode 100644
index 00000000000..94457ad7847
--- /dev/null
+++ b/sql/2024/media/image_srcset_densities.sql
@@ -0,0 +1,41 @@
+#standardSQL
+# distribution of the densities of srcset candidates
+# image_srcset_densities.sql
+
+CREATE TEMPORARY FUNCTION getSrcsetDensities(payload STRING)
+RETURNS ARRAY>>
+LANGUAGE js AS '''
+try {
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.map(({currentSrcDensity, srcsetCandidateDensities}) => ({
+ currentSrcDensity,
+ srcsetCandidateDensities: srcsetCandidateDensities.map(density => Math.round(density * 100) / 100)
+ }));
+} catch (e) {
+ return [];
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(image.currentSrcDensity, 1000)[OFFSET(percentile * 10)] AS currentSrcDensity,
+ APPROX_QUANTILES(srcsetCandidateDensity, 1000)[OFFSET(percentile * 10)] AS srcsetCandidateDensity
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ image
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getSrcsetDensities(payload)) AS image),
+ UNNEST(image.srcsetCandidateDensities) AS srcsetCandidateDensity,
+ UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/image_srcset_descriptor.sql b/sql/2024/media/image_srcset_descriptor.sql
new file mode 100644
index 00000000000..91f7141f85a
--- /dev/null
+++ b/sql/2024/media/image_srcset_descriptor.sql
@@ -0,0 +1,41 @@
+#standardSQL
+# images with srcset descriptor_x descriptor_w
+# image_srcset_descriptor.sql
+
+CREATE TEMPORARY FUNCTION get_media_info(media_string STRING)
+RETURNS STRUCT<
+ num_srcset_all INT64,
+ num_srcset_descriptor_x INT64,
+ num_srcset_descriptor_w INT64
+> LANGUAGE js AS '''
+var result = {};
+try {
+ var media = JSON.parse(media_string);
+
+ if (Array.isArray(media) || typeof media != 'object') return result;
+
+ result.num_srcset_all = media.num_srcset_all;
+ result.num_srcset_descriptor_x = media.num_srcset_descriptor_x;
+ result.num_srcset_descriptor_w = media.num_srcset_descriptor_w;
+
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ SAFE_DIVIDE(COUNTIF(media_info.num_srcset_all > 0), COUNT(0)) AS pages_with_srcset_pct,
+ SAFE_DIVIDE(COUNTIF(media_info.num_srcset_descriptor_x > 0), COUNT(0)) AS pages_with_srcset_descriptor_x_pct,
+ SAFE_DIVIDE(COUNTIF(media_info.num_srcset_descriptor_w > 0), COUNT(0)) AS pages_with_srcset_descriptor_w_pct,
+ SAFE_DIVIDE(SUM(media_info.num_srcset_descriptor_x), SUM(media_info.num_srcset_all)) AS instances_of_srcset_descriptor_x_pct,
+ SAFE_DIVIDE(SUM(media_info.num_srcset_descriptor_w), SUM(media_info.num_srcset_all)) AS instances_of_srcset_descriptor_w_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ get_media_info(JSON_EXTRACT_SCALAR(payload, '$._media')) AS media_info
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/image_srcset_sizes.sql b/sql/2024/media/image_srcset_sizes.sql
new file mode 100644
index 00000000000..7bc616039dd
--- /dev/null
+++ b/sql/2024/media/image_srcset_sizes.sql
@@ -0,0 +1,39 @@
+#standardSQL
+# images (and pages) with srcset w/wo sizes
+# image_srcset_sizes.sql
+
+CREATE TEMPORARY FUNCTION get_media_info(media_string STRING)
+RETURNS STRUCT<
+ num_srcset_all INT64,
+ num_srcset_sizes INT64
+> LANGUAGE js AS '''
+var result = {};
+try {
+ var media = JSON.parse(media_string);
+
+ if (Array.isArray(media) || typeof media != 'object') return result;
+
+ result.num_srcset_all = media.num_srcset_all;
+ result.num_srcset_sizes = media.num_srcset_sizes;
+
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ SAFE_DIVIDE(COUNTIF(media_info.num_srcset_all > 0), COUNT(0)) AS pages_with_srcset_pct,
+ SAFE_DIVIDE(COUNTIF(media_info.num_srcset_sizes > 0), COUNT(0)) AS pages_with_srcset_sizes_pct,
+ SAFE_DIVIDE((COUNTIF(media_info.num_srcset_all > 0) - COUNTIF(media_info.num_srcset_sizes > 0)), COUNT(0)) AS pages_with_srcset_wo_sizes_pct,
+ SAFE_DIVIDE(SUM(media_info.num_srcset_sizes), SUM(media_info.num_srcset_all)) AS instances_of_srcset_sizes_pct,
+ SAFE_DIVIDE((SUM(media_info.num_srcset_all) - SUM(media_info.num_srcset_sizes)), SUM(media_info.num_srcset_all)) AS instances_of_srcset_wo_sizes_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ get_media_info(JSON_EXTRACT_SCALAR(payload, '$._media')) AS media_info
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/image_srcset_sizes_accuracy_pct.sql b/sql/2024/media/image_srcset_sizes_accuracy_pct.sql
new file mode 100644
index 00000000000..fdbd99bd1d3
--- /dev/null
+++ b/sql/2024/media/image_srcset_sizes_accuracy_pct.sql
@@ -0,0 +1,39 @@
+#standardSQL
+# How accurate are srcset and sizes attributes, and what is the impact of sizes innacuracies on srcset selection?
+# image_srcset_sizes_accuracy_pct.sql
+
+CREATE TEMPORARY FUNCTION getSrcsetSizesAccuracy(payload STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+try {
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.map(({srcsetHasWDescriptors, sizesAbsoluteError, sizesRelativeError, wDescriptorAbsoluteError, actualSizesEstimatedWastedLoadedPixels}) => ({
+ srcsetHasWDescriptors,
+ sizesAbsoluteError,
+ sizesRelativeError,
+ wDescriptorAbsoluteError,
+ actualSizesEstimatedWastedLoadedPixels
+ }));
+} catch (e) {
+ return [];
+}
+''';
+
+SELECT
+ _TABLE_SUFFIX AS client,
+ COUNTIF(image.sizesRelativeError > 0.05) AS some_sizes_error,
+ COUNTIF(image.wDescriptorAbsoluteError > 0) AS any_w_descriptor_error,
+ COUNTIF(image.actualSizesEstimatedWastedLoadedPixels > 0) AS sizes_error_impacted_srcset_selection,
+ COUNT(0) AS total_images,
+ COUNTIF(image.srcsetHasWDescriptors) AS total_images_with_w_descriptors,
+ COUNTIF(image.sizesRelativeError > 0.05) / COUNTIF(image.srcsetHasWDescriptors) AS pct_some_sizes_error,
+ COUNTIF(image.wDescriptorAbsoluteError > 0) / COUNTIF(image.srcsetHasWDescriptors) AS pct_any_w_descriptor_error,
+ COUNTIF(image.actualSizesEstimatedWastedLoadedPixels > 0) / COUNTIF(image.srcsetHasWDescriptors) AS pct_sizes_error_impacted_srcset_selection
+FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getSrcsetSizesAccuracy(payload)) AS image
+GROUP BY
+ client
diff --git a/sql/2024/media/image_srcset_sizes_per_image_accuracy_impact.sql b/sql/2024/media/image_srcset_sizes_per_image_accuracy_impact.sql
new file mode 100644
index 00000000000..825d0023a35
--- /dev/null
+++ b/sql/2024/media/image_srcset_sizes_per_image_accuracy_impact.sql
@@ -0,0 +1,67 @@
+#standardSQL
+# measure the impact of innacurate sizes attributes per
+# image_srcset_sizes_per_image_accuracy_impact.sql
+
+CREATE TEMPORARY FUNCTION getSrcsetSizesAccuracy(payload STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+try {
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.map(({
+ sizesAbsoluteError,
+ sizesRelativeError,
+ wDescriptorAbsoluteError,
+ wDescriptorRelativeError,
+ idealSizesSelectedResourceEstimatedPixels,
+ actualSizesEstimatedWastedLoadedPixels,
+ actualSizesEstimatedWastedLoadedBytes
+ }) => {
+ let wastedLoadedPercent;
+ if ( idealSizesSelectedResourceEstimatedPixels > 0 ) {
+ wastedLoadedPercent = actualSizesEstimatedWastedLoadedPixels / idealSizesSelectedResourceEstimatedPixels;
+ } else {
+ wastedLoadedPercent = null;
+ }
+ return {
+ sizesAbsoluteError,
+ sizesRelativeError,
+ wDescriptorAbsoluteError,
+ wDescriptorRelativeError,
+ actualSizesEstimatedWastedLoadedPixels,
+ actualSizesEstimatedWastedLoadedBytes,
+ wastedLoadedPercent
+ };
+ }
+);
+} catch (e) {
+ return [];
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(image.sizesAbsoluteError, 1000)[OFFSET(percentile * 10)] AS sizesAbsoluteError,
+ APPROX_QUANTILES(image.sizesRelativeError, 1000)[OFFSET(percentile * 10)] AS sizesRelativeError,
+ APPROX_QUANTILES(image.wDescriptorAbsoluteError, 1000)[OFFSET(percentile * 10)] AS wDescriptorAbsoluteError,
+ APPROX_QUANTILES(image.wDescriptorRelativeError, 1000)[OFFSET(percentile * 10)] AS wDescriptorRelativeError,
+ APPROX_QUANTILES(image.actualSizesEstimatedWastedLoadedPixels, 1000)[OFFSET(percentile * 10)] AS actualSizesEstimatedWastedLoadedPixels,
+ APPROX_QUANTILES(image.actualSizesEstimatedWastedLoadedBytes, 1000)[OFFSET(percentile * 10)] AS actualSizesEstimatedWastedLoadedBytes,
+ APPROX_QUANTILES(image.wastedLoadedPercent, 1000)[OFFSET(percentile * 10)] AS wastedLoadedPercent
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ image
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getSrcsetSizesAccuracy(payload)) AS image),
+ UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/img_sizes_explicit_errors.sql b/sql/2024/media/img_sizes_explicit_errors.sql
new file mode 100644
index 00000000000..f2c7db437a2
--- /dev/null
+++ b/sql/2024/media/img_sizes_explicit_errors.sql
@@ -0,0 +1,44 @@
+#standardSQL
+# how many sizes attributes had explicit errors?
+# also how many were implicit (100vw)
+# img_sizes_explicit_errors.sql
+
+CREATE TEMPORARY FUNCTION get_responsive_settings(images_string STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+let result = [];
+try {
+const images_ = JSON.parse(images_string);
+if (images_ && images_["responsive-images"]) {
+ const images = images_["responsive-images"];
+ for(const img of images) {
+ result.push({
+ sizes: img.hasSizes || false,
+ srcsetHasWDescriptors: img.srcsetHasWDescriptors || false,
+ sizesWasImplicit: img.sizesWasImplicit || false,
+ sizesParseError: img.sizesParseError || false
+ })
+ }
+}
+} catch (e) {}
+return result;
+''';
+SELECT
+ client,
+ COUNT(0) AS images_with_sizes,
+ SAFE_DIVIDE(COUNTIF(respimg.sizesWasImplicit = TRUE), COUNT(0)) AS implicit_pct,
+ SAFE_DIVIDE(COUNTIF(respimg.sizesWasImplicit = FALSE), COUNT(0)) AS explicit_pct,
+ SAFE_DIVIDE(COUNTIF(respimg.sizesParseError = TRUE), COUNT(0)) AS parseError_pct,
+ SAFE_DIVIDE(COUNTIF(respimg.srcsetHasWDescriptors = TRUE), COUNT(0)) AS wDescriptor_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ a.url AS pageUrl,
+ respimg
+ FROM
+ `httparchive.pages.2024_06_01_*` AS a,
+ UNNEST(get_responsive_settings(JSON_EXTRACT_SCALAR(payload, '$._responsive_images'))) AS respimg
+ WHERE
+ respimg.srcsetHasWDescriptors)
+GROUP BY
+ client
diff --git a/sql/2024/media/img_with_dimensions.sql b/sql/2024/media/img_with_dimensions.sql
new file mode 100644
index 00000000000..0b8b1caf05d
--- /dev/null
+++ b/sql/2024/media/img_with_dimensions.sql
@@ -0,0 +1,53 @@
+#standardSQL
+# extracts the data about width, height and alt
+# using this, counts and reports on the usage for each attribute
+# img_with_dimensions.sql
+
+CREATE TEMPORARY FUNCTION get_image_info(responsiveString STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+try {
+ let result = Array()
+ const responsiveImages = JSON.parse(responsiveString)
+ if(responsiveImages && responsiveImages['responsive-images']){
+ for(const image of responsiveImages["responsive-images"]){
+ if ( image.approximateResourceWidth > 1 || image.approximateResourceHeight > 1 ) {
+ result.push({
+ hasWidth: image.hasWidth ? 1 : 0,
+ hasHeight: image.hasHeight ? 1 : 0,
+ hasAlt: image.hasAlt ? 1 : 0,
+ hasReservedLayoutDimension: image.reservedLayoutDimensions ? 1 : 0
+ })
+ }
+ }}
+ return result
+} catch(e) {
+ return [];
+}
+''';
+
+SELECT
+ client,
+ COUNT(0) AS images,
+ COUNTIF(hasWidth = 1) AS hasWidth,
+ COUNTIF(hasHeight = 1) AS hasHeight,
+ COUNTIF(hasWidth = 1 AND hasHeight = 1) AS hasBoth,
+ COUNTIF(hasAlt = 1) AS hasAlt,
+ COUNTIF(hasReservedLayoutDimension = 1) AS hasDimensions,
+ SAFE_DIVIDE(COUNTIF(hasWidth = 1), COUNT(0)) AS percHasWidth,
+ SAFE_DIVIDE(COUNTIF(hasHeight = 1), COUNT(0)) AS percHasHeight,
+ SAFE_DIVIDE(COUNTIF(hasWidth = 1 AND hasHeight = 1), COUNT(0)) AS percHasBoth,
+ SAFE_DIVIDE(COUNTIF(hasAlt = 1), COUNT(0)) AS percHasAlt,
+ SAFE_DIVIDE(COUNTIF(hasReservedLayoutDimension = 1), COUNT(0)) AS percHasReservedLayoutDimensions
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ hasWidth,
+ hasHeight,
+ hasAlt,
+ hasReservedLayoutDimension
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(get_image_info(JSON_VALUE(payload, '$._responsive_images'))))
+GROUP BY
+ client
diff --git a/sql/2024/media/img_xdomain.sql b/sql/2024/media/img_xdomain.sql
new file mode 100644
index 00000000000..e19e305173c
--- /dev/null
+++ b/sql/2024/media/img_xdomain.sql
@@ -0,0 +1,36 @@
+#standardSQL
+# Cross domain image requests
+# Note to the future this would be more interesting if it looked at origins
+# img_xdomain.sql
+
+CREATE TEMPORARY FUNCTION get_images(images_string STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+var result = [];
+try {
+ var images = JSON.parse(images_string);
+ for (img of images){
+ result.push({
+ url: img.url
+ });
+ }
+} catch (e) {}
+return result;
+''';
+SELECT
+ client,
+ COUNT(DISTINCT pageUrl) AS pages,
+ count(0) AS images,
+ SAFE_DIVIDE(COUNTIF(pageDomain = imageDomain), COUNT(0)) AS img_xdomain_pct,
+ SAFE_DIVIDE(COUNTIF(pageDomain != imageDomain), COUNT(0)) AS img_samedomain_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ a.url AS pageUrl,
+ FORMAT('%T', NET.REG_DOMAIN(a.url)) AS pageDomain,
+ FORMAT('%T', NET.REG_DOMAIN(imageurl.url)) AS imageDomain
+ FROM
+ `httparchive.pages.2024_06_01_*` a,
+ UNNEST(get_images(JSON_EXTRACT_SCALAR(payload, '$._Images'))) AS imageurl)
+GROUP BY
+ client
diff --git a/sql/2024/media/imgs_per_page.sql b/sql/2024/media/imgs_per_page.sql
new file mode 100644
index 00000000000..875dbbd3467
--- /dev/null
+++ b/sql/2024/media/imgs_per_page.sql
@@ -0,0 +1,39 @@
+#standardSQL
+# Distribution of number-of-images per page
+# imgs_per_page.sql
+
+CREATE TEMPORARY FUNCTION numberOfImages(images_string STRING)
+RETURNS INT64
+LANGUAGE js AS '''
+try {
+ return JSON.parse(images_string).filter( i => parseInt(i.approximateResourceWidth) > 1 && parseInt(i.approximateResourceWidth) > 1 ).length;
+} catch {
+ return 0;
+}
+''';
+
+WITH numImgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ numberOfImages(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')) AS numberOfImages
+ FROM
+ `httparchive.pages.2024_06_01_*`
+),
+
+percentiles AS (
+ SELECT
+ client,
+ APPROX_QUANTILES(numberOfImages, 1000) AS numberOfImagesPercentiles
+ FROM
+ numImgs
+ GROUP BY
+ client
+)
+
+SELECT
+ client,
+ percentile,
+ numberOfImagesPercentiles[OFFSET(percentile * 10)] AS numberOfImages
+FROM
+ percentiles,
+ UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
diff --git a/sql/2024/media/largest_image_per_page_bytes.sql b/sql/2024/media/largest_image_per_page_bytes.sql
new file mode 100644
index 00000000000..106d9dbd457
--- /dev/null
+++ b/sql/2024/media/largest_image_per_page_bytes.sql
@@ -0,0 +1,44 @@
+#standardSQL
+# what's the largest image per page? (by bytes)
+# largest_image_per_page_bytes.sql
+
+CREATE TEMPORARY FUNCTION largestImage(payload STRING)
+RETURNS INT64
+LANGUAGE js AS '''
+try {
+
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.reduce( ( acc, cv ) => {
+ const bytes = cv.byteSize;
+ if ( bytes > acc ) {
+ acc = bytes;
+ }
+ return acc;
+ }, 0 );
+
+} catch (e) {
+ return 0;
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(largestImageBytes, 1000)[OFFSET(percentile * 10)] AS largestImageBytesize
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ largestImage(payload) AS largestImageBytes
+ FROM
+ `httparchive.pages.2024_06_01_*`
+),
+UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/largest_image_per_page_layout.sql b/sql/2024/media/largest_image_per_page_layout.sql
new file mode 100644
index 00000000000..1bb7d39e452
--- /dev/null
+++ b/sql/2024/media/largest_image_per_page_layout.sql
@@ -0,0 +1,44 @@
+#standardSQL
+# what's the largest image per page? (by *css* pixels)
+# largest_image_per_page_layout.sql
+
+CREATE TEMPORARY FUNCTION widestImage(payload STRING)
+RETURNS INT64
+LANGUAGE js AS '''
+try {
+
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.reduce( ( acc, cv ) => {
+ const w = cv.clientWidth;
+ if ( w > acc ) {
+ acc = w;
+ }
+ return acc;
+ }, 0 );
+
+} catch (e) {
+ return 0;
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(widestImageCSSpx, 1000)[OFFSET(percentile * 10)] AS widestImageLayoutWidth
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ widestImage(payload) AS widestImageCSSpx
+ FROM
+ `httparchive.pages.2024_06_01_*`
+),
+UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/largest_image_per_page_pixels.sql b/sql/2024/media/largest_image_per_page_pixels.sql
new file mode 100644
index 00000000000..9dbf77f2c75
--- /dev/null
+++ b/sql/2024/media/largest_image_per_page_pixels.sql
@@ -0,0 +1,44 @@
+#standardSQL
+# what's the largest image per page? (by pixels)
+# largest_image_per_page_pixels.sql
+
+CREATE TEMPORARY FUNCTION largestImage(payload STRING)
+RETURNS INT64
+LANGUAGE js AS '''
+try {
+
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.reduce( ( acc, cv ) => {
+ const pixels = cv.approximateResourceWidth * cv.approximateResourceHeight;
+ if ( pixels > acc ) {
+ acc = pixels;
+ }
+ return acc;
+ }, 0 );
+
+} catch (e) {
+ return 0;
+}
+''';
+
+SELECT
+ percentile,
+ client,
+ APPROX_QUANTILES(largestImageMegapixels, 1000)[OFFSET(percentile * 10)] AS largestImagePixelCount
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ largestImage(payload) / 1e6 AS largestImageMegapixels
+ FROM
+ `httparchive.pages.2024_06_01_*`
+),
+UNNEST([10, 25, 50, 75, 90]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/lazy_loading_adoption_over_time.sql b/sql/2024/media/lazy_loading_adoption_over_time.sql
new file mode 100644
index 00000000000..408d0d1807b
--- /dev/null
+++ b/sql/2024/media/lazy_loading_adoption_over_time.sql
@@ -0,0 +1,19 @@
+#standardSQL
+# How has lazy-loading adoption changed over time?
+# copied from https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/2021/resource-hints/imgLazy.sql
+# lazy_loading_adoption_over_time.sql
+
+SELECT
+ SUBSTR(_TABLE_SUFFIX, 0, 10) AS date,
+ IF(ENDS_WITH(_TABLE_SUFFIX, 'desktop'), 'desktop', 'mobile') AS client,
+ COUNT(DISTINCT IF(LOWER(attr) = '"lazy"', url, NULL)) / COUNT(DISTINCT url) AS percent
+FROM
+ `httparchive.pages.*`
+LEFT JOIN
+ UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, "$['_img-loading-attr']"), '$')) AS attr
+GROUP BY
+ date,
+ client
+ORDER BY
+ date DESC,
+ client
diff --git a/sql/2024/media/lcp_element_data.sql b/sql/2024/media/lcp_element_data.sql
new file mode 100644
index 00000000000..8312b4cb146
--- /dev/null
+++ b/sql/2024/media/lcp_element_data.sql
@@ -0,0 +1,96 @@
+#standardSQL
+# LCP element node details
+# How many LCPs had images? (either s or as background images)?
+# How many of those s were lazy-loaded (anti-pattern!)
+# lcp_element_data.sql
+
+CREATE TEMP FUNCTION getLoadingAttr(attributes STRING) RETURNS STRING LANGUAGE js AS '''
+ try {
+ const data = JSON.parse(attributes);
+ const loadingAttr = data.find(attr => attr["name"] === "loading")
+ return loadingAttr.value
+ } catch (e) {
+ return "";
+ }
+''';
+
+CREATE TEMP FUNCTION getDecodingAttr(attributes STRING) RETURNS STRING LANGUAGE js AS '''
+ try {
+ const data = JSON.parse(attributes);
+ const decodingAttr = data.find(attr => attr["name"] === "decoding")
+ return decodingAttr.value
+ } catch (e) {
+ return "";
+ }
+''';
+
+CREATE TEMP FUNCTION getLoadingClasses(attributes STRING) RETURNS STRING LANGUAGE js AS '''
+ try {
+ const data = JSON.parse(attributes);
+ const classes = data.find(attr => attr["name"] === "class").value
+ if (classes.indexOf('lazyload') !== -1) {
+ return classes
+ } else {
+ return ""
+ }
+ } catch (e) {
+ return "";
+ }
+''';
+
+
+
+WITH
+lcp_stats AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url,
+ JSON_EXTRACT_SCALAR(payload, '$._performance.lcp_elem_stats.nodeName') AS nodeName,
+ JSON_EXTRACT_SCALAR(payload, '$._performance.lcp_elem_stats.url') AS elementUrl,
+ CAST(JSON_EXTRACT_SCALAR(payload, '$._performance.lcp_elem_stats.size') AS INT64) AS size,
+ CAST(JSON_EXTRACT_SCALAR(payload, '$._performance.lcp_elem_stats.loadTime') AS FLOAT64) AS loadTime,
+ CAST(JSON_EXTRACT_SCALAR(payload, '$._performance.lcp_elem_stats.startTime') AS FLOAT64) AS startTime,
+ CAST(JSON_EXTRACT_SCALAR(payload, '$._performance.lcp_elem_stats.renderTime') AS FLOAT64) AS renderTime,
+ JSON_EXTRACT(payload, '$._performance.lcp_elem_stats.attributes') AS attributes,
+ getLoadingAttr(JSON_EXTRACT(payload, '$._performance.lcp_elem_stats.attributes')) AS loading,
+ getDecodingAttr(JSON_EXTRACT(payload, '$._performance.lcp_elem_stats.attributes')) AS decoding,
+ getLoadingClasses(JSON_EXTRACT(payload, '$._performance.lcp_elem_stats.attributes')) AS classWithLazyload
+ FROM
+ `httparchive.pages.2024_06_01_*`
+)
+
+SELECT
+ client,
+ nodeName,
+ COUNT(DISTINCT url) AS pages,
+ ANY_VALUE(total) AS total,
+ COUNT(DISTINCT url) / ANY_VALUE(total) AS pct,
+ COUNTIF(elementUrl != '') AS haveImages,
+ COUNTIF(elementUrl != '') / COUNT(DISTINCT url) AS pct_haveImages,
+ COUNTIF(loading = 'eager') AS native_eagerload,
+ COUNTIF(loading = 'lazy') AS native_lazyload,
+ COUNTIF(classWithLazyload != '') AS lazyload_class,
+ COUNTIF(classWithLazyload != '' OR loading = 'lazy') AS probably_lazyLoaded,
+ COUNTIF(classWithLazyload != '' OR loading = 'lazy') / COUNT(DISTINCT url) AS pct_prob_lazyloaded,
+ COUNTIF(decoding = 'async') AS async_decoding,
+ COUNTIF(decoding = 'sync') AS sync_decoding,
+ COUNTIF(decoding = 'auto') AS auto_decoding
+FROM
+ lcp_stats
+JOIN (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ COUNT(0) AS total
+ FROM
+ `httparchive.summary_pages.2024_06_01_*`
+ GROUP BY
+ _TABLE_SUFFIX)
+USING
+ (client)
+GROUP BY
+ client,
+ nodeName
+HAVING
+ pages > 1000
+ORDER BY
+ pct DESC
diff --git a/sql/2024/media/media_formats.sql b/sql/2024/media/media_formats.sql
new file mode 100644
index 00000000000..ba0ef393800
--- /dev/null
+++ b/sql/2024/media/media_formats.sql
@@ -0,0 +1,54 @@
+#standardSQL
+# Format adoption
+# media_formats.sql
+# ❕ Updated in 2024 to use all.requests instead of almanac.requests
+
+CREATE TEMPORARY FUNCTION fixMimeType(mimeType STRING)
+RETURNS STRING
+LANGUAGE js AS '''
+if (mimeType === "image/avif") {
+ return "avif";
+} else if (mimeType === "image/webp" || mimeType==="webp") {
+ return "webp";
+} else if (mimeType === 'image/jpg' || mimeType === 'image/jpeg') {
+ return 'jpg';
+} else if (mimeType === 'image/png' || mimeType === 'Image/png') {
+ return 'png';
+} else if (mimeType === 'image/gif') {
+ return 'gif';
+} else if (mimeType === 'image/svg+xml') {
+ return 'svg';
+} else if (mimeType === 'image/x-icon' || mimeType === 'image/vnd.microsoft.icon') {
+ return 'ico';
+} else if (mimeType === 'image/bmp') {
+ return 'bmp';
+} else {
+ return 'other/unknown';
+}
+''';
+
+SELECT
+ client,
+ trueFormat,
+ COUNT(DISTINCT NET.HOST(url)) AS hosts,
+ COUNT(DISTINCT page) AS pages,
+ COUNT(0) AS freqImages,
+ SUM(COUNT(0)) OVER (PARTITION BY client) AS totalImages,
+ COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pctImages
+FROM (
+ SELECT
+ client,
+ page,
+ url,
+ fixMimeType(JSON_VALUE(payload, '$.response.content.mimeType')) AS trueFormat
+ FROM
+ `httparchive.all.requests`
+ WHERE
+ date = '2024-06-01' AND
+ type = 'image' AND
+ JSON_VALUE(summary, '$.respBodySize') IS NOT NULL)
+GROUP BY
+ client,
+ trueFormat
+ORDER BY
+ pctImages DESC
diff --git a/sql/2024/media/picture_distribution.sql b/sql/2024/media/picture_distribution.sql
new file mode 100644
index 00000000000..d4ecea5f8c4
--- /dev/null
+++ b/sql/2024/media/picture_distribution.sql
@@ -0,0 +1,24 @@
+#standardSQL
+# Number of picture elements per page
+# picture_distribution.sql
+
+SELECT
+ percentile,
+ client,
+ COUNTIF(picture > 0) AS pages,
+ COUNT(0) AS total,
+ COUNTIF(picture > 0) / COUNT(0) AS pct,
+ APPROX_QUANTILES(IF(picture > 0, picture, NULL), 1000)[OFFSET(percentile * 10)] AS picture_elements_per_page
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ CAST(JSON_QUERY(JSON_VALUE(payload, '$._media'), '$.num_picture_img') AS INT64) AS picture
+ FROM
+ `httparchive.pages.2024_06_01_*`),
+ UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/picture_format_distribution.sql b/sql/2024/media/picture_format_distribution.sql
new file mode 100644
index 00000000000..099a96fbac2
--- /dev/null
+++ b/sql/2024/media/picture_format_distribution.sql
@@ -0,0 +1,59 @@
+#standardSQL
+# picture formats distribution
+# picture_format_distribution.sql
+
+# returns all the data we need from _media
+CREATE TEMPORARY FUNCTION get_media_info(media_string STRING)
+RETURNS STRUCT<
+ num_picture_img INT64,
+ num_picture_formats INT64,
+ picture_formats ARRAY
+> LANGUAGE js AS '''
+var result = {};
+try {
+ var media = JSON.parse(media_string);
+
+ if (Array.isArray(media) || typeof media != 'object') return result;
+
+ // fix "picture_formats":"[]"
+ if (!Array.isArray(media.picture_formats))
+ {
+ media.picture_formats = JSON.parse(media.picture_formats);
+ }
+
+ // skip "picture_formats":[{}]
+ if (media.picture_formats.length == 1 && Object.keys(media.picture_formats[0]).length === 0)
+ {
+ media.picture_formats = [];
+ }
+
+ result.picture_formats = media.picture_formats;
+ result.num_picture_img = media.num_picture_img;
+ result.num_picture_formats = result.picture_formats.length;
+
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ SAFE_DIVIDE(COUNTIF(media_info.num_picture_formats > 0), COUNTIF(media_info.num_picture_img > 0)) AS pages_with_picture_formats_pct,
+ SAFE_DIVIDE(COUNTIF(media_info.num_picture_formats = 1), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_picture_formats_1_pct,
+ SAFE_DIVIDE(COUNTIF(media_info.num_picture_formats = 2), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_picture_formats_2_pct,
+ SAFE_DIVIDE(COUNTIF(media_info.num_picture_formats = 3), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_picture_formats_3_pct,
+ SAFE_DIVIDE(COUNTIF(media_info.num_picture_formats >= 4), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_picture_formats_4_and_more_pct,
+ SAFE_DIVIDE(COUNTIF('image/webp' IN UNNEST(media_info.picture_formats)), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_webp_pct,
+ SAFE_DIVIDE(COUNTIF('image/gif' IN UNNEST(media_info.picture_formats)), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_gif_pct,
+ SAFE_DIVIDE(COUNTIF('image/jpg' IN UNNEST(media_info.picture_formats)), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_jpg_pct,
+ SAFE_DIVIDE(COUNTIF('image/png' IN UNNEST(media_info.picture_formats)), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_png_pct,
+ SAFE_DIVIDE(COUNTIF('image/avif' IN UNNEST(media_info.picture_formats)), COUNTIF(media_info.num_picture_formats > 0)) AS pages_with_avif_pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ get_media_info(JSON_EXTRACT_SCALAR(payload, '$._media')) AS media_info
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/picture_orientation.sql b/sql/2024/media/picture_orientation.sql
new file mode 100644
index 00000000000..019a3e0988d
--- /dev/null
+++ b/sql/2024/media/picture_orientation.sql
@@ -0,0 +1,40 @@
+#standardSQL
+# picture using orientation
+# picture_orientation.sql
+
+CREATE TEMPORARY FUNCTION get_media_info(media_string STRING)
+RETURNS STRUCT<
+ num_picture_img INT64,
+ num_picture_using_orientation INT64
+> LANGUAGE js AS '''
+var result = {};
+try {
+ var media = JSON.parse(media_string);
+
+ if (Array.isArray(media) || typeof media != 'object') return result;
+
+ result.num_picture_img = media.num_picture_img;
+ result.num_picture_using_orientation = media.num_picture_using_orientation;
+
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ COUNTIF(media_info.num_picture_using_orientation > 0) AS picture_orientation_pages,
+ COUNTIF(media_info.num_picture_img > 0) AS total_picture_pages,
+ SAFE_DIVIDE(COUNTIF(media_info.num_picture_using_orientation > 0), COUNTIF(media_info.num_picture_img > 0)) AS pct_picture_orientation_pages,
+ SUM(media_info.num_picture_using_orientation) AS picture_orientation_images,
+ SUM(media_info.num_picture_img) AS total_picture_images,
+ SAFE_DIVIDE(SUM(media_info.num_picture_using_orientation), SUM(media_info.num_picture_img)) AS pct_picture_orientation_images
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ get_media_info(JSON_EXTRACT_SCALAR(payload, '$._media')) AS media_info
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/picture_switching.sql b/sql/2024/media/picture_switching.sql
new file mode 100644
index 00000000000..5ef4d108c74
--- /dev/null
+++ b/sql/2024/media/picture_switching.sql
@@ -0,0 +1,33 @@
+#standardSQL
+# How many picture elements are doing media and type switching?
+# picture_switching.sql
+
+CREATE TEMPORARY FUNCTION getPictureSwitching(payload STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+try {
+ var $ = JSON.parse(payload);
+ var responsiveImages = JSON.parse($._responsive_images);
+ responsiveImages = responsiveImages['responsive-images'];
+
+ return responsiveImages.filter(img => img.isInPicture).map(({pictureMediaSwitching, pictureTypeSwitching}) => ({
+ pictureMediaSwitching,
+ pictureTypeSwitching
+ }));
+} catch (e) {
+ return [];
+}
+''';
+
+SELECT
+ _TABLE_SUFFIX AS client,
+ COUNTIF(image.pictureMediaSwitching) AS picture_media_switching,
+ COUNTIF(image.pictureTypeSwitching) AS picture_type_switching,
+ COUNT(0) AS total_picture,
+ COUNTIF(image.pictureMediaSwitching) / COUNT(0) AS pct_picture_media_switching,
+ COUNTIF(image.pictureTypeSwitching) / COUNT(0) AS pct_picture_type_switching
+FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getPictureSwitching(payload)) AS image
+GROUP BY
+ client
diff --git a/sql/2024/media/picture_using_min_resolution.sql b/sql/2024/media/picture_using_min_resolution.sql
new file mode 100644
index 00000000000..12d490fd5ef
--- /dev/null
+++ b/sql/2024/media/picture_using_min_resolution.sql
@@ -0,0 +1,41 @@
+#standardSQL
+# picture using min resolution
+# picture_using_min_resolution.sql
+
+# returns all the data we need from _media
+CREATE TEMPORARY FUNCTION get_media_info(media_string STRING)
+RETURNS STRUCT<
+ num_picture_using_min_resolution INT64,
+ num_picture_img INT64
+> LANGUAGE js AS '''
+var result = {};
+try {
+ var media = JSON.parse(media_string);
+
+ if (Array.isArray(media) || typeof media != 'object') return result;
+
+ result.num_picture_using_min_resolution = media.num_picture_using_min_resolution;
+ result.num_picture_img = media.num_picture_img;
+
+} catch (e) {}
+return result;
+''';
+
+SELECT
+ client,
+ COUNTIF(media_info.num_picture_using_min_resolution > 0) AS picture_min_resolution_pages,
+ COUNTIF(media_info.num_picture_img > 0) AS total_picture_pages,
+ SAFE_DIVIDE(COUNTIF(media_info.num_picture_using_min_resolution > 0), COUNTIF(media_info.num_picture_img > 0)) AS pct_picture_min_resolution_pages,
+ SUM(media_info.num_picture_using_min_resolution) AS picture_min_resolution_images,
+ SUM(media_info.num_picture_img) AS total_picture_images,
+ SAFE_DIVIDE(SUM(media_info.num_picture_using_min_resolution), SUM(media_info.num_picture_img)) AS pct_picture_min_resolution_images
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ get_media_info(JSON_EXTRACT_SCALAR(payload, '$._media')) AS media_info
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/pixel_volume.sql b/sql/2024/media/pixel_volume.sql
new file mode 100644
index 00000000000..bde07985d8e
--- /dev/null
+++ b/sql/2024/media/pixel_volume.sql
@@ -0,0 +1,59 @@
+#standardSQL
+# pixel areas
+# pixel_volume.sql
+
+CREATE TEMPORARY FUNCTION getCssPixels(payload STRING)
+RETURNS INT64 LANGUAGE js AS '''
+try {
+ let data = JSON.parse(payload);
+ return data.reduce((a, c) => a + (c.width||0)*(c.height||0), 0) || 0;
+}
+catch (e) {}
+return null;
+''';
+
+CREATE TEMPORARY FUNCTION getNaturalPixels(payload STRING)
+RETURNS INT64 LANGUAGE js AS '''
+try {
+ let data = JSON.parse(payload);
+ return data.reduce((a, c) => a + (c.naturalWidth||0)*(c.naturalHeight||0), 0) || 0;
+}
+catch (e) {}
+return null;
+''';
+
+SELECT
+ percentile,
+ client,
+ ANY_VALUE(viewport_height) AS viewport_height,
+ ANY_VALUE(viewport_width) AS viewport_width,
+ ANY_VALUE(dpr) AS dpr,
+ ANY_VALUE(viewport_height) * ANY_VALUE(viewport_width) AS display_px,
+ APPROX_QUANTILES(css_pixels, 1000)[OFFSET(percentile * 10)] AS css_pixels,
+ APPROX_QUANTILES(natural_pixels, 1000)[OFFSET(percentile * 10)] AS natural_pixels,
+ APPROX_QUANTILES(natural_pixels, 1000)[OFFSET(percentile * 10)] / (ANY_VALUE(viewport_height) * ANY_VALUE(viewport_width)) AS pct
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url AS page,
+ getCssPixels(JSON_EXTRACT_SCALAR(payload, '$._Images')) AS css_pixels,
+ getNaturalPixels(JSON_EXTRACT_SCALAR(payload, '$._Images')) AS natural_pixels,
+ CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._Dpi'), '$.dppx') AS FLOAT64) AS dpr,
+ CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._Resolution'), '$.absolute.height') AS FLOAT64) AS viewport_height,
+ CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._Resolution'), '$.absolute.width') AS FLOAT64) AS viewport_width
+ FROM
+ `httparchive.pages.2024_06_01_*`),
+ UNNEST([10, 25, 50, 75, 90]) AS percentile
+WHERE
+ # it appears the _Images array is populated only from tag requests and not CSS or favicon
+ # likewise the bigImageCount and smallImageCount only track images > 100,000 and < 10,000 respectively.
+ # Meaning images between 10KB and 100KB won't show up in the count
+ # https://github.com/WPO-Foundation/webpagetest/blob/master/www/breakdown.inc#L95
+ css_pixels > 0 AND
+ natural_pixels > 0
+GROUP BY
+ percentile,
+ client
+ORDER BY
+ percentile,
+ client
diff --git a/sql/2024/media/portrait_landscape_square.sql b/sql/2024/media/portrait_landscape_square.sql
new file mode 100644
index 00000000000..344fb59ce90
--- /dev/null
+++ b/sql/2024/media/portrait_landscape_square.sql
@@ -0,0 +1,64 @@
+#standardSQL
+# What are the aspect ratios of the web's images?
+# portrait_landscape_square.sql
+
+CREATE TEMPORARY FUNCTION getAspectRatioInfo(responsiveImagesJsonString STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+ const parsed = JSON.parse( responsiveImagesJsonString );
+ if ( parsed && parsed.map ) {
+ return parsed.map( d => {
+ const aspectRatio = ( d.approximateResourceWidth > 0 && d.approximateResourceHeight > 0 ?
+ Math.round( ( d.approximateResourceWidth / d.approximateResourceHeight ) * 1000 ) / 1000 : -1 );
+ return {
+ imgURL: d.url,
+ approximateResourceWidth: Math.floor( d.approximateResourceWidth || 0 ),
+ approximateResourceHeight: Math.floor( d.approximateResourceHeight || 0 ),
+ aspectRatio: aspectRatio,
+ isPortrait: aspectRatio < 1 && aspectRatio > 0,
+ isLandscape: aspectRatio > 1,
+ isSquare: aspectRatio == 1
+ }
+ });
+ }
+''';
+
+WITH imgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url AS pageURL,
+ imgURL,
+ approximateResourceWidth,
+ approximateResourceHeight,
+ isPortrait,
+ isLandscape,
+ isSquare
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getAspectRatioInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
+ WHERE
+ approximateResourceWidth > 1 AND
+ approximateResourceHeight > 1
+),
+
+counts_per_client AS (
+ SELECT
+ client,
+ COUNTIF(isPortrait) AS portraits,
+ COUNTIF(isLandscape) AS landscapes,
+ COUNTIF(isSquare) AS squares,
+ COUNT(0) AS numberOfImagesPerClient
+ FROM
+ imgs
+ GROUP BY
+ client
+)
+
+SELECT
+ client,
+ SAFE_DIVIDE(portraits, numberOfImagesPerClient) AS percentPortrait,
+ SAFE_DIVIDE(landscapes, numberOfImagesPerClient) AS percentLandscape,
+ SAFE_DIVIDE(squares, numberOfImagesPerClient) AS percentSquare,
+ numberOfImagesPerClient
+FROM
+ counts_per_client
diff --git a/sql/2024/media/score_progressive_jpeg.sql b/sql/2024/media/score_progressive_jpeg.sql
new file mode 100644
index 00000000000..38f4a372a87
--- /dev/null
+++ b/sql/2024/media/score_progressive_jpeg.sql
@@ -0,0 +1,22 @@
+#standardSQL
+# percent of pages with score_progressive_jpeg
+# -1, 0 - 25, 25 - 50, 50 - 75, 75 - 100
+# score_progressive_jpeg.sql
+
+SELECT
+ client,
+ COUNTIF(score < 0) / COUNT(0) AS percent_negative,
+ COUNTIF(score >= 0 AND score < 25) / COUNT(0) AS percent_0_25,
+ COUNTIF(score >= 25 AND score < 50) / COUNT(0) AS percent_25_50,
+ COUNTIF(score >= 50 AND score < 75) / COUNT(0) AS percent_50_75,
+ COUNTIF(score >= 75 AND score <= 100) / COUNT(0) AS percent_75_100
+FROM (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ CAST(JSON_EXTRACT(payload, '$._score_progressive_jpeg') AS INT64) AS score
+ FROM
+ `httparchive.pages.2024_06_01_*`)
+GROUP BY
+ client
+ORDER BY
+ client
diff --git a/sql/2024/media/srcset_density_coverage.sql b/sql/2024/media/srcset_density_coverage.sql
new file mode 100644
index 00000000000..4c41468dd17
--- /dev/null
+++ b/sql/2024/media/srcset_density_coverage.sql
@@ -0,0 +1,76 @@
+#standardSQL
+# images srcset candidates' densities: do they cover useful ranges?
+# srcset_density_coverage.sql
+
+CREATE TEMPORARY FUNCTION getSrcsetInfo(responsiveImagesJsonString STRING)
+RETURNS ARRAY,
+ numberOfSrcsetCandidates INT64,
+ minDensity FLOAT64,
+ maxDensity FLOAT64>>
+LANGUAGE js AS '''
+ const parsed = JSON.parse( responsiveImagesJsonString );
+ if ( parsed && parsed.map ) {
+ return parsed.map( d => {
+ const result = {
+ hasSrcset: d.hasSrcset,
+ srcsetHasXDescriptors: d.srcsetHasXDescriptors,
+ srcsetHasWDescriptors: d.srcsetHasXDescriptors,
+ srcsetCandidateDensities: [],
+ numberOfSrcsetCandidates: 0,
+ minDensity: d.currentSrcDensity,
+ maxDensity: d.currentSrcDensity
+ };
+ if ( d.srcsetCandidateDensities && d.srcsetCandidateDensities.map ) {
+ const densities = d.srcsetCandidateDensities.map( n => parseFloat( n ) );
+ result.srcsetCandidateDensities = densities;
+ result.numberOfSrcsetCandidates = densities.length;
+ result.minDensity = Math.min( ...densities );
+ result.maxDensity = Math.max( ...densities );
+ }
+ return result;
+ });
+ }
+''';
+
+WITH imgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ hasSrcset,
+ srcsetCandidateDensities,
+ minDensity,
+ maxDensity
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getSrcsetInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
+ WHERE
+ srcsetHasXDescriptors = TRUE OR srcsetHasWDescriptors = TRUE
+),
+
+counts AS (
+ SELECT
+ client,
+ COUNT(0) AS number_of_imgs_with_srcset,
+ COUNTIF(minDensity <= 1 AND maxDensity >= 1.5) AS number_of_srcsets_covering_1x_to_1p5x,
+ COUNTIF(minDensity <= 1 AND maxDensity >= 2) AS number_of_srcsets_covering_1x_to_2x,
+ COUNTIF(minDensity <= 1 AND maxDensity >= 2.5) AS number_of_srcsets_covering_1x_to_2p5x,
+ COUNTIF(minDensity <= 1 AND maxDensity >= 3) AS number_of_srcsets_covering_1x_to_3x
+ FROM imgs
+ GROUP BY client
+)
+
+SELECT
+ client,
+ number_of_imgs_with_srcset,
+ number_of_srcsets_covering_1x_to_1p5x,
+ number_of_srcsets_covering_1x_to_2x,
+ number_of_srcsets_covering_1x_to_2p5x,
+ number_of_srcsets_covering_1x_to_3x,
+ number_of_srcsets_covering_1x_to_1p5x / number_of_imgs_with_srcset AS pct_of_srcsets_covering_1x_to_1p5x,
+ number_of_srcsets_covering_1x_to_2x / number_of_imgs_with_srcset AS pct_of_srcsets_covering_1x_to_2x,
+ number_of_srcsets_covering_1x_to_2p5x / number_of_imgs_with_srcset AS pct_of_srcsets_covering_1x_to_2p5x,
+ number_of_srcsets_covering_1x_to_3x / number_of_imgs_with_srcset AS pct_of_srcsets_covering_1x_to_3x
+FROM counts
diff --git a/sql/2024/media/top_aspect_ratios.sql b/sql/2024/media/top_aspect_ratios.sql
new file mode 100644
index 00000000000..4a9d63a53d3
--- /dev/null
+++ b/sql/2024/media/top_aspect_ratios.sql
@@ -0,0 +1,71 @@
+#standardSQL
+# top particular aspect ratios
+# top_aspect_ratios.sql
+
+CREATE TEMPORARY FUNCTION getSrcsetInfo(responsiveImagesJsonString STRING)
+RETURNS ARRAY>
+LANGUAGE js AS '''
+ const parsed = JSON.parse( responsiveImagesJsonString );
+ if ( parsed && parsed.map ) {
+ return parsed.map( d => ({
+ imgURL: d.url,
+ approximateResourceWidth: Math.floor( d.approximateResourceWidth || 0 ),
+ approximateResourceHeight: Math.floor( d.approximateResourceHeight || 0 ),
+ aspectRatio: ( d.approximateResourceWidth > 0 && d.approximateResourceHeight > 0 ?
+ Math.round( ( d.approximateResourceWidth / d.approximateResourceHeight ) * 1000 ) / 1000 :
+ -1 )
+ }) );
+ }
+''';
+
+WITH imgs AS (
+ SELECT
+ _TABLE_SUFFIX AS client,
+ url AS pageURL,
+ imgURL,
+ approximateResourceWidth,
+ approximateResourceHeight,
+ aspectRatio
+ FROM
+ `httparchive.pages.2024_06_01_*`,
+ UNNEST(getSrcsetInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
+ WHERE
+ approximateResourceWidth > 1 AND
+ approximateResourceHeight > 1
+),
+
+counts_per_client AS (
+ SELECT
+ client,
+ COUNT(0) AS numberOfImagesPerClient
+ FROM
+ imgs
+ GROUP BY
+ client
+),
+
+counts_per_client_and_aspect_ratio AS (
+ SELECT
+ client,
+ aspectRatio,
+ COUNT(0) AS numberOfImagesPerClientAndAspectRatio
+ FROM
+ imgs
+ GROUP BY
+ client,
+ aspectRatio
+)
+
+SELECT
+ client,
+ aspectRatio,
+ numberOfImagesPerClientAndAspectRatio,
+ SAFE_DIVIDE(numberOfImagesPerClientAndAspectRatio, numberOfImagesPerClient) AS percentOfImages
+FROM
+ counts_per_client_and_aspect_ratio
+LEFT JOIN
+ counts_per_client
+USING
+ (client)
+ORDER BY
+ percentOfImages DESC
diff --git a/sql/2024/media/video_adoption.sql b/sql/2024/media/video_adoption.sql
new file mode 100644
index 00000000000..1cac4f6e5c4
--- /dev/null
+++ b/sql/2024/media/video_adoption.sql
@@ -0,0 +1,20 @@
+#standardSQL
+# How many pages use