diff --git a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py index 5c19b593..11251491 100644 --- a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py +++ b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py @@ -4,30 +4,35 @@ from datetime import datetime, timedelta, timezone from enum import Enum -from pathlib import Path -from typing import List, Optional, Any, Dict, Union, TypedDict, Tuple +from typing import List, Optional, Any, Dict, Union import gzip import json import logging import math import time +import string import ujson import urllib3 -from fastapi import APIRouter, Depends, Query, HTTPException, Header, Response, Request +from fastapi import ( + APIRouter, + Depends, + Query, + HTTPException, + Header, + Response, + Request, + status, +) from fastapi.encoders import jsonable_encoder from fastapi.responses import JSONResponse from typing_extensions import Annotated -from pydantic import Field +from pydantic import Field, field_serializer, field_validator -import sqlalchemy as sa -from sqlalchemy import tuple_, Row, sql -from sqlalchemy.orm import Session +from sqlalchemy import sql from sqlalchemy.sql.expression import and_, text, select, column -from sqlalchemy.sql.expression import text as sql_text -from sqlalchemy.sql.expression import table as sql_table from sqlalchemy.exc import OperationalError from psycopg2.extensions import QueryCanceledError @@ -276,7 +281,7 @@ class MeasurementMeta(BaseModel): report_id: Optional[str] = None test_name: Optional[str] = None test_start_time: Optional[datetime] = None - probe_asn: Optional[str] = None + probe_asn: Optional[str | int] = None probe_cc: Optional[str] = None scores: Optional[str] = None category_code: Optional[str] = None @@ -286,6 +291,10 @@ class MeasurementMeta(BaseModel): raw_measurement: Optional[str] = None category_code: Optional[str] = None + @field_serializer("measurement_start_time", "test_start_time") + def format_ts(self, v: datetime) -> str: + return v.strftime("%Y-%m-%dT%H:%M:%SZ") + def format_msmt_meta(msmt_meta: dict) -> MeasurementMeta: formatted_msmt_meta = MeasurementMeta( @@ -430,75 +439,97 @@ class MeasurementBase(BaseModel): test_name: Optional[str] = Field(default=None, title="test name of the measurement") +class GetMeasurementMetaRequest(BaseModel): + measurement_uid: Optional[str] = Field( + None, + description="The measurement ID, mutually exclusive with report_id + input", + min_length=3, + ) + report_id: Optional[str] = Field( + None, + description=( + "The report_id to search measurements for example: " + "20210208T162755Z_ndt_DZ_36947_n1_8swgXi7xNuRUyO9a" + ), + min_length=3, + ) + input: Optional[str] = Field( + None, + description="The input (for example a URL or IP address) to search measurements for", + min_length=3, + ) + full: bool = Field( + False, + description="Include JSON measurement data", + ) + + @field_validator("report_id") + def report_id_validator(cls, report_id: str) -> str: + if report_id: + return validate_report_id(report_id) + + return report_id + +def validate_report_id(report_id: str) -> str: + if len(report_id) < 15 or len(report_id) > 100: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Invalid report_id field", + ) + + is_in_charset( + report_id, string.ascii_letters + string.digits + "_", "Invalid report_id field" + ) + + return report_id + @router.get("/v1/measurement_meta", response_model_exclude_unset=True) async def get_measurement_meta( response: Response, - measurement_uid: Annotated[ - Optional[str], - Query( - description="The measurement ID, mutually exclusive with report_id + input", - min_length=3, - ), - ] = None, - report_id: Annotated[ - Optional[str], - Query( - description=( - "The report_id to search measurements for example: " - "20210208T162755Z_ndt_DZ_36947_n1_8swgXi7xNuRUyO9a" - ), - min_length=3, - ), - ] = None, - input: Annotated[ - Optional[str], - Query( - description="The input (for example a URL or IP address) to search measurements for", - min_length=3, - ), - ] = None, - full: Annotated[bool, Query(description="Include JSON measurement data")] = False, + request: GetMeasurementMetaRequest = Depends(), settings=Depends(get_settings), db=Depends(get_clickhouse_session), -) -> MeasurementMeta: +) -> MeasurementMeta | Dict[str, Any]: """ Get metadata on one measurement by measurement_uid or report_id + input """ - if measurement_uid: - log.info(f"get_measurement_meta {measurement_uid}") - msmt_meta = _get_measurement_meta_by_uid(db, measurement_uid) - elif report_id: - log.info(f"get_measurement_meta {report_id} {input}") - msmt_meta = _get_measurement_meta_clickhouse(db, report_id, input) + if request.measurement_uid: + log.info(f"get_measurement_meta {request.measurement_uid}") + msmt_meta = _get_measurement_meta_by_uid(db, request.measurement_uid) + elif request.report_id: + log.info(f"get_measurement_meta {request.report_id} {input}") + msmt_meta = _get_measurement_meta_clickhouse( + db, request.report_id, request.input + ) else: raise HTTPException( - status_code=400, - detail="Either report_id or measurement_uid must be provided", + status_code=status.HTTP_400_BAD_REQUEST, + detail="Missing measurement_uid or report_id. You should provide at the least one" ) + if msmt_meta.probe_asn is not None and isinstance(msmt_meta.probe_asn, str): + # Emulates old monolith behaviour of returning int as probe_asn + msmt_meta.probe_asn = asn_to_int(msmt_meta.probe_asn) + setcacheresponse("1m", response) - body = "" - if not full: # return without raw_measurement + if not request.full: return msmt_meta - if msmt_meta == {}: # measurement not found - return MeasurementMeta(raw_measurement=body) + if msmt_meta == MeasurementMeta(): # measurement not found + return {"raw_measurement": ""} try: - assert isinstance(msmt_meta.report_id, str) and isinstance( - msmt_meta.measurement_uid, str - ) - body = _fetch_measurement_body( + # TODO: uid_cleanup + assert msmt_meta.report_id is not None + msmt_meta.raw_measurement = _fetch_measurement_body( db, settings, msmt_meta.report_id, msmt_meta.measurement_uid ) - assert isinstance(body, bytes) - body = body.decode() except Exception as e: log.error(e, exc_info=True) + msmt_meta.raw_measurement = "" - msmt_meta.raw_measurement = body return msmt_meta @@ -529,6 +560,7 @@ def genurl(base_url: str, path: str, **kw) -> str: """Generate absolute URL for the API""" return urljoin(base_url, path) + "?" + urlencode(kw) + class OrderBy(str, Enum): measurement_start_time = "measurement_start_time" input = "input" @@ -536,6 +568,7 @@ class OrderBy(str, Enum): probe_asn = "probe_asn" test_name = "test_name" + @router.get("/v1/measurements") async def list_measurements( request: Request, @@ -571,13 +604,13 @@ async def list_measurements( Query(description="Category code from the citizenlab list"), ] = None, since: Annotated[ - Optional[str], + Optional[datetime], Query( description='Start date of when measurements were run (ex. "2016-10-20T10:30:00")' ), ] = None, until: Annotated[ - Optional[str], + Optional[datetime], Query( description='End date of when measurement were run (ex. "2016-10-20T10:30:00")' ), @@ -625,7 +658,9 @@ async def list_measurements( Optional[str], Query(description="Filter measurements by OONIRun ID.") ] = None, order_by: Annotated[ - Optional[OrderBy], # Use an actual enum to enforce validation of ordering fields + Optional[ + OrderBy + ], # Use an actual enum to enforce validation of ordering fields Query( description="By which key the results should be ordered by (default: `null`)", ), @@ -641,7 +676,10 @@ async def list_measurements( int, Query(description="Offset into the result set (default: 0)") ] = 0, limit: Annotated[ - int, Query(description="Number of records to return (default: 100)", ge=0, le=1_000_000) + int, + Query( + description="Number of records to return (default: 100)", ge=0, le=1_000_000 + ), ] = 100, user_agent: Annotated[str | None, Header()] = None, db=Depends(get_clickhouse_session), @@ -681,30 +719,12 @@ async def list_measurements( ) ### Prepare query parameters + if until is None and report_id is None: + t = datetime.now(timezone.utc) + timedelta(days=1) + until = datetime(t.year, t.month, t.day) - until_dt = None - if until is not None: - until_dt = datetime.strptime(until, "%Y-%m-%d") - - # Set reasonable since/until ranges if not specified. - try: - if until is None: - if report_id is None: - t = datetime.now(timezone.utc) + timedelta(days=1) - until_dt = datetime(t.year, t.month, t.day) - except ValueError: - raise HTTPException(status_code=400, detail="Invalid until parameter") - - since_dt = None - if since is not None: - since_dt = datetime.strptime(since, "%Y-%m-%d") - - try: - if since_dt is None: - if report_id is None and until_dt is not None: - since_dt = until_dt - timedelta(days=30) - except ValueError: - raise HTTPException(status_code=400, detail="Invalid since parameter") + if since is None and report_id is None and until is not None: + since = until - timedelta(days=30) if order.lower() not in ("asc", "desc"): raise HTTPException(status_code=400, detail="Invalid order parameter") @@ -994,3 +1014,15 @@ async def get_torsf_stats( def get_bucket_url(bucket_name: str) -> str: return f"https://{bucket_name}.s3.amazonaws.com/" + + +def asn_to_int(asn_str: str) -> int: + return int(asn_str.strip("AS")) + +def is_in_charset(s: str, charset: str, error_msg: str): + """Ensure `s` contains only valid characters listed in `charset`""" + for c in s: + if c not in charset: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=error_msg + ) diff --git a/ooniapi/services/oonimeasurements/tests/conftest.py b/ooniapi/services/oonimeasurements/tests/conftest.py index 574c387b..b85c4371 100644 --- a/ooniapi/services/oonimeasurements/tests/conftest.py +++ b/ooniapi/services/oonimeasurements/tests/conftest.py @@ -18,13 +18,15 @@ def get_file_path(file_path: str): return Path(__file__).parent / file_path + @pytest.fixture(scope="session") def maybe_download_fixtures(): base_url = "https://ooni-data-eu-fra.s3.eu-central-1.amazonaws.com/" filenames = [ "samples/analysis_web_measurement-sample.sql.gz", "samples/obs_web-sample.sql.gz", - "raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz" + "raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz", + "raw/20210709/00/MY/webconnectivity/2021070900_MY_webconnectivity.n0.2.jsonl.gz", ] for fn in filenames: dst_path = get_file_path(f"fixtures/{fn}") diff --git a/ooniapi/services/oonimeasurements/tests/migrations/2_clickhouse_populate_fastpath.sql b/ooniapi/services/oonimeasurements/tests/migrations/2_clickhouse_populate_fastpath.sql index b86ef534..fb649ef8 100644 --- a/ooniapi/services/oonimeasurements/tests/migrations/2_clickhouse_populate_fastpath.sql +++ b/ooniapi/services/oonimeasurements/tests/migrations/2_clickhouse_populate_fastpath.sql @@ -1075,4 +1075,6 @@ INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_nam INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210708064435.632040_MM_whatsapp_b3fe1a6061d720e4','20210708T064431Z_whatsapp_MM_133384_n1_yhkQtlaBRuoDnD9B',NULL,'MM',133384,'whatsapp','2021-07-08 06:44:31','2021-07-08 06:44:31',NULL,'{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"registration_server_accessible":true,"whatsapp_web_accessible":true,"whatsapp_endpoints_accessible":true}}','windows','f','f','f',NULL,'ooniprobe-desktop-unattended','3.8.0',NULL,0,0,0,0,NULL,0,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL); INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210708064412.083471_AM_whatsapp_8db72fe2cc57adee','20210708T064408Z_whatsapp_AM_49800_n1_MvhXsYKzDSooGPCy',NULL,'AM',49800,'whatsapp','2021-07-08 06:45:05','2021-07-08 06:45:05',NULL,'{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"registration_server_accessible":true,"whatsapp_web_accessible":true,"whatsapp_endpoints_accessible":true}}','unknown','f','f','f',NULL,'ooniprobe-cli','3.9.2',NULL,0,0,0,0,NULL,0,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL); INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210708064537.694608_HR_whatsapp_cbee59e3f8d64e84','20210708T064536Z_whatsapp_HR_31012_n1_6bKkL89MKhYySdfK',NULL,'HR',31012,'whatsapp','2021-07-08 06:45:35','2021-07-08 06:45:35',NULL,'{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"registration_server_accessible":true,"whatsapp_web_accessible":true,"whatsapp_endpoints_accessible":true}}','android','f','f','f',NULL,'measurement_kit','0.8.1',NULL,0,0,0,0,NULL,0,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL); -INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20250709075147.833477_US_webconnectivity_8f0e0b49950f2592','20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','US',10796,'web_connectivity','2025-07-09 07:49:13','2025-07-09 07:51:46','','{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0}','android','f','f','f','freenetproject.org','ooniprobe-android-unattended','5.1.0','',0,0,0,0,'',0,'','0.4.3','arm64','ooniprobe-engine','3.26.0',1.3149208,'','https://5.th.ooni.org','https',NULL) \ No newline at end of file +INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20250709075147.833477_US_webconnectivity_8f0e0b49950f2592','20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','US',10796,'web_connectivity','2025-07-09 07:49:13','2025-07-09 07:51:46','','{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0}','android','f','f','f','freenetproject.org','ooniprobe-android-unattended','5.1.0','',0,0,0,0,'',0,'','0.4.3','arm64','ooniprobe-engine','3.26.0',1.3149208,'','https://5.th.ooni.org','https',NULL); +INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20250709074932.361231_US_webconnectivity_52be70d41f91a6cc', '20250709T074749Z_webconnectivity_US_10796_n1_oljUoi3ZVNHUzjdp', 'https://www.quora.com/', 'US', 10796, 'web_connectivity', '2025-07-09 07:47:49', '2025-07-09 07:49:30', '', '{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"fingerprints":[{"name":"cp.fp_x_redirect_just","scope":"fp","location_found":"body","confidence_no_fp":5,"expected_countries":[]}]}', 'android', 'f', 'f', 'f', 'www.quora.com', 'ooniprobe-android-unattended', '5.1.0', '', 0, 0, 0, 0, '', 0, '', '0.4.3', 'arm64', 'ooniprobe-engine', '3.26.0', 0.79127073, '', 'https://5.th.ooni.org', 'https', NULL); +INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210709005529.664022_MY_webconnectivity_68e5bea1060d1874', '20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3', 'https://www.backtrack-linux.org/', 'MY', 4818, 'web_connectivity', '2025-07-09 00:43:40', '2025-07-09 00:55:13', '', '{"blocking_general":1.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"blocking_type":"http-failure"}}', 'android', 't', 'f', 'f', 'www.backtrack-linux.org', 'ooniprobe-android', '3.0.0', '', 0, 0, 0, 0, '', 0, '', '', '', '', '', 0, '', '', '', NULL) \ No newline at end of file diff --git a/ooniapi/services/oonimeasurements/tests/migrations/3_clickhouse_populate_jsonl.sql b/ooniapi/services/oonimeasurements/tests/migrations/3_clickhouse_populate_jsonl.sql index 3d26b33c..31bb01d4 100644 --- a/ooniapi/services/oonimeasurements/tests/migrations/3_clickhouse_populate_jsonl.sql +++ b/ooniapi/services/oonimeasurements/tests/migrations/3_clickhouse_populate_jsonl.sql @@ -1,2 +1,3 @@ INSERT INTO jsonl (report_id, input, s3path, linenum) VALUES ('20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3', 'https://www.backtrack-linux.org/', 'raw/20210709/00/MY/webconnectivity/2021070900_MY_webconnectivity.n0.2.jsonl.gz', 35); -INSERT INTO jsonl (report_id, input, s3path, linenum, measurement_uid) VALUES ('20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz',623,'20250709075147.833477_US_webconnectivity_8f0e0b49950f2592') \ No newline at end of file +INSERT INTO jsonl (report_id, input, s3path, linenum, measurement_uid) VALUES ('20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz',623,'20250709075147.833477_US_webconnectivity_8f0e0b49950f2592'); +INSERT INTO jsonl (report_id, input, s3path, linenum, measurement_uid) VALUES ('20250709T074749Z_webconnectivity_US_10796_n1_oljUoi3ZVNHUzjdp', 'https://www.quora.com/', 'raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz', 187, '20250709074932.361231_US_webconnectivity_52be70d41f91a6cc'); \ No newline at end of file diff --git a/ooniapi/services/oonimeasurements/tests/test_aggregation.py b/ooniapi/services/oonimeasurements/tests/test_aggregation.py index 58d1e2bf..e6d9189e 100644 --- a/ooniapi/services/oonimeasurements/tests/test_aggregation.py +++ b/ooniapi/services/oonimeasurements/tests/test_aggregation.py @@ -721,6 +721,7 @@ def test_aggregation_result_validation(client): probe_asn=1234, ) + def test_aggregation_probe_asn_result_wont_crash(client): """ Validates that the API is able to generate the response model when probe_asn is not None @@ -730,5 +731,5 @@ def test_aggregation_probe_asn_result_wont_crash(client): # should not crash r = api(client, url) - assert r['dimension_count'] == 2 - assert isinstance(r['result'][0]['probe_asn'], int) \ No newline at end of file + assert r["dimension_count"] == 2 + assert isinstance(r["result"][0]["probe_asn"], int) diff --git a/ooniapi/services/oonimeasurements/tests/test_measurements.py b/ooniapi/services/oonimeasurements/tests/test_measurements.py index 436f1f17..5d5e2039 100644 --- a/ooniapi/services/oonimeasurements/tests/test_measurements.py +++ b/ooniapi/services/oonimeasurements/tests/test_measurements.py @@ -1,5 +1,5 @@ import pytest -from datetime import datetime +from datetime import datetime, timezone, timedelta from clickhouse_driver import Client as Clickhouse from oonimeasurements.common.clickhouse_utils import query_click_one_row from oonimeasurements.routers.v1.measurements import format_msmt_meta @@ -10,14 +10,27 @@ route = "api/v1/measurements" +def fake_get_bucket_url(bucket_name): + return f"file://{THIS_DIR}/fixtures/" + + def normalize_probe_asn(probe_asn): if probe_asn.startswith("AS"): return probe_asn return f"AS{probe_asn}" +def get_time(row): + return datetime.strptime( + row["measurement_start_time"], "%Y-%m-%dT%H:%M:%S.%fZ" + ).replace(tzinfo=timezone.utc) + + +SINCE = datetime.strftime(datetime(2020, 1, 1), "%Y-%m-%dT%H:%M:%S.%fZ") + + def test_list_measurements(client): - response = client.get(route) + response = client.get(route, params={"since": SINCE}) json = response.json() assert isinstance(json["results"], list), json @@ -51,6 +64,7 @@ def test_list_measurements_with_one_value_to_filters( ): params = {} params[filter_param] = filter_value + params["since"] = SINCE response = client.get(route, params=params) @@ -68,9 +82,7 @@ def test_list_measurements_with_one_value_to_filters( def test_list_measurements_with_one_value_to_filters_not_present_in_the_result(client): domain = "cloudflare-dns.com" - params = { - "domain": domain, - } + params = {"domain": domain, "since": SINCE} response = client.get(route, params=params) @@ -94,6 +106,7 @@ def test_list_measurements_with_multiple_values_to_filters( ): params = {} params[filter_param] = filter_value + params["since"] = SINCE filter_value_list = filter_value.split(",") if filter_param == "probe_asn": filter_value_list = list(map(normalize_probe_asn, filter_value_list)) @@ -109,7 +122,7 @@ def test_list_measurements_with_multiple_values_to_filters( def test_list_measurements_with_multiple_values_to_filters_not_in_the_result(client): domainCollection = "cloudflare-dns.com, adblock.doh.mullvad.net, 1.1.1.1" - params = {"domain": domainCollection} + params = {"domain": domainCollection, "since": SINCE} response = client.get(route, params=params) @@ -151,10 +164,6 @@ def test_raw_measurement_args_optional(client, monkeypatch, maybe_download_fixtu """ Test that all arguments in raw_measurements are optional """ - - def fake_get_bucket_url(bucket_name): - return f"file://{THIS_DIR}/fixtures/" - monkeypatch.setattr(measurements, "get_bucket_url", fake_get_bucket_url) # Taken from fixtures @@ -178,10 +187,6 @@ def test_raw_measurement_returns_json(client, monkeypatch, maybe_download_fixtur """ Test that raw_measurements returns json instead of a string """ - - def fake_get_bucket_url(bucket_name): - return f"file://{THIS_DIR}/fixtures/" - monkeypatch.setattr(measurements, "get_bucket_url", fake_get_bucket_url) uid = "20250709075147.833477_US_webconnectivity_8f0e0b49950f2592" @@ -230,7 +235,8 @@ def test_measurements_desc_default(client): """ resp = client.get( - "/api/v1/measurements", params={"order_by": "measurement_start_time"} + "/api/v1/measurements", + params={"order_by": "measurement_start_time", "since": SINCE}, ) assert ( resp.status_code == 200 @@ -238,11 +244,166 @@ def test_measurements_desc_default(client): j = resp.json() assert len(j["results"]) > 1, "Not enough results" - def get_time(row): - return datetime.strptime(row["measurement_start_time"], "%Y-%m-%dT%H:%M:%S.%fZ") - d = get_time(j["results"][0]) for row in j["results"][1:]: next_d = get_time(row) assert next_d <= d, "Results should be in descending order" d = next_d + + +def test_msm_meta_probe_asn_int(client, monkeypatch): + """ + The monolith returns probe_asn as an int in /measurement_meta + This test ensures the same functionality + """ + monkeypatch.setattr(measurements, "get_bucket_url", fake_get_bucket_url) + + report_id = "20250709T074749Z_webconnectivity_US_10796_n1_oljUoi3ZVNHUzjdp" + input = "https://www.quora.com/" + resp = client.get( + "/api/v1/measurement_meta", + params={"report_id": report_id, "full": True, "input": input}, + ) + + assert resp.status_code == 200, resp.content + j = resp.json() + assert isinstance(j["probe_asn"], int), "probe_asn should be int" + +def test_no_report_id_msm_uid_400(client): + """ + measurement_meta should return 400 if neither report_id nor measurement_uid are provided + """ + resp = client.get("/api/v1/measurement_meta") + assert resp.status_code == 400 + +def test_fix_msm_date_parsing(client): + + # This query was raising an error parsing the date: + # /api/v1/measurements?probe_cc=SY&since=2025-09-29T00:00:00&until=2025-09-29T23:59:59&limit=2000&since_index=20250910T105502Z_tor_SY_29256_n1_C8NFgxmJpyaP5Bsd + resp = client.get( + "/api/v1/measurements", + params={ + "since": "2025-09-29T00:00:00", + "until": "2025-09-29T23:59:59", + "limit": "2000", + }, + ) + + assert resp.status_code == 200, resp.content + + +def test_get_measurement_meta_basic(client): + rid = "20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3" + inp = "https://www.backtrack-linux.org/" + response = client.get(f"/api/v1/measurement_meta?report_id={rid}&input={inp}") + assert response.status_code == 200, response.status_code + j = response.json() + assert j == { + "anomaly": True, + "confirmed": False, + "failure": False, + "input": inp, + "measurement_start_time": "2025-07-09T00:55:13Z", + "measurement_uid": "20210709005529.664022_MY_webconnectivity_68e5bea1060d1874", + "probe_asn": 4818, + "probe_cc": "MY", + "report_id": rid, + "scores": '{"blocking_general":1.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"blocking_type":"http-failure"}}', + "test_name": "web_connectivity", + "test_start_time": "2025-07-09T00:43:40Z", + "category_code": "", + } + + # You can also query by measurment uid + uid = "20210709005529.664022_MY_webconnectivity_68e5bea1060d1874" + response = client.get("/api/v1/measurement_meta", params={'measurement_uid' : uid}) + assert response.status_code == 200, response.status_code + + +def test_get_measurement_meta_invalid_rid(client): + response = client.get("/api/v1/measurement_meta?report_id=BOGUS") + assert b"Invalid report_id" in response.content + + +def test_get_measurement_meta_not_found(client): + url = "/api/v1/measurement_meta?report_id=20200712T100000Z_AS9999_BOGUSsYKWBS2S0hdzXf7rhUusKfYP5cQM9HwAdZRPmUfroVoCn" + resp = client.get(url) + # TODO: is this a bug? + assert resp.status_code == 200 + assert resp.json() == {} + + +def test_get_measurement_meta_input_none_from_fp(client): + rid = "20210709T000017Z_httpinvalidrequestline_CH_3303_n1_8mr2M3dzkoFmmjIU" + # input is None + response = client.get(f"/api/v1/measurement_meta?report_id={rid}") + assert response.status_code == 200, response.status_code + assert response.json() == { + "anomaly": False, + "category_code": None, + "confirmed": False, + "failure": False, + "input": "", + "measurement_start_time": "2021-07-09T00:00:18Z", + "measurement_uid": "20210709000024.440526_CH_httpinvalidrequestline_3937f817503ed4ea", + "probe_asn": 3303, + "probe_cc": "CH", + "report_id": rid, + "scores": '{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0}', + "test_name": "http_invalid_request_line", + "test_start_time": "2021-07-09T00:00:16Z", + } + + +def test_get_measurement_meta_full(client, monkeypatch): + monkeypatch.setattr(measurements, "get_bucket_url", fake_get_bucket_url) + + rid = "20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3" + inp = "https://www.backtrack-linux.org/" + response = client.get( + f"/api/v1/measurement_meta?report_id={rid}&input={inp}&full=True" + ) + assert response.status_code == 200, response.status_code + data = response.json() + raw_msm = data.pop("raw_measurement") + assert data == { + "anomaly": True, + "confirmed": False, + "failure": False, + "input": inp, + "measurement_uid": "20210709005529.664022_MY_webconnectivity_68e5bea1060d1874", + "measurement_start_time": "2025-07-09T00:55:13Z", + "probe_asn": 4818, + "probe_cc": "MY", + "scores": '{"blocking_general":1.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"blocking_type":"http-failure"}}', + "report_id": rid, + "test_name": "web_connectivity", + "test_start_time": "2025-07-09T00:43:40Z", + "category_code": "", + } + assert raw_msm + +def test_bad_report_id_wont_validate(client): + + resp = client.get("/api/v1/measurement_meta", params={ + "report_id" : "20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds#$%" # bad suffix + }) + assert resp.status_code == 422, resp.json() + +def test_no_measurements_before_30_days(client): + """ + The default filtering should not retrieve measurements older than 30 days since tomorrow + """ + + resp = client.get("/api/v1/measurements") # no since/until + assert resp.status_code, resp.status_code + json = resp.json() + min_date = datetime.now(timezone.utc) - timedelta(29) + for r in json["results"]: + date = get_time(r) + assert date >= min_date + + +def test_asn_to_int(): + assert measurements.asn_to_int("AS1234") == 1234 + assert measurements.asn_to_int("1234") == 1234