Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9782190
Working on test to reproduce bug
LDiazN Sep 30, 2025
a63886f
Remove useless date parsing; add test for broken query
LDiazN Sep 30, 2025
bde45c6
Fix bad comparision between model and {}
LDiazN Sep 30, 2025
0d0ea2a
Measurement body is already str, no need for translation
LDiazN Oct 1, 2025
ab117cd
Emulate monolith behavior of returning probe_asn as int
LDiazN Oct 1, 2025
c7ada7e
Add fixture to test measurement body fetch
LDiazN Oct 1, 2025
4fb1286
Port monolith measurement_meta endpoint function
LDiazN Oct 1, 2025
fffbf9f
Port basic usage test for measurements meta
LDiazN Oct 1, 2025
5046c0f
Add test from old version
LDiazN Oct 2, 2025
5755a63
Add test to validate report id
LDiazN Oct 2, 2025
447a925
Black reformat
LDiazN Oct 2, 2025
e1b1fc2
Port more tests from the monolith
LDiazN Oct 2, 2025
f41f65e
black reformat
LDiazN Oct 2, 2025
4f7031e
Remove unused imports
LDiazN Oct 2, 2025
864b570
Add since parameter and fix bad date filtering
LDiazN Oct 2, 2025
c8b50cf
Add test to check that default filters don't retrieve anything older …
LDiazN Oct 2, 2025
5c13772
Fix datetime parsing
LDiazN Oct 2, 2025
64d51a2
Black reformat
LDiazN Oct 2, 2025
d5ea53b
trigger ci
LDiazN Oct 2, 2025
b63a188
trigger ci
LDiazN Oct 2, 2025
3785837
Add test for missing report_id or measurement_uid
LDiazN Oct 15, 2025
fb8c66a
move validate function closer to where it's needed
LDiazN Oct 15, 2025
f1f6084
Add test for bad report_id detection
LDiazN Oct 15, 2025
189bc06
Extend measurement_meta test to include query by uid
LDiazN Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,35 @@

from datetime import datetime, timedelta, timezone
from enum import Enum
from pathlib import Path
from typing import List, Optional, Any, Dict, Union, TypedDict, Tuple
from typing import List, Optional, Any, Dict, Union
import gzip
import json
import logging
import math
import time
import string

import ujson
import urllib3

from fastapi import APIRouter, Depends, Query, HTTPException, Header, Response, Request
from fastapi import (
APIRouter,
Depends,
Query,
HTTPException,
Header,
Response,
Request,
status,
)
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from typing_extensions import Annotated

from pydantic import Field
from pydantic import Field, field_serializer, field_validator

import sqlalchemy as sa
from sqlalchemy import tuple_, Row, sql
from sqlalchemy.orm import Session
from sqlalchemy import sql
from sqlalchemy.sql.expression import and_, text, select, column
from sqlalchemy.sql.expression import text as sql_text
from sqlalchemy.sql.expression import table as sql_table
from sqlalchemy.exc import OperationalError
from psycopg2.extensions import QueryCanceledError

Expand Down Expand Up @@ -276,7 +281,7 @@ class MeasurementMeta(BaseModel):
report_id: Optional[str] = None
test_name: Optional[str] = None
test_start_time: Optional[datetime] = None
probe_asn: Optional[str] = None
probe_asn: Optional[str | int] = None
probe_cc: Optional[str] = None
scores: Optional[str] = None
category_code: Optional[str] = None
Expand All @@ -286,6 +291,10 @@ class MeasurementMeta(BaseModel):
raw_measurement: Optional[str] = None
category_code: Optional[str] = None

@field_serializer("measurement_start_time", "test_start_time")
def format_ts(self, v: datetime) -> str:
return v.strftime("%Y-%m-%dT%H:%M:%SZ")


def format_msmt_meta(msmt_meta: dict) -> MeasurementMeta:
formatted_msmt_meta = MeasurementMeta(
Expand Down Expand Up @@ -430,75 +439,97 @@ class MeasurementBase(BaseModel):
test_name: Optional[str] = Field(default=None, title="test name of the measurement")


class GetMeasurementMetaRequest(BaseModel):
measurement_uid: Optional[str] = Field(
None,
description="The measurement ID, mutually exclusive with report_id + input",
min_length=3,
)
report_id: Optional[str] = Field(
None,
description=(
"The report_id to search measurements for example: "
"20210208T162755Z_ndt_DZ_36947_n1_8swgXi7xNuRUyO9a"
),
min_length=3,
)
input: Optional[str] = Field(
None,
description="The input (for example a URL or IP address) to search measurements for",
min_length=3,
)
full: bool = Field(
False,
description="Include JSON measurement data",
)

@field_validator("report_id")
def report_id_validator(cls, report_id: str) -> str:
if report_id:
return validate_report_id(report_id)

return report_id

def validate_report_id(report_id: str) -> str:
if len(report_id) < 15 or len(report_id) > 100:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail="Invalid report_id field",
)

is_in_charset(
report_id, string.ascii_letters + string.digits + "_", "Invalid report_id field"
)

return report_id

@router.get("/v1/measurement_meta", response_model_exclude_unset=True)
async def get_measurement_meta(
response: Response,
measurement_uid: Annotated[
Optional[str],
Query(
description="The measurement ID, mutually exclusive with report_id + input",
min_length=3,
),
] = None,
report_id: Annotated[
Optional[str],
Query(
description=(
"The report_id to search measurements for example: "
"20210208T162755Z_ndt_DZ_36947_n1_8swgXi7xNuRUyO9a"
),
min_length=3,
),
] = None,
input: Annotated[
Optional[str],
Query(
description="The input (for example a URL or IP address) to search measurements for",
min_length=3,
),
] = None,
full: Annotated[bool, Query(description="Include JSON measurement data")] = False,
request: GetMeasurementMetaRequest = Depends(),
settings=Depends(get_settings),
db=Depends(get_clickhouse_session),
) -> MeasurementMeta:
) -> MeasurementMeta | Dict[str, Any]:
"""
Get metadata on one measurement by measurement_uid or report_id + input
"""

if measurement_uid:
log.info(f"get_measurement_meta {measurement_uid}")
msmt_meta = _get_measurement_meta_by_uid(db, measurement_uid)
elif report_id:
log.info(f"get_measurement_meta {report_id} {input}")
msmt_meta = _get_measurement_meta_clickhouse(db, report_id, input)
if request.measurement_uid:
log.info(f"get_measurement_meta {request.measurement_uid}")
msmt_meta = _get_measurement_meta_by_uid(db, request.measurement_uid)
elif request.report_id:
log.info(f"get_measurement_meta {request.report_id} {input}")
msmt_meta = _get_measurement_meta_clickhouse(
db, request.report_id, request.input
)
else:
raise HTTPException(
status_code=400,
detail="Either report_id or measurement_uid must be provided",
status_code=status.HTTP_400_BAD_REQUEST,
detail="Missing measurement_uid or report_id. You should provide at the least one"
)

if msmt_meta.probe_asn is not None and isinstance(msmt_meta.probe_asn, str):
# Emulates old monolith behaviour of returning int as probe_asn
msmt_meta.probe_asn = asn_to_int(msmt_meta.probe_asn)

setcacheresponse("1m", response)
body = ""

if not full: # return without raw_measurement
if not request.full:
return msmt_meta

if msmt_meta == {}: # measurement not found
return MeasurementMeta(raw_measurement=body)
if msmt_meta == MeasurementMeta(): # measurement not found
return {"raw_measurement": ""}

try:
assert isinstance(msmt_meta.report_id, str) and isinstance(
msmt_meta.measurement_uid, str
)
body = _fetch_measurement_body(
# TODO: uid_cleanup
assert msmt_meta.report_id is not None
msmt_meta.raw_measurement = _fetch_measurement_body(
db, settings, msmt_meta.report_id, msmt_meta.measurement_uid
)
assert isinstance(body, bytes)
body = body.decode()
except Exception as e:
log.error(e, exc_info=True)
msmt_meta.raw_measurement = ""

msmt_meta.raw_measurement = body
return msmt_meta


Expand Down Expand Up @@ -529,13 +560,15 @@ def genurl(base_url: str, path: str, **kw) -> str:
"""Generate absolute URL for the API"""
return urljoin(base_url, path) + "?" + urlencode(kw)


class OrderBy(str, Enum):
measurement_start_time = "measurement_start_time"
input = "input"
probe_cc = "probe_cc"
probe_asn = "probe_asn"
test_name = "test_name"


@router.get("/v1/measurements")
async def list_measurements(
request: Request,
Expand Down Expand Up @@ -571,13 +604,13 @@ async def list_measurements(
Query(description="Category code from the citizenlab list"),
] = None,
since: Annotated[
Optional[str],
Optional[datetime],
Query(
description='Start date of when measurements were run (ex. "2016-10-20T10:30:00")'
),
] = None,
until: Annotated[
Optional[str],
Optional[datetime],
Query(
description='End date of when measurement were run (ex. "2016-10-20T10:30:00")'
),
Expand Down Expand Up @@ -625,7 +658,9 @@ async def list_measurements(
Optional[str], Query(description="Filter measurements by OONIRun ID.")
] = None,
order_by: Annotated[
Optional[OrderBy], # Use an actual enum to enforce validation of ordering fields
Optional[
OrderBy
], # Use an actual enum to enforce validation of ordering fields
Query(
description="By which key the results should be ordered by (default: `null`)",
),
Expand All @@ -641,7 +676,10 @@ async def list_measurements(
int, Query(description="Offset into the result set (default: 0)")
] = 0,
limit: Annotated[
int, Query(description="Number of records to return (default: 100)", ge=0, le=1_000_000)
int,
Query(
description="Number of records to return (default: 100)", ge=0, le=1_000_000
),
] = 100,
user_agent: Annotated[str | None, Header()] = None,
db=Depends(get_clickhouse_session),
Expand Down Expand Up @@ -681,30 +719,12 @@ async def list_measurements(
)

### Prepare query parameters
if until is None and report_id is None:
t = datetime.now(timezone.utc) + timedelta(days=1)
until = datetime(t.year, t.month, t.day)

until_dt = None
if until is not None:
until_dt = datetime.strptime(until, "%Y-%m-%d")

# Set reasonable since/until ranges if not specified.
try:
if until is None:
if report_id is None:
t = datetime.now(timezone.utc) + timedelta(days=1)
until_dt = datetime(t.year, t.month, t.day)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid until parameter")

since_dt = None
if since is not None:
since_dt = datetime.strptime(since, "%Y-%m-%d")

try:
if since_dt is None:
if report_id is None and until_dt is not None:
since_dt = until_dt - timedelta(days=30)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid since parameter")
if since is None and report_id is None and until is not None:
since = until - timedelta(days=30)

if order.lower() not in ("asc", "desc"):
raise HTTPException(status_code=400, detail="Invalid order parameter")
Expand Down Expand Up @@ -994,3 +1014,15 @@ async def get_torsf_stats(

def get_bucket_url(bucket_name: str) -> str:
return f"https://{bucket_name}.s3.amazonaws.com/"


def asn_to_int(asn_str: str) -> int:
return int(asn_str.strip("AS"))

def is_in_charset(s: str, charset: str, error_msg: str):
"""Ensure `s` contains only valid characters listed in `charset`"""
for c in s:
if c not in charset:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=error_msg
)
4 changes: 3 additions & 1 deletion ooniapi/services/oonimeasurements/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@
def get_file_path(file_path: str):
return Path(__file__).parent / file_path


@pytest.fixture(scope="session")
def maybe_download_fixtures():
base_url = "https://ooni-data-eu-fra.s3.eu-central-1.amazonaws.com/"
filenames = [
"samples/analysis_web_measurement-sample.sql.gz",
"samples/obs_web-sample.sql.gz",
"raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz"
"raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz",
"raw/20210709/00/MY/webconnectivity/2021070900_MY_webconnectivity.n0.2.jsonl.gz",
]
for fn in filenames:
dst_path = get_file_path(f"fixtures/{fn}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1075,4 +1075,6 @@ INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_nam
INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210708064435.632040_MM_whatsapp_b3fe1a6061d720e4','20210708T064431Z_whatsapp_MM_133384_n1_yhkQtlaBRuoDnD9B',NULL,'MM',133384,'whatsapp','2021-07-08 06:44:31','2021-07-08 06:44:31',NULL,'{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"registration_server_accessible":true,"whatsapp_web_accessible":true,"whatsapp_endpoints_accessible":true}}','windows','f','f','f',NULL,'ooniprobe-desktop-unattended','3.8.0',NULL,0,0,0,0,NULL,0,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL);
INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210708064412.083471_AM_whatsapp_8db72fe2cc57adee','20210708T064408Z_whatsapp_AM_49800_n1_MvhXsYKzDSooGPCy',NULL,'AM',49800,'whatsapp','2021-07-08 06:45:05','2021-07-08 06:45:05',NULL,'{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"registration_server_accessible":true,"whatsapp_web_accessible":true,"whatsapp_endpoints_accessible":true}}','unknown','f','f','f',NULL,'ooniprobe-cli','3.9.2',NULL,0,0,0,0,NULL,0,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL);
INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210708064537.694608_HR_whatsapp_cbee59e3f8d64e84','20210708T064536Z_whatsapp_HR_31012_n1_6bKkL89MKhYySdfK',NULL,'HR',31012,'whatsapp','2021-07-08 06:45:35','2021-07-08 06:45:35',NULL,'{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"registration_server_accessible":true,"whatsapp_web_accessible":true,"whatsapp_endpoints_accessible":true}}','android','f','f','f',NULL,'measurement_kit','0.8.1',NULL,0,0,0,0,NULL,0,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL);
INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20250709075147.833477_US_webconnectivity_8f0e0b49950f2592','20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','US',10796,'web_connectivity','2025-07-09 07:49:13','2025-07-09 07:51:46','','{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0}','android','f','f','f','freenetproject.org','ooniprobe-android-unattended','5.1.0','',0,0,0,0,'',0,'','0.4.3','arm64','ooniprobe-engine','3.26.0',1.3149208,'','https://5.th.ooni.org','https',NULL)
INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20250709075147.833477_US_webconnectivity_8f0e0b49950f2592','20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','US',10796,'web_connectivity','2025-07-09 07:49:13','2025-07-09 07:51:46','','{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0}','android','f','f','f','freenetproject.org','ooniprobe-android-unattended','5.1.0','',0,0,0,0,'',0,'','0.4.3','arm64','ooniprobe-engine','3.26.0',1.3149208,'','https://5.th.ooni.org','https',NULL);
INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20250709074932.361231_US_webconnectivity_52be70d41f91a6cc', '20250709T074749Z_webconnectivity_US_10796_n1_oljUoi3ZVNHUzjdp', 'https://www.quora.com/', 'US', 10796, 'web_connectivity', '2025-07-09 07:47:49', '2025-07-09 07:49:30', '', '{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"fingerprints":[{"name":"cp.fp_x_redirect_just","scope":"fp","location_found":"body","confidence_no_fp":5,"expected_countries":[]}]}', 'android', 'f', 'f', 'f', 'www.quora.com', 'ooniprobe-android-unattended', '5.1.0', '', 0, 0, 0, 0, '', 0, '', '0.4.3', 'arm64', 'ooniprobe-engine', '3.26.0', 0.79127073, '', 'https://5.th.ooni.org', 'https', NULL);
INSERT INTO fastpath(measurement_uid,report_id,input,probe_cc,probe_asn,test_name,test_start_time,measurement_start_time,filename,scores,platform,anomaly,confirmed,msm_failure,domain,software_name,software_version,control_failure,blocking_general,is_ssl_expected,page_len,page_len_ratio,server_cc,server_asn,server_as_name,test_version,architecture,engine_name,engine_version,test_runtime,blocking_type,test_helper_address,test_helper_type,ooni_run_link_id) VALUES ('20210709005529.664022_MY_webconnectivity_68e5bea1060d1874', '20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3', 'https://www.backtrack-linux.org/', 'MY', 4818, 'web_connectivity', '2025-07-09 00:43:40', '2025-07-09 00:55:13', '', '{"blocking_general":1.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"analysis":{"blocking_type":"http-failure"}}', 'android', 't', 'f', 'f', 'www.backtrack-linux.org', 'ooniprobe-android', '3.0.0', '', 0, 0, 0, 0, '', 0, '', '', '', '', '', 0, '', '', '', NULL)
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
INSERT INTO jsonl (report_id, input, s3path, linenum) VALUES ('20210709T004340Z_webconnectivity_MY_4818_n1_YCM7J9mGcEHds2K3', 'https://www.backtrack-linux.org/', 'raw/20210709/00/MY/webconnectivity/2021070900_MY_webconnectivity.n0.2.jsonl.gz', 35);
INSERT INTO jsonl (report_id, input, s3path, linenum, measurement_uid) VALUES ('20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz',623,'20250709075147.833477_US_webconnectivity_8f0e0b49950f2592')
INSERT INTO jsonl (report_id, input, s3path, linenum, measurement_uid) VALUES ('20250709T074913Z_webconnectivity_US_10796_n1_XDgk16bsGyJbx6Jl','https://freenetproject.org/','raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz',623,'20250709075147.833477_US_webconnectivity_8f0e0b49950f2592');
INSERT INTO jsonl (report_id, input, s3path, linenum, measurement_uid) VALUES ('20250709T074749Z_webconnectivity_US_10796_n1_oljUoi3ZVNHUzjdp', 'https://www.quora.com/', 'raw/20250709/07/US/webconnectivity/2025070907_US_webconnectivity.n1.7.jsonl.gz', 187, '20250709074932.361231_US_webconnectivity_52be70d41f91a6cc');
Loading