diff --git a/app/logo.py b/app/logo.py index 986d166..0bd359a 100644 --- a/app/logo.py +++ b/app/logo.py @@ -1,20 +1,14 @@ api_logo = """\ -██████╗ ██████╗ ██████╗ █████╗ ██████╗ ██╗ -██╔══██╗██╔══██╗██╔════╝ ██╔══██╗██╔══██╗██║ -██████╔╝██████╔╝██║ █████╗ ███████║██████╔╝██║ -██╔══██╗██╔══██╗██║ ╚════╝ ██╔══██║██╔═══╝ ██║ -██║ ██║██████╔╝╚██████╗ ██║ ██║██║ ██║ -╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═╝\ +▄▄▄▄ ▄▄▄▄ ▄▄▄▄ ▄▄▄ ▄▄▄▄ ▄▄ +██▄█▄ ██▄██ ██▀▀▀ ▄▄▄ ██▀██ ██▄█▀ ██ +██ ██ ██▄█▀ ▀████ ██▀██ ██ ██ \ """ cli_logo = """\ -██████╗ ██████╗ ██████╗ ██████╗██╗ ██╗ -██╔══██╗██╔══██╗██╔════╝ ██╔════╝██║ ██║ -██████╔╝██████╔╝██║ █████╗ ██║ ██║ ██║ -██╔══██╗██╔══██╗██║ ╚════╝ ██║ ██║ ██║ -██║ ██║██████╔╝╚██████╗ ╚██████╗███████╗██║ -╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝╚══════╝╚═╝\ +▄▄▄▄ ▄▄▄▄ ▄▄▄▄ ▄▄▄▄ ▄▄ ▄▄ +██▄█▄ ██▄██ ██▀▀▀ ▄▄▄ ██▀▀▀ ██ ██ +██ ██ ██▄█▀ ▀████ ▀████ ██▄▄▄ ██ \ """ diff --git a/app/schema/openapi.yaml b/app/schema/openapi.yaml index 52fad0c..571df0d 100644 --- a/app/schema/openapi.yaml +++ b/app/schema/openapi.yaml @@ -4,7 +4,7 @@ info: title: Blind Charging API description: | This API lets an application communicate with the CPL Blind Charging module via an HTTP REST API. - version: 0.8.1 + version: 0.9.0 contact: name: Joe Nudell email: jnudell@hks.harvard.edu @@ -702,6 +702,49 @@ components: TEXT: "#/components/schemas/DocumentText" BASE64: "#/components/schemas/DocumentContent" + UnidentifiedDocumentLink: + type: object + description: | + Link-based input without a caller-provided document ID. + required: + - attachmentType + - url + properties: + attachmentType: + type: string + enum: + - LINK + url: + type: string + format: uri + + UnidentifiedDocumentContent: + type: object + description: | + Base64 input without a caller-provided document ID. 
+ required: + - attachmentType + - content + properties: + attachmentType: + type: string + enum: + - BASE64 + content: + type: string + + UnidentifiedInputDocument: + description: | + Document input that omits `documentId`. Supports URL and base64 payloads. + oneOf: + - $ref: "#/components/schemas/UnidentifiedDocumentLink" + - $ref: "#/components/schemas/UnidentifiedDocumentContent" + discriminator: + propertyName: attachmentType + mapping: + LINK: "#/components/schemas/UnidentifiedDocumentLink" + BASE64: "#/components/schemas/UnidentifiedDocumentContent" + BlindReviewInfo: type: object required: @@ -1179,61 +1222,16 @@ components: # --- Extraction schemas --- - ExtractionDocumentLink: - type: object - description: | - PDF document supplied by URL for extraction. Unlike `DocumentLink`, - `documentId` is omitted because the document identifier is not known - until after extraction is accepted. - required: - - attachmentType - - url - properties: - attachmentType: - type: string - enum: - - LINK - url: - type: string - format: uri - - ExtractionDocumentContent: - type: object - description: | - PDF document supplied as base64 for extraction. Unlike `DocumentContent`, - `documentId` is omitted because the document identifier is not known - until after extraction is accepted. - required: - - attachmentType - - content - properties: - attachmentType: - type: string - enum: - - BASE64 - content: - type: string - - ExtractionInputDocument: - description: | - An input document for extraction. Only PDF documents are supported, - provided either as a URL or base64-encoded content. 
- oneOf: - - $ref: "#/components/schemas/ExtractionDocumentLink" - - $ref: "#/components/schemas/ExtractionDocumentContent" - discriminator: - propertyName: attachmentType - mapping: - LINK: "#/components/schemas/ExtractionDocumentLink" - BASE64: "#/components/schemas/ExtractionDocumentContent" - ExtractionTarget: type: object required: - document properties: document: - $ref: "#/components/schemas/ExtractionInputDocument" + description: | + Input document to extract from, without requiring a caller-supplied + `documentId`. + $ref: "#/components/schemas/UnidentifiedInputDocument" callbackUrl: type: string format: uri diff --git a/app/server/README.md b/app/server/README.md index 25420df..6b77009 100644 --- a/app/server/README.md +++ b/app/server/README.md @@ -7,15 +7,13 @@ All of the code in `./generated` is generated from code in `./schema`. To run code generation, make sure you have the `fastapi-codegen` repo cloned. -**Note 1** [Joe's fork of the repo](https://github.com/jnu/fastapi-code-generator) +**Note** [Joe's fork of the repo](https://github.com/jnu/fastapi-code-generator) still has a couple more useful features that haven't been merged into upstream branch. -**Note 2** FastAPI code gen uses `poetry` for package management, so you will need to install that and use it to run the code generator. - ```zsh # Set this to the path where this repo is checked out. 
BCAPI_ROOT=../../comppolicylab/blind-charging-api -poetry run python -m fastapi_code_generator -i "$BCAPI_ROOT/app/schema/openapi.yaml" -o "$BCAPI_ROOT/app/server/generated" -r -t "$BCAPI_ROOT/app/schema/templates" -d pydantic_v2.BaseModel -p 3.13 +uv run python -m fastapi_code_generator -i "$BCAPI_ROOT/app/schema/openapi.yaml" -o "$BCAPI_ROOT/app/server/generated" -r -t "$BCAPI_ROOT/app/schema/templates" -d pydantic_v2.BaseModel -p 3.13 ``` ### Implementations diff --git a/app/server/extracted_report.py b/app/server/extracted_report.py new file mode 100644 index 0000000..f4ef363 --- /dev/null +++ b/app/server/extracted_report.py @@ -0,0 +1,278 @@ +"""Translation from bc2 pipeline output to the API ``ExtractedReport`` model. + +The extraction pipeline (``bc2.Pipeline``) produces a +:class:`bc2.core.common.ontology.PoliceReportParseResult`, which is +structurally similar to but not identical to the API's +:class:`app.server.generated.models.ExtractedReport`. This module contains the +logic to translate between the two, plus a lenient parser that accepts raw +pipeline output bytes. +""" + +import json + +from bc2.core.common.ontology import ( + Cited, + Offense, + PoliceReportParseResult, + SourceChunk, +) +from bc2.core.common.ontology import Subject as BC2Subject +from pydantic import ValidationError + +from .generated.models import ( + BoundingBox, + CitedString, + DocumentRegion, + ExtractedCharge, + ExtractedDefendant, + ExtractedOfficer, + ExtractedPerson, + ExtractedReport, + IncidentMetadata, +) + +# Subject `type` values from the ontology prompt are free-form strings, so we +# use lenient keyword matching to bucket them into the API's categories. 
# Role labels produced by the ontology prompt are free-form text, so subjects
# are bucketed by case-insensitive substring match against these keywords.
_DEFENDANT_TYPE_KEYWORDS = (
    "defendant",
    "suspect",
    "arrestee",
    "accused",
    "respondent",
    "perpetrator",
    "offender",
)
_OFFICER_TYPE_KEYWORDS = (
    "officer",
    "deputy",
    "sheriff",
    "trooper",
    "detective",
    "investigator",
    "police",
)


def parse_extracted_report(raw_output: bytes) -> ExtractedReport:
    """Decode raw pipeline output and return it as an ``ExtractedReport``.

    Accepted shapes, checked in this order:

    1. An optional ``{"extractedReport": ...}`` envelope, which is unwrapped.
    2. A dict carrying both ``"report"`` and ``"chunks"``: validated as a
       :class:`PoliceReportParseResult` and converted.
    3. Anything else: validated directly as an :class:`ExtractedReport`; if
       that fails, one more attempt is made to read it as a parse result.

    Args:
        raw_output: JSON document emitted by the extraction pipeline.

    Returns:
        The report expressed in the API model.

    Raises:
        ValueError: ``json.loads`` could not decode ``raw_output``
            (``json.JSONDecodeError`` is a ``ValueError``).
        ValidationError: The payload matches neither supported schema.
    """
    payload = json.loads(raw_output)

    # Strip the envelope, if there is one, so both shapes share one code path.
    if isinstance(payload, dict):
        payload = payload.get("extractedReport", payload)

    # Canonical bc2 output: a parse result with "report" and "chunks".
    if isinstance(payload, dict) and {"report", "chunks"} <= payload.keys():
        return convert_parse_result(PoliceReportParseResult.model_validate(payload))

    try:
        already_translated = ExtractedReport.model_validate(payload)
    except ValidationError:
        # Final attempt: the payload may still be a parse result in disguise.
        return convert_parse_result(PoliceReportParseResult.model_validate(payload))
    return already_translated


def convert_parse_result(parse_result: PoliceReportParseResult) -> ExtractedReport:
    """Build an API ``ExtractedReport`` from a bc2 ``PoliceReportParseResult``.

    How the two schemas are reconciled:

    * Every bounding region of every source chunk becomes one entry in the
      flat ``references`` list (as an axis-aligned box), and chunk ids carried
      by cited values are rewritten to indices into that list.
    * Subjects are sorted into defendants, referring officers and other people
      by keyword match on their free-form ``type`` label.
    * ``Offense.crime`` / ``statute`` / ``code`` populate
      ``ExtractedCharge.description`` / ``statute`` / ``class``.
    * The same list of charges is attached to every defendant.
    * ``location`` and ``incident_type`` are not carried over, and
      ``incidentDate`` is always ``None``.

    Placeholders are synthesized when the pipeline found no offenses (a single
    empty charge) or no defendant-like subject (a single defendant holding
    only the charges), because the API schema requires at least one of each.
    """
    ref_list, id_map = _build_references(parse_result.chunks)
    source_report = parse_result.report
    defendant_subjects, officer_subjects, other_subjects = _classify_subjects(
        source_report.subjects
    )

    # An empty conversion result falls back to the schema-mandated placeholder.
    charge_list = [
        _convert_offense(offense, id_map) for offense in source_report.offenses
    ] or [ExtractedCharge()]
    defendant_models = [
        _convert_defendant(subject, charge_list, id_map)
        for subject in defendant_subjects
    ] or [ExtractedDefendant(charges=charge_list)]

    officer_models = [
        _convert_officer(subject, id_map) for subject in officer_subjects
    ]
    other_models = [_convert_person(subject, id_map) for subject in other_subjects]

    # Narratives that are absent or blank convert to None and are left out.
    narrative_models = []
    for narrative in source_report.narratives:
        converted = _convert_cited_string(narrative, id_map)
        if converted is not None:
            narrative_models.append(converted)

    incident = IncidentMetadata(
        agencyName=_convert_cited_string(source_report.reporting_agency, id_map),
        incidentNumber=_convert_cited_string(source_report.case_number, id_map),
        incidentDate=None,
    )

    # Optional collections are emitted as None rather than as empty lists.
    return ExtractedReport(
        references=ref_list,
        incident=incident,
        defendants=defendant_models,
        referringOfficers=officer_models or None,
        narratives=narrative_models or None,
        otherPeople=other_models or None,
    )


def _build_references(
    chunks: list[SourceChunk],
) -> tuple[list[DocumentRegion], dict[int, list[int]]]:
    """Flatten chunk regions and record which flat indices each chunk owns.

    Returns:
        ``(references, chunk_id_map)``. ``references`` holds one
        ``DocumentRegion`` per region that has at least one point.
        ``chunk_id_map`` maps every chunk index to the (possibly empty) list of
        positions its regions occupy in ``references``.
    """
    flat: list[DocumentRegion] = []
    index_map: dict[int, list[int]] = {}
    for position, chunk in enumerate(chunks):
        boxed = (
            (region.page, _polygon_to_bbox(region.points))
            for region in chunk.regions
        )
        # Regions without points yield no box and contribute no reference.
        fresh = [
            DocumentRegion(page=page, bbox=box)
            for page, box in boxed
            if box is not None
        ]
        first = len(flat)
        index_map[position] = list(range(first, first + len(fresh)))
        flat.extend(fresh)
    return flat, index_map


def _polygon_to_bbox(
    points: list[tuple[float, float]],
) -> BoundingBox | None:
    """Return the axis-aligned box enclosing ``points``, or ``None`` if empty."""
    if not points:
        return None
    return BoundingBox(
        x0=min(pt[0] for pt in points),
        y0=min(pt[1] for pt in points),
        x1=max(pt[0] for pt in points),
        y1=max(pt[1] for pt in points),
    )


def _remap_ids(ids: list[int], chunk_id_map: dict[int, list[int]]) -> list[int]:
    """Translate chunk ids into ``references`` indices.

    Ids missing from ``chunk_id_map`` contribute nothing. The result keeps
    first-seen order and contains each reference index at most once.
    """
    expanded = (
        ref_id for chunk_id in ids for ref_id in chunk_id_map.get(chunk_id, ())
    )
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(expanded))


def _convert_cited_string(
    cited: Cited[str] | None, chunk_id_map: dict[int, list[int]]
) -> CitedString | None:
    """Turn a bc2 ``Cited[str]`` into an API ``CitedString``.

    Content is stripped of surrounding whitespace. ``None`` is returned when
    there is no value or nothing is left after stripping, which lets callers
    drop empty optional fields.
    """
    text = "" if cited is None else (cited.content or "").strip()
    if not text:
        return None
    return CitedString(
        referenceIds=_remap_ids(cited.ids, chunk_id_map),
        content=text,
    )


def _classify_subjects(
    subjects: list[BC2Subject],
) -> tuple[list[BC2Subject], list[BC2Subject], list[BC2Subject]]:
    """Split subjects into ``(defendants, officers, others)``.

    The lower-cased ``type`` label is tested against the defendant keywords
    first and the officer keywords second; subjects matching neither go to
    ``others``. Input order is preserved within each bucket.
    """
    defendants: list[BC2Subject] = []
    officers: list[BC2Subject] = []
    others: list[BC2Subject] = []
    # Rule order encodes precedence: the defendant keywords win over officer.
    rules = (
        (_DEFENDANT_TYPE_KEYWORDS, defendants),
        (_OFFICER_TYPE_KEYWORDS, officers),
    )
    for subject in subjects:
        role = (subject.type.content or "").lower()
        bucket = next(
            (
                target
                for keywords, target in rules
                if any(keyword in role for keyword in keywords)
            ),
            others,
        )
        bucket.append(subject)
    return defendants, officers, others


def _convert_offense(
    offense: Offense, chunk_id_map: dict[int, list[int]]
) -> ExtractedCharge:
    """Map a bc2 ``Offense`` onto an API ``ExtractedCharge``.

    ``severity`` is always ``None``.
    """
    statute = _convert_cited_string(offense.statute, chunk_id_map)
    description = _convert_cited_string(offense.crime, chunk_id_map)
    charge_class = _convert_cited_string(offense.code, chunk_id_map)
    # The `class_` field can only be set through its `class` alias, which is
    # not a legal keyword argument, so the model is validated from a dict.
    return ExtractedCharge.model_validate(
        {
            "statute": statute,
            "description": description,
            "severity": None,
            "class": charge_class,
        }
    )


def _convert_defendant(
    subject: BC2Subject,
    charges: list[ExtractedCharge],
    chunk_id_map: dict[int, list[int]],
) -> ExtractedDefendant:
    """Build an ``ExtractedDefendant`` from a subject and the given charges."""

    def cite(value):
        return _convert_cited_string(value, chunk_id_map)

    fields = {
        "charges": charges,
        "name": cite(subject.name),
        "gender": cite(subject.sex),
        "race": cite(subject.race),
        "phoneNumber": cite(subject.phone),
        "address": cite(subject.address),
        # bc2 doesn't capture weight/height/eye color for subjects.
        "weight": None,
        "height": None,
        "eyeColor": None,
    }
    return ExtractedDefendant(**fields)


def _convert_officer(
    subject: BC2Subject, chunk_id_map: dict[int, list[int]]
) -> ExtractedOfficer:
    """Build an ``ExtractedOfficer`` carrying only the name; agency is unset."""
    officer_name = _convert_cited_string(subject.name, chunk_id_map)
    return ExtractedOfficer(name=officer_name, agency=None)


def _convert_person(
    subject: BC2Subject, chunk_id_map: dict[int, list[int]]
) -> ExtractedPerson:
    """Build an ``ExtractedPerson``, using the subject's role label as status."""
    person_name = _convert_cited_string(subject.name, chunk_id_map)
    person_status = _convert_cited_string(subject.type, chunk_id_map)
    return ExtractedPerson(name=person_name, status=person_status)
2026-04-20T19:33:44+00:00 # mypy: disable-error-code="name-defined" diff --git a/app/server/generated/main.py b/app/server/generated/main.py index ac8c0f3..7870f3f 100644 --- a/app/server/generated/main.py +++ b/app/server/generated/main.py @@ -1,6 +1,6 @@ # generated by fastapi-codegen: # filename: openapi.yaml -# timestamp: 2026-04-14T00:30:19+00:00 +# timestamp: 2026-04-20T19:33:44+00:00 from __future__ import annotations @@ -11,7 +11,7 @@ app = FastAPI( title='Blind Charging API', description='This API lets an application communicate with the CPL Blind Charging module via an HTTP REST API.\n', - version='0.8.1', + version='0.9.0', contact={'name': 'Joe Nudell', 'email': 'jnudell@hks.harvard.edu'}, license={'name': 'MIT License', 'url': 'https://opensource.org/license/mit/'}, ) diff --git a/app/server/generated/models.py b/app/server/generated/models.py index ad0e283..c222594 100644 --- a/app/server/generated/models.py +++ b/app/server/generated/models.py @@ -1,14 +1,13 @@ # generated by fastapi-codegen: -# filename: ../../stanford-policylab/blind-charging-api/app/schema/openapi.yaml -# timestamp: 2026-04-14T00:30:19+00:00 +# filename: openapi.yaml +# timestamp: 2026-04-20T19:33:44+00:00 from __future__ import annotations -from datetime import datetime -from enum import Enum +from enum import StrEnum from typing import List, Literal, Optional, Union -from pydantic import AnyUrl, BaseModel, Field, RootModel, conint, constr +from pydantic import AnyUrl, AwareDatetime, BaseModel, Field, RootModel, conint, constr class Error(BaseModel): @@ -19,8 +18,8 @@ class ExperimentConfig(BaseModel): version: str blob: str active: bool - createdAt: datetime - updatedAt: datetime + createdAt: AwareDatetime + updatedAt: AwareDatetime parent: Optional[str] = None name: Optional[str] = None description: Optional[str] = None @@ -41,7 +40,7 @@ class ClientCredentialsRevokeTokenRequest(BaseModel): client_secret: str -class GrantType(Enum): +class GrantType(StrEnum): client_credentials 
= 'client_credentials' @@ -57,7 +56,7 @@ class ClientCredentialsTokenResponse(BaseModel): expires_in: int -class AttachmentType(Enum): +class AttachmentType(StrEnum): LINK = 'LINK' @@ -67,7 +66,7 @@ class DocumentLink(BaseModel): url: AnyUrl -class AttachmentType1(Enum): +class AttachmentType1(StrEnum): TEXT = 'TEXT' @@ -77,7 +76,7 @@ class DocumentText(BaseModel): content: str -class AttachmentType2(Enum): +class AttachmentType2(StrEnum): BASE64 = 'BASE64' @@ -87,7 +86,7 @@ class DocumentContent(BaseModel): content: str -class AttachmentType3(Enum): +class AttachmentType3(StrEnum): JSON = 'JSON' @@ -125,15 +124,43 @@ class InputDocument(RootModel[Union[DocumentLink, DocumentText, DocumentContent] ) -class Status(Enum): +class AttachmentType4(StrEnum): + LINK = 'LINK' + + +class UnidentifiedDocumentLink(BaseModel): + attachmentType: Literal['LINK'] + url: AnyUrl + + +class AttachmentType5(StrEnum): + BASE64 = 'BASE64' + + +class UnidentifiedDocumentContent(BaseModel): + attachmentType: Literal['BASE64'] + content: str + + +class UnidentifiedInputDocument( + RootModel[Union[UnidentifiedDocumentLink, UnidentifiedDocumentContent]] +): + root: Union[UnidentifiedDocumentLink, UnidentifiedDocumentContent] = Field( + ..., + description='Document input that omits `documentId`. 
Supports URL and base64 payloads.\n', + discriminator='attachmentType', + ) + + +class Status(StrEnum): COMPLETE = 'COMPLETE' -class Status1(Enum): +class Status1(StrEnum): ERROR = 'ERROR' -class Status2(Enum): +class Status2(StrEnum): QUEUED = 'QUEUED' PROCESSING = 'PROCESSING' @@ -179,7 +206,7 @@ class Subject(BaseModel): subject: Person -class OutputFormat(Enum): +class OutputFormat(StrEnum): PDF = 'PDF' TEXT = 'TEXT' HTML = 'HTML' @@ -193,11 +220,11 @@ class RedactionTarget(BaseModel): class ReviewTimestamps(BaseModel): - pageOpen: datetime - decision: datetime + pageOpen: AwareDatetime + decision: AwareDatetime -class FinalChargingDecision(Enum): +class FinalChargingDecision(StrEnum): CHARGE = 'CHARGE' DECLINE = 'DECLINE' @@ -211,11 +238,11 @@ class FinalChargeOutcome(BaseModel): ) -class OutcomeType(Enum): +class OutcomeType(StrEnum): BLIND_DECISION = 'BLIND_DECISION' -class BlindChargingDecision(Enum): +class BlindChargingDecision(StrEnum): CHARGE_LIKELY = 'CHARGE_LIKELY' CHARGE_MAYBE = 'CHARGE_MAYBE' DECLINE_MAYBE = 'DECLINE_MAYBE' @@ -237,7 +264,7 @@ class BlindDecisionOutcome(BaseModel): ) -class DisqualifyingReason(Enum): +class DisqualifyingReason(StrEnum): ASSIGNED_TO_UNBLIND = 'ASSIGNED_TO_UNBLIND' CASE_TYPE_INELIGIBLE = 'CASE_TYPE_INELIGIBLE' PRIOR_KNOWLEDGE_BIAS = 'PRIOR_KNOWLEDGE_BIAS' @@ -247,7 +274,7 @@ class DisqualifyingReason(Enum): OTHER = 'OTHER' -class OutcomeType1(Enum): +class OutcomeType1(StrEnum): DISQUALIFICATION = 'DISQUALIFICATION' @@ -260,12 +287,12 @@ class DisqualifyOutcome(BaseModel): ) -class ReviewProtocol(Enum): +class ReviewProtocol(StrEnum): BLIND_REVIEW = 'BLIND_REVIEW' FINAL_REVIEW = 'FINAL_REVIEW' -class Protocol(Enum): +class Protocol(StrEnum): BLIND_REVIEW = 'BLIND_REVIEW' @@ -276,7 +303,7 @@ class BlindReviewDecision(BaseModel): ) -class Protocol1(Enum): +class Protocol1(StrEnum): FINAL_REVIEW = 'FINAL_REVIEW' @@ -318,36 +345,11 @@ class APIStatus(BaseModel): detail: str -class AttachmentType4(Enum): - LINK = 'LINK' - 
- -class ExtractionDocumentLink(BaseModel): - attachmentType: Literal['LINK'] - url: AnyUrl - - -class AttachmentType5(Enum): - BASE64 = 'BASE64' - - -class ExtractionDocumentContent(BaseModel): - attachmentType: Literal['BASE64'] - content: str - - -class ExtractionInputDocument( - RootModel[Union[ExtractionDocumentLink, ExtractionDocumentContent]] -): - root: Union[ExtractionDocumentLink, ExtractionDocumentContent] = Field( +class ExtractionTarget(BaseModel): + document: UnidentifiedInputDocument = Field( ..., - description='An input document for extraction. Only PDF documents are supported,\nprovided either as a URL or base64-encoded content.\n', - discriminator='attachmentType', + description='Input document to extract from, without requiring a caller-supplied\n`documentId`.\n', ) - - -class ExtractionTarget(BaseModel): - document: ExtractionInputDocument callbackUrl: Optional[AnyUrl] = None @@ -362,11 +364,11 @@ class ExtractionAccepted(BaseModel): ) -class Status3(Enum): +class Status3(StrEnum): COMPLETE = 'COMPLETE' -class Status4(Enum): +class Status4(StrEnum): ERROR = 'ERROR' @@ -375,7 +377,7 @@ class ExtractionResultError(BaseModel): status: Literal['ERROR'] -class Status5(Enum): +class Status5(StrEnum): QUEUED = 'QUEUED' PROCESSING = 'PROCESSING' diff --git a/app/server/generated/routers/experiments.py b/app/server/generated/routers/experiments.py index 8e86919..adc228e 100644 --- a/app/server/generated/routers/experiments.py +++ b/app/server/generated/routers/experiments.py @@ -1,6 +1,6 @@ # generated by fastapi-codegen: # filename: openapi.yaml -# timestamp: 2026-04-14T00:30:19+00:00 +# timestamp: 2026-04-20T19:33:44+00:00 from __future__ import annotations diff --git a/app/server/generated/routers/extraction.py b/app/server/generated/routers/extraction.py index cdde0fa..ce67740 100644 --- a/app/server/generated/routers/extraction.py +++ b/app/server/generated/routers/extraction.py @@ -1,6 +1,6 @@ # generated by fastapi-codegen: # filename: 
openapi.yaml -# timestamp: 2026-04-14T00:30:19+00:00 +# timestamp: 2026-04-20T19:33:44+00:00 from __future__ import annotations diff --git a/app/server/generated/routers/operations.py b/app/server/generated/routers/operations.py index e6a9512..6507399 100644 --- a/app/server/generated/routers/operations.py +++ b/app/server/generated/routers/operations.py @@ -1,6 +1,6 @@ # generated by fastapi-codegen: # filename: openapi.yaml -# timestamp: 2026-04-14T00:30:19+00:00 +# timestamp: 2026-04-20T19:33:44+00:00 from __future__ import annotations diff --git a/app/server/generated/routers/redaction.py b/app/server/generated/routers/redaction.py index 1f374cb..ee0f7de 100644 --- a/app/server/generated/routers/redaction.py +++ b/app/server/generated/routers/redaction.py @@ -1,6 +1,6 @@ # generated by fastapi-codegen: # filename: openapi.yaml -# timestamp: 2026-04-14T00:30:19+00:00 +# timestamp: 2026-04-20T19:33:44+00:00 from __future__ import annotations diff --git a/app/server/generated/routers/review.py b/app/server/generated/routers/review.py index 523c9b9..2b87deb 100644 --- a/app/server/generated/routers/review.py +++ b/app/server/generated/routers/review.py @@ -1,6 +1,6 @@ # generated by fastapi-codegen: # filename: openapi.yaml -# timestamp: 2026-04-14T00:30:19+00:00 +# timestamp: 2026-04-20T19:33:44+00:00 from __future__ import annotations diff --git a/app/server/handlers/extraction.py b/app/server/handlers/extraction.py new file mode 100644 index 0000000..1e3a566 --- /dev/null +++ b/app/server/handlers/extraction.py @@ -0,0 +1,112 @@ +from fastapi import HTTPException, Request +from uuid_utils import uuid7 + +from ..config import config +from ..generated.models import ( + ExtractionAccepted, + ExtractionRequest, + ExtractionResult, + ExtractionResultError, + ExtractionResultPending, + ExtractionResultSuccess, + ExtractionStatus, +) +from ..tasks import create_document_extraction_task, get_result +from ..tasks.extract_callback import ExtractionCallbackTaskResult +from 
def _task_key(token: str) -> str:
    """Return the store key under which the Celery task id for ``token`` lives."""
    return f"extract:task:{token}"


def _describe_errors(errors) -> str:
    """Render task errors for the API, with a fallback when there are none.

    ``str([])`` is the truthy string ``"[]"``, so ``str(errors) or fallback``
    can never reach the fallback. Emptiness is therefore tested on the
    collection itself. Non-empty input is rendered exactly as before.
    """
    return str(errors) if errors else "Unknown extraction error"


async def extract_documents(
    *,
    request: Request,
    body: ExtractionRequest,
) -> ExtractionAccepted:
    """Queue one extraction task chain per document and return polling tokens.

    Each document gets a fresh UUIDv7 token. The id of the Celery task
    returned by ``apply_async()`` is stored under the token's key and set to
    expire ``config.queue.task.retention_time_seconds`` after the store's
    current time.

    Args:
        request: Incoming request; ``request.state.store`` is the key/value
            store used to map tokens to task ids.
        body: The extraction request listing the documents to process.

    Returns:
        ``ExtractionAccepted`` with one token per document, in request order.
    """
    store = request.state.store
    now = await store.time()
    expires_at = now + config.queue.task.retention_time_seconds

    # Check every callback URL before queueing anything. With validation
    # interleaved in the enqueue loop, a bad URL on a later document left the
    # earlier tasks running with tokens the caller never received.
    # NOTE(review): validate_callback_url lives in .redaction; it presumably
    # raises on a disallowed URL -- confirm.
    for doc in body.documents:
        validate_callback_url(str(doc.callbackUrl) if doc.callbackUrl else None)

    tokens: list[str] = []
    for doc in body.documents:
        token = str(uuid7())
        task = create_document_extraction_task(token, doc).apply_async()

        key = _task_key(token)
        await store.set(key, task.id)
        await store.expire_at(key, expires_at)
        tokens.append(token)

    return ExtractionAccepted(tokens=tokens)


async def get_extraction_status(*, request: Request, token: str) -> ExtractionStatus:
    """Report the state of the extraction identified by ``token``.

    Celery task states map onto API results as follows:

    * ``PENDING`` -> pending result with status ``QUEUED``
    * ``RETRY`` / ``STARTED`` -> pending result with status ``PROCESSING``
    * ``SUCCESS`` -> ``COMPLETE`` with the extracted report, or ``ERROR`` when
      the task result has an unexpected type, carries errors, or has no report
    * ``FAILURE`` -> ``ERROR`` with the stringified task result
    * anything else -> ``ERROR`` naming the unknown state

    Raises:
        HTTPException: 404 when no task id is stored for ``token``.
    """
    task_id = await request.state.store.get(_task_key(token))
    if not task_id:
        raise HTTPException(status_code=404, detail="Token not found")

    task_result = get_result(task_id.decode("utf-8"))
    state = task_result.state

    if state == "PENDING":
        outcome = ExtractionResultPending(
            status="QUEUED",
            statusDetail="Extraction request has been received and is queued.",
        )
    elif state in {"RETRY", "STARTED"}:
        outcome = ExtractionResultPending(
            status="PROCESSING",
            statusDetail="Extraction task is currently being processed.",
        )
    elif state == "SUCCESS":
        final_result = task_result.result
        if not isinstance(final_result, ExtractionCallbackTaskResult):
            outcome = ExtractionResultError(
                status="ERROR",
                error="Unexpected extraction task result type.",
            )
        elif (
            final_result.extracted.errors or not final_result.extracted.extracted_report
        ):
            # The task finished but produced errors and/or no report.
            outcome = ExtractionResultError(
                status="ERROR",
                error=_describe_errors(final_result.extracted.errors),
            )
        else:
            outcome = ExtractionResultSuccess(
                status="COMPLETE",
                extractedReport=final_result.extracted.extracted_report,
            )
    elif state == "FAILURE":
        outcome = ExtractionResultError(
            status="ERROR",
            error=str(task_result.result),
        )
    else:
        outcome = ExtractionResultError(
            status="ERROR",
            error=f"Unknown extraction task state: {state}",
        )

    return ExtractionStatus(token=token, result=ExtractionResult(outcome))
def create_document_extraction_task(
    token: str,
    object: ExtractionTarget,
) -> chain:
    """Assemble the Celery chain that extracts a single document.

    The chain runs three task signatures in order: fetch the document (no
    caller-supplied document id is needed; ``token`` is used instead), run
    extraction, then the extraction callback.

    Args:
        token: Polling token; also used as the document id for the fetch and
            extraction steps.
        object: Extraction target providing the input document and an optional
            callback URL.

    Returns:
        The unapplied ``chain``; the caller is responsible for scheduling it.
    """
    # The callback task takes a plain string, or None when no URL was given.
    callback_url = str(object.callbackUrl) if object.callbackUrl else None

    fetch_step = UnidentifiedFetchTask(
        document=object.document,
        document_id=token,
    ).s()
    extract_step = ExtractionTask(document_id=token).s()
    callback_step = ExtractionCallbackTask(callback_url=callback_url).s()

    return chain(fetch_step, extract_step, callback_step)
def derive_extract_pipeline(pipe: list[AnyProcessingConfig]) -> list[dict]:  # noqa: C901
    """Derive the extract pipeline from the redaction pipe.

    The redaction pipe is flattened (``$chunk`` wrappers are unwrapped, and a
    ``$compose`` processor inside a ``$chunk`` is expanded recursively), then
    scanned for one ``analyze:azuredi`` step and one ``parse:openai`` step.
    Connection settings found on those steps are copied into a fresh
    ``analyze:azuredi`` config and an ``ontology:openai`` config.

    If an engine occurs more than once, the first occurrence supplies the
    settings and later ones are ignored.

    Args:
        pipe: The redaction pipe to derive the extract pipeline from.

    Returns:
        The extract pipeline (analyze step, then ontology step), without I/O
        engines.

    Raises:
        RuntimeError: If the extract pipeline cannot be derived because a
            required engine is missing from ``pipe``.
    """
    # NOTE(jnu): For an MVP implementation, to limit complexity in the config,
    # we will inspect the redaction pipe to pull keys and urls for services,
    # and format them into a viable extract pipeline.
    #
    # In the future we should define a completely separate config for this.
    analyze_config: dict = {
        "engine": "analyze:azuredi",
        "kv": True,
        "document_model": "prebuilt-layout",
    }
    # Tuples rather than sets so copied keys land in a deterministic order.
    analyze_config_shared_keys = ("endpoint", "api_key", "api_version", "locale")
    ontology_client: dict = {}
    ontology_generator: dict = {
        "method": "chat",
        "temperature": 0.0,
        "system": {"prompt_id": "ontology_20260415_1"},
    }
    ontology_config: dict = {
        "engine": "ontology:openai",
        "client": ontology_client,
        "generator": ontology_generator,
    }
    generator_config_shared_keys = ("model", "openai_model", "max_tokens")
    client_config_shared_keys = ("azure_endpoint", "api_key", "api_version")
    required_configs = {"analyze:azuredi", "parse:openai"}
    steps: list = []

    def _flatten_steps(pp: list[AnyProcessingConfig]) -> None:
        """Append the concrete processing steps of ``pp`` to ``steps``."""
        for step in pp:
            if step.engine != "$chunk":
                steps.append(step)
            elif step.processor.engine == "$compose":
                _flatten_steps(step.processor.pipe)
            else:
                steps.append(step.processor)

    _flatten_steps(pipe)

    for step in steps:
        if not required_configs:
            break

        engine = step.engine
        if engine not in required_configs:
            # Either an unrelated engine or a repeat of one already harvested.
            # Calling set.remove() on a repeat used to raise KeyError.
            continue
        required_configs.discard(engine)

        if engine == "analyze:azuredi":
            for key in analyze_config_shared_keys:
                if hasattr(step, key):
                    analyze_config[key] = getattr(step, key)
        elif engine == "parse:openai":
            generator = getattr(step, "generator", None)
            client = getattr(step, "client", None)
            for key in generator_config_shared_keys:
                if hasattr(generator, key):
                    ontology_generator[key] = getattr(generator, key)
            for key in client_config_shared_keys:
                if hasattr(client, key):
                    ontology_client[key] = getattr(client, key)

    if required_configs:
        raise RuntimeError(
            f"Unable to derive extract pipeline: {required_configs} missing from config"
        )

    return [
        analyze_config,
        ontology_config,
    ]
on_retry=allf(save_retry_state_sync, record_task_retry), + on_failure=record_task_failure, + on_success=record_task_success, + before_start=record_task_start, +) +def extract( + self: Task, fetch_result: FetchTaskResult, params: ExtractionTask +) -> ExtractionTaskResult: + """Run extraction pipeline against a fetched document.""" + if fetch_result.errors: + return ExtractionTaskResult( + document_id=params.document_id, + errors=fetch_result.errors, + ) + + try: + pipeline_cfg = PipelineConfig.model_validate( + { + "pipe": [ + {"engine": "in:memory"}, + *(derive_extract_pipeline(config.processor.pipe)), + {"engine": "out:memory"}, + ] + } + ) + print("PIPELINE CFG", pipeline_cfg) + + pipeline = Pipeline(pipeline_cfg) + input_buffer = io.BytesIO(get_document_sync(fetch_result.file_storage_id)) + output_buffer = io.BytesIO() + pipeline.run({"in": {"buffer": input_buffer}, "out": {"buffer": output_buffer}}) + + extracted_report = parse_extracted_report(output_buffer.getvalue()) + return ExtractionTaskResult( + document_id=params.document_id, + extracted_report=extracted_report, + ) + except Exception as e: + if self.request.retries >= self.max_retries or isinstance( + e, FilteredContentError + ): + logger.error( + f"Extraction failed for {params.document_id} " + f"after {self.max_retries} retries. Error: {e}" + ) + return ExtractionTaskResult( + document_id=params.document_id, + errors=[ + *fetch_result.errors, + ProcessingError.from_exception("extract", e), + ], + ) + + logger.warning( + f"Extraction failed for {params.document_id}. This task will be retried." 
+ ) + logger.error("The exception that caused the failure was:") + logger.exception(e) + raise self.retry() from e diff --git a/app/server/tasks/extract_callback.py b/app/server/tasks/extract_callback.py new file mode 100644 index 0000000..a5194db --- /dev/null +++ b/app/server/tasks/extract_callback.py @@ -0,0 +1,123 @@ +import json +import logging + +import requests +from celery.canvas import Signature +from pydantic import BaseModel + +from app.func import allf + +from ..case_helper import save_retry_state_sync +from ..config import config +from ..generated.models import ( + ExtractionResultCompleted, + ExtractionResultError, + ExtractionResultSuccess, +) +from .extract import ExtractionTaskResult +from .metrics import ( + celery_counters, + record_task_failure, + record_task_retry, + record_task_start, + record_task_success, +) +from .queue import ProcessingError, queue +from .serializer import register_type + +logger = logging.getLogger(__name__) + + +class ExtractionCallbackTask(BaseModel): + callback_url: str | None = None + + def s(self) -> Signature: + return extraction_callback.s(self) + + +class ExtractionCallbackTaskResult(BaseModel): + status_code: int + response: str | None = None + extracted: ExtractionTaskResult + + +register_type(ExtractionCallbackTask) +register_type(ExtractionCallbackTaskResult) + + +_callback_timeout = config.queue.task.callback_timeout_seconds + + +@queue.task( + task_track_started=True, + task_time_limit=_callback_timeout + 10, + task_soft_time_limit=_callback_timeout, + max_retries=5, + retry_backoff=True, + autoretry_for=(Exception,), + default_retry_delay=30, + on_retry=allf(save_retry_state_sync, record_task_retry), + on_failure=record_task_failure, + on_success=record_task_success, + before_start=record_task_start, +) +def extraction_callback( + extract_result: ExtractionTaskResult, params: ExtractionCallbackTask +) -> ExtractionCallbackTaskResult: + """Post extraction callback if requested.""" + if params.callback_url: + 
body = build_callback_body(extract_result) + response = requests.post( + params.callback_url, + json=body.model_dump(mode="json"), + ) + try: + response.raise_for_status() + celery_counters.record_callback(True) + except Exception: + celery_counters.record_callback(False) + raise + + return ExtractionCallbackTaskResult( + status_code=response.status_code, + response=response.text, + extracted=extract_result, + ) + + return ExtractionCallbackTaskResult( + status_code=0, + response="[nothing to do]", + extracted=extract_result, + ) + + +def build_callback_body(result: ExtractionTaskResult) -> ExtractionResultCompleted: + """Build callback body for extraction status.""" + if result.errors or not result.extracted_report: + return ExtractionResultCompleted( + ExtractionResultError( + error=format_errors(result.errors), + status="ERROR", + ) + ) + + return ExtractionResultCompleted( + ExtractionResultSuccess( + extractedReport=result.extracted_report, + status="COMPLETE", + ) + ) + + +def format_errors(errors: list[ProcessingError]) -> str: + if not errors: + return json.dumps( + [ + { + "message": "Unknown error", + "task": "unknown", + "exception": "UnknownException", + } + ] + ) + return json.dumps([err.model_dump() for err in errors]) diff --git a/app/server/tasks/fetch.py b/app/server/tasks/fetch.py index 649cc9f..f470ff2 100644 --- a/app/server/tasks/fetch.py +++ b/app/server/tasks/fetch.py @@ -15,6 +15,9 @@ DocumentLink, DocumentText, InputDocument, + UnidentifiedDocumentContent, + UnidentifiedDocumentLink, + UnidentifiedInputDocument, ) from .metrics import ( record_task_failure, @@ -35,6 +38,14 @@ def s(self) -> Signature: return fetch.s(self) +class UnidentifiedFetchTask(BaseModel): + document: UnidentifiedInputDocument + document_id: str + + def s(self) -> Signature: + return fetch_unidentified.s(self) + + class FetchTaskResult(BaseModel): document_id: str file_storage_id: str | None = None @@ -42,6 +53,7 @@ class FetchTaskResult(BaseModel): 
register_type(FetchTask) +register_type(UnidentifiedFetchTask) register_type(FetchTaskResult) @@ -67,42 +79,81 @@ def fetch(self, params: FetchTask) -> FetchTaskResult: Returns: FetchTaskResult: The task result. """ - try: - content = b"" - match params.document.root.attachmentType: - case "LINK": - response = requests.get( - str(cast(DocumentLink, params.document.root).url), - timeout=config.queue.task.link_download_timeout_seconds, - ) - response.raise_for_status() - content = response.content - case "TEXT": - content = cast(DocumentText, params.document.root).content.encode( - "utf-8" - ) - case "BASE64": - content = base64.b64decode( - cast(DocumentContent, params.document.root).content - ) - case _: - raise ValueError( - "Unsupported attachment type: " - f"{params.document.root.attachmentType}" - ) + return _fetch_and_save(self, params.document.root.documentId, params.document) + + +@queue.task( + bind=True, + task_track_started=True, + task_time_limit=config.queue.task.link_download_timeout_seconds + 30, + task_soft_time_limit=config.queue.task.link_download_timeout_seconds, + max_retries=3, + retry_backoff=True, + default_retry_delay=30, + on_retry=allf(save_retry_state_sync, record_task_retry), + on_failure=record_task_failure, + on_success=record_task_success, + before_start=record_task_start, +) +def fetch_unidentified(self, params: UnidentifiedFetchTask) -> FetchTaskResult: + """Fetch the content of an unidentified input document.""" + return _fetch_and_save(self, params.document_id, params.document) + + +def _fetch_and_save( + task, + document_id: str, + document: InputDocument | UnidentifiedInputDocument, +) -> FetchTaskResult: + """Fetch document bytes, persist them, and build a task result. + Shared implementation for the identified and unidentified fetch tasks. 
+ """ + try: + content = fetch_document_content(document) return FetchTaskResult( - document_id=params.document.root.documentId, + document_id=document_id, file_storage_id=save_document_sync(content), ) except Exception as e: - if self.request.retries < self.max_retries: + if task.request.retries < task.max_retries: logger.warning(f"Fetch task failed: {e}, will be retried.") - return self.retry(exc=e) - else: - logger.error(f"Fetch task failed for {params.document.root.documentId}") - logger.exception(e) - return FetchTaskResult( - document_id=params.document.root.documentId, - errors=[ProcessingError.from_exception("fetch", e)], + return task.retry(exc=e) + logger.error(f"Fetch task failed for {document_id}") + logger.exception(e) + return FetchTaskResult( + document_id=document_id, + errors=[ProcessingError.from_exception("fetch", e)], + ) + + +def fetch_document_content( + document: InputDocument | UnidentifiedInputDocument, +) -> bytes: + """Fetch bytes from a supported input document type.""" + match document.root.attachmentType: + case "LINK": + if isinstance(document, InputDocument): + url = cast(DocumentLink, document.root).url + else: + url = cast(UnidentifiedDocumentLink, document.root).url + response = requests.get( + str(url), + timeout=config.queue.task.link_download_timeout_seconds, + ) + response.raise_for_status() + return response.content + case "TEXT": + if not isinstance(document, InputDocument): + raise ValueError("TEXT attachment is not supported for anonymous docs.") + return cast(DocumentText, document.root).content.encode("utf-8") + case "BASE64": + if isinstance(document, InputDocument): + content = cast(DocumentContent, document.root).content + else: + content = cast(UnidentifiedDocumentContent, document.root).content + return base64.b64decode(content) + case _: + raise ValueError( + f"Unsupported attachment type: {document.root.attachmentType}" ) diff --git a/pyproject.toml b/pyproject.toml index 944fab5..d60e34c 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -40,10 +40,10 @@ dependencies = [ "kombu @ git+https://github.com/jnu/kombu.git@bb38b12eec4b79dc95caf75bf4ccdd12a78216c1", "celery==5.4", "tiktoken>=0.12.0", - "bc2 @ git+https://github.com/comppolicylab/bc2.git@ff6f2b4", + "bc2", ] name = "blind-charging-api" -version = "0.12.9" +version = "0.13.0-beta.2" description = "Race-blind Charging API" readme = "README.md" @@ -73,3 +73,6 @@ exclude = "^(alembic)" [[tool.mypy.overrides]] module = "app.server.generated.models" ignore_errors = true + +[tool.uv.sources] +bc2 = { git = "https://github.com/comppolicylab/bc2.git" } diff --git a/terraform/app_config.tf b/terraform/app_config.tf index 4fe0117..7bd3d46 100644 --- a/terraform/app_config.tf +++ b/terraform/app_config.tf @@ -41,11 +41,14 @@ EOF # This is the full pipeline on the production version. app_pipeline_prod_toml = < Cited[str]: + return Cited[str](ids=ids or [], content=content) + + +def _subject(type_: str, name: str = "", ids: list[int] | None = None) -> Subject: + ids = ids or [] + blank = _cited_str("", ids) + return Subject( + type=_cited_str(type_, ids), + name=_cited_str(name, ids) if name else blank, + address=blank, + phone=blank, + race=blank, + sex=blank, + dob=blank, + ) + + +def _chunk( + content: str, + regions: list[tuple[int, list[tuple[float, float]]]], + offset: int = 0, + length: int = 10, +) -> SourceChunk: + return SourceChunk( + spans=[SourceChunkSpan(offset=offset, length=length)], + regions=[ + SourceChunkBoundingRegion(page=page, points=points) + for page, points in regions + ], + content=content, + ) + + +def _square(origin_x: float = 0.0, origin_y: float = 0.0, size: float = 1.0): + return [ + (origin_x, origin_y), + (origin_x + size, origin_y), + (origin_x + size, origin_y + size), + (origin_x, origin_y + size), + ] + + +def _build_parse_result( + *, + chunks: list[SourceChunk] | None = None, + subjects: list[Subject] | None = None, + narratives: list[Cited[str]] | None = None, + offenses: 
list[Offense] | None = None, + reporting_agency: Cited[str] | None = None, + case_number: Cited[str] | None = None, + location: Cited[str] | None = None, + incident_type: Cited[str] | None = None, +) -> PoliceReportParseResult: + """Convenience builder for PoliceReportParseResult test fixtures.""" + return PoliceReportParseResult( + report=PoliceReport( + reporting_agency=reporting_agency or _cited_str(""), + case_number=case_number or _cited_str(""), + location=location or _cited_str(""), + incident_type=incident_type or _cited_str(""), + subjects=subjects or [], + narratives=narratives or [], + offenses=offenses or [], + ), + chunks=chunks or [], + ) + + +# --------------------------------------------------------------------------- +# _polygon_to_bbox +# --------------------------------------------------------------------------- + + +class TestPolygonToBbox: + def test_square(self): + bbox = _polygon_to_bbox(_square()) + assert bbox == BoundingBox(x0=0.0, y0=0.0, x1=1.0, y1=1.0) + + def test_arbitrary_polygon_takes_min_max(self): + points = [(0.2, 0.9), (0.5, 0.1), (1.3, 0.4), (0.7, 1.2)] + bbox = _polygon_to_bbox(points) + assert bbox == BoundingBox(x0=0.2, y0=0.1, x1=1.3, y1=1.2) + + def test_empty_polygon_returns_none(self): + assert _polygon_to_bbox([]) is None + + def test_single_point(self): + assert _polygon_to_bbox([(0.5, 0.5)]) == BoundingBox( + x0=0.5, y0=0.5, x1=0.5, y1=0.5 + ) + + +# --------------------------------------------------------------------------- +# _build_references +# --------------------------------------------------------------------------- + + +class TestBuildReferences: + def test_flattens_multi_region_chunks(self): + chunks = [ + _chunk("a", [(1, _square())]), + _chunk( + "b", + [(1, _square(origin_y=2.0)), (2, _square(origin_x=3.0, size=2.0))], + ), + _chunk("c", [(3, _square(size=0.5))]), + ] + references, chunk_map = _build_references(chunks) + + assert references == [ + DocumentRegion(page=1, bbox=BoundingBox(x0=0.0, y0=0.0, 
x1=1.0, y1=1.0)), + DocumentRegion(page=1, bbox=BoundingBox(x0=0.0, y0=2.0, x1=1.0, y1=3.0)), + DocumentRegion(page=2, bbox=BoundingBox(x0=3.0, y0=0.0, x1=5.0, y1=2.0)), + DocumentRegion(page=3, bbox=BoundingBox(x0=0.0, y0=0.0, x1=0.5, y1=0.5)), + ] + assert chunk_map == {0: [0], 1: [1, 2], 2: [3]} + + def test_chunk_with_no_regions_maps_to_empty_list(self): + chunks = [_chunk("lonely", [])] + references, chunk_map = _build_references(chunks) + assert references == [] + assert chunk_map == {0: []} + + def test_skips_empty_polygon_regions(self): + chunks = [_chunk("a", [(1, []), (1, _square())])] + references, chunk_map = _build_references(chunks) + # Only the non-empty polygon produces a reference, and the chunk map + # only references the surviving entry. + assert len(references) == 1 + assert chunk_map == {0: [0]} + + def test_empty_chunks(self): + references, chunk_map = _build_references([]) + assert references == [] + assert chunk_map == {} + + +# --------------------------------------------------------------------------- +# _remap_ids +# --------------------------------------------------------------------------- + + +class TestRemapIds: + def test_expands_chunk_ids_into_reference_ids(self): + chunk_map = {0: [0], 1: [1, 2], 2: [3]} + assert _remap_ids([0, 1], chunk_map) == [0, 1, 2] + + def test_dedupes_while_preserving_order(self): + chunk_map = {0: [0, 1], 1: [1, 2], 2: [0]} + assert _remap_ids([0, 1, 2], chunk_map) == [0, 1, 2] + + def test_unknown_chunk_ids_are_silently_skipped(self): + chunk_map = {0: [0]} + assert _remap_ids([99, 0, 42], chunk_map) == [0] + + def test_empty_input(self): + assert _remap_ids([], {0: [0]}) == [] + + +# --------------------------------------------------------------------------- +# _convert_cited_string +# --------------------------------------------------------------------------- + + +class TestConvertCitedString: + def test_basic_conversion_remaps_ids(self): + cited = _cited_str("hello", ids=[0, 1]) + chunk_map = {0: [0], 
1: [1]} + result = _convert_cited_string(cited, chunk_map) + assert result == CitedString(referenceIds=[0, 1], content="hello") + + def test_none_in_none_out(self): + assert _convert_cited_string(None, {}) is None + + def test_empty_content_is_none(self): + assert _convert_cited_string(_cited_str("", [0]), {0: [0]}) is None + + def test_whitespace_only_content_is_none(self): + assert _convert_cited_string(_cited_str(" \n\t", [0]), {0: [0]}) is None + + def test_strips_surrounding_whitespace(self): + result = _convert_cited_string(_cited_str(" hi ", [0]), {0: [0]}) + assert result is not None + assert result.content == "hi" + + +# --------------------------------------------------------------------------- +# _classify_subjects +# --------------------------------------------------------------------------- + + +class TestClassifySubjects: + @pytest.mark.parametrize( + "type_label", + [ + "Defendant", + "SUSPECT", + "accused", + "arrestee", + "respondent", + "perpetrator", + "offender", + "Primary Defendant", + "suspect/arrestee", + ], + ) + def test_defendant_keywords(self, type_label): + defendants, officers, others = _classify_subjects([_subject(type_label, "X")]) + assert len(defendants) == 1 + assert not officers and not others + + @pytest.mark.parametrize( + "type_label", + [ + "Officer", + "Reporting Officer", + "Deputy Sheriff", + "Detective", + "Investigator", + "POLICE", + "state trooper", + ], + ) + def test_officer_keywords(self, type_label): + defendants, officers, others = _classify_subjects([_subject(type_label, "X")]) + assert len(officers) == 1 + assert not defendants and not others + + @pytest.mark.parametrize( + "type_label", + ["Witness", "Victim", "Complainant", "Bystander", "", "Reporting Party"], + ) + def test_other_keywords(self, type_label): + defendants, officers, others = _classify_subjects([_subject(type_label, "X")]) + assert len(others) == 1 + assert not defendants and not officers + + def 
test_defendant_wins_over_officer_if_both_keywords_present(self): + # Defendant keyword matching runs before officer matching. + defendants, officers, _ = _classify_subjects( + [_subject("Defendant Officer", "X")] + ) + assert len(defendants) == 1 and not officers + + +# --------------------------------------------------------------------------- +# convert_parse_result +# --------------------------------------------------------------------------- + + +class TestConvertParseResult: + def test_end_to_end_report(self): + # Three chunks, the second with two regions so we exercise id remap. + chunks = [ + _chunk("case", [(1, _square())]), + _chunk( + "defendant box", + [(1, _square(origin_y=2.0)), (2, _square(origin_x=3.0))], + ), + _chunk("narrative", [(2, _square(origin_y=5.0))]), + ] + parse_result = _build_parse_result( + chunks=chunks, + reporting_agency=_cited_str("SFPD", [0]), + case_number=_cited_str("12345", [0]), + location=_cited_str("100 Main St", [0]), # dropped by converter + incident_type=_cited_str("Assault", [0]), # dropped by converter + subjects=[ + _subject("Defendant", "John Doe", ids=[1]), + _subject("Reporting Officer", "Officer Smith", ids=[1]), + _subject("Witness", "Jane Roe", ids=[1]), + ], + narratives=[_cited_str("Fled on foot.", ids=[2])], + offenses=[ + Offense( + crime=_cited_str("Assault", [0]), + statute=_cited_str("PC 240", [0]), + code=None, + ) + ], + ) + + result = convert_parse_result(parse_result) + + # Flattened references array. + assert len(result.references) == 4 + assert result.references[0].page == 1 + assert result.references[3].page == 2 + + # Incident metadata: agency + case, but no location / incident_type. + assert result.incident is not None + assert result.incident.agencyName == CitedString( + referenceIds=[0], content="SFPD" + ) + assert result.incident.incidentNumber == CitedString( + referenceIds=[0], content="12345" + ) + assert result.incident.incidentDate is None + + # Defendants have all offenses as charges. 
+ assert len(result.defendants) == 1 + d = result.defendants[0] + assert d.name == CitedString(referenceIds=[1, 2], content="John Doe") + assert len(d.charges) == 1 + charge = d.charges[0] + assert charge.description == CitedString(referenceIds=[0], content="Assault") + assert charge.statute == CitedString(referenceIds=[0], content="PC 240") + assert charge.class_ is None + + # Officer/other classification. + assert result.referringOfficers is not None + assert len(result.referringOfficers) == 1 + assert result.referringOfficers[0].name == CitedString( + referenceIds=[1, 2], content="Officer Smith" + ) + assert result.otherPeople is not None + assert len(result.otherPeople) == 1 + assert result.otherPeople[0].name == CitedString( + referenceIds=[1, 2], content="Jane Roe" + ) + assert result.otherPeople[0].status == CitedString( + referenceIds=[1, 2], content="Witness" + ) + + # Narrative ids remap into the flattened references array. + assert result.narratives == [ + CitedString(referenceIds=[3], content="Fled on foot.") + ] + + def test_no_offenses_yields_placeholder_empty_charge(self): + parse_result = _build_parse_result( + chunks=[_chunk("x", [(1, _square())])], + subjects=[_subject("Defendant", "D", ids=[0])], + ) + result = convert_parse_result(parse_result) + assert len(result.defendants) == 1 + charges = result.defendants[0].charges + assert len(charges) == 1 + assert charges[0].statute is None + assert charges[0].description is None + assert charges[0].severity is None + assert charges[0].class_ is None + + def test_no_defendant_subjects_synthesizes_placeholder(self): + parse_result = _build_parse_result( + chunks=[_chunk("x", [(1, _square())])], + subjects=[_subject("Witness", "W", ids=[0])], + ) + result = convert_parse_result(parse_result) + # Placeholder defendant with no fields populated. 
+ assert len(result.defendants) == 1 + assert result.defendants[0].name is None + assert result.defendants[0].charges # must be non-empty + # The witness still shows up in otherPeople. + assert result.otherPeople is not None and len(result.otherPeople) == 1 + + def test_empty_optional_lists_are_normalized_to_none(self): + parse_result = _build_parse_result( + chunks=[_chunk("x", [(1, _square())])], + subjects=[_subject("Defendant", "D", ids=[0])], + narratives=[], + ) + result = convert_parse_result(parse_result) + assert result.referringOfficers is None + assert result.narratives is None + assert result.otherPeople is None + + def test_offense_code_maps_to_class_field(self): + parse_result = _build_parse_result( + chunks=[_chunk("x", [(1, _square())])], + subjects=[_subject("Defendant", "D", ids=[0])], + offenses=[ + Offense( + crime=_cited_str("Theft", [0]), + statute=None, + code=_cited_str("459", [0]), + ) + ], + ) + result = convert_parse_result(parse_result) + charge = result.defendants[0].charges[0] + assert charge.class_ == CitedString(referenceIds=[0], content="459") + assert charge.statute is None + assert charge.description == CitedString(referenceIds=[0], content="Theft") + + def test_empty_content_fields_become_none(self): + parse_result = _build_parse_result( + chunks=[_chunk("x", [(1, _square())])], + subjects=[ + Subject( + type=_cited_str("Defendant", [0]), + name=_cited_str("D", [0]), + address=_cited_str("", [0]), + phone=_cited_str(" ", [0]), + race=_cited_str("", [0]), + sex=_cited_str("", [0]), + dob=_cited_str("", [0]), + ) + ], + ) + result = convert_parse_result(parse_result) + d = result.defendants[0] + assert d.name == CitedString(referenceIds=[0], content="D") + assert d.address is None + assert d.phoneNumber is None + assert d.race is None + assert d.gender is None + + def test_incident_is_present_but_fields_may_be_none(self): + parse_result = _build_parse_result( + subjects=[_subject("Defendant", "D")], + ) + result = 
convert_parse_result(parse_result) + assert result.incident is not None + assert result.incident.agencyName is None + assert result.incident.incidentNumber is None + assert result.incident.incidentDate is None + + +# --------------------------------------------------------------------------- +# parse_extracted_report +# --------------------------------------------------------------------------- + + +class TestParseExtractedReport: + @pytest.fixture + def parse_result_payload(self) -> dict: + return { + "chunks": [ + { + "spans": [{"offset": 0, "length": 10}], + "regions": [ + { + "page": 1, + "points": [ + [0.0, 0.0], + [1.0, 0.0], + [1.0, 1.0], + [0.0, 1.0], + ], + } + ], + "content": "case", + } + ], + "report": { + "reporting_agency": {"ids": [0], "content": "SFPD"}, + "case_number": {"ids": [0], "content": "12345"}, + "location": {"ids": [], "content": ""}, + "incident_type": {"ids": [], "content": ""}, + "subjects": [ + { + "seq": None, + "type": {"ids": [0], "content": "Defendant"}, + "name": {"ids": [0], "content": "John Doe"}, + "address": {"ids": [], "content": ""}, + "phone": {"ids": [], "content": ""}, + "race": {"ids": [], "content": ""}, + "sex": {"ids": [], "content": ""}, + "dob": {"ids": [], "content": ""}, + } + ], + "narratives": [], + "offenses": [ + { + "crime": {"ids": [0], "content": "Assault"}, + "statute": None, + "code": None, + } + ], + }, + } + + def test_parses_police_report_parse_result(self, parse_result_payload): + result = parse_extracted_report(json.dumps(parse_result_payload).encode()) + assert isinstance(result, ExtractedReport) + assert len(result.references) == 1 + assert result.defendants[0].name == CitedString( + referenceIds=[0], content="John Doe" + ) + + def test_unwraps_extracted_report_envelope(self, parse_result_payload): + envelope = {"extractedReport": parse_result_payload} + result = parse_extracted_report(json.dumps(envelope).encode()) + assert isinstance(result, ExtractedReport) + assert result.defendants[0].name == 
CitedString( + referenceIds=[0], content="John Doe" + ) + + def test_accepts_legacy_extracted_report_payload(self): + # Already-translated payload (no ``report``/``chunks`` keys) should + # be accepted verbatim. + legacy = { + "references": [ + {"page": 1, "bbox": {"x0": 0.0, "y0": 0.0, "x1": 1.0, "y1": 1.0}} + ], + "defendants": [ + { + "charges": [{}], + "name": {"referenceIds": [0], "content": "Jane"}, + } + ], + } + result = parse_extracted_report(json.dumps(legacy).encode()) + assert len(result.references) == 1 + assert result.defendants[0].name == CitedString( + referenceIds=[0], content="Jane" + ) + + def test_rejects_completely_unrecognized_payload(self): + from pydantic import ValidationError + + with pytest.raises(ValidationError): + parse_extracted_report(b'{"foo": "bar"}') + + def test_rejects_invalid_json(self): + with pytest.raises(json.JSONDecodeError): + parse_extracted_report(b"not json") diff --git a/uv.lock b/uv.lock index b624444..66ddd81 100644 --- a/uv.lock +++ b/uv.lock @@ -274,27 +274,17 @@ wheels = [ ] [[package]] -name = "azure-ai-formrecognizer" -version = "3.3.3" +name = "azure-ai-documentintelligence" +version = "1.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "azure-common" }, { name = "azure-core" }, - { name = "msrest" }, + { name = "isodate" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/03/ab76ece556f13e84481d74d79dc74ad8f8e84bd030468f01ae81adebfb52/azure-ai-formrecognizer-3.3.3.tar.gz", hash = "sha256:9fc09788bbb65866630fa870cca1933bfd7298b8055236530bcc0e40d81fcccf", size = 397879, upload-time = "2024-04-09T23:23:33.458Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/c0/88b760e94bb330a1b31af204378563524c72d48f1c62c338fe1d18fdc894/azure_ai_formrecognizer-3.3.3-py3-none-any.whl", hash = "sha256:81fc1abda8bd898426ee3bbc1b9c6bd164514201ce282129a31d4664f9d1f3bc", size = 301373, upload-time = "2024-04-09T23:23:36.545Z" }, -] - 
-[[package]] -name = "azure-common" -version = "1.1.28" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3e/71/f6f71a276e2e69264a97ad39ef850dca0a04fce67b12570730cb38d0ccac/azure-common-1.1.28.zip", hash = "sha256:4ac0cd3214e36b6a1b6a442686722a5d8cc449603aa833f3f0f40bda836704a3", size = 20914, upload-time = "2022-02-03T19:39:44.373Z" } +sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940, upload-time = "2025-03-27T02:46:20.606Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/55/7f118b9c1b23ec15ca05d15a578d8207aa1706bc6f7c87218efffbbf875d/azure_common-1.1.28-py2.py3-none-any.whl", hash = "sha256:5c12d3dcf4ec20599ca6b0d3e09e86e146353d443e7fcc050c9a19c1f9df20ad", size = 14462, upload-time = "2022-02-03T19:39:42.417Z" }, + { url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005, upload-time = "2025-03-27T02:46:22.356Z" }, ] [[package]] @@ -397,10 +387,10 @@ wheels = [ [[package]] name = "bc2" -version = "0.7.10" -source = { git = "https://github.com/comppolicylab/bc2.git?rev=ff6f2b4#ff6f2b4e2deba5d4d637ca9175dccb344e2561b8" } +version = "0.8.0b1" +source = { git = "https://github.com/comppolicylab/bc2.git#80c8c8c58bb9185d52e6c9e162e8ef3f2bac41e5" } dependencies = [ - { name = "azure-ai-formrecognizer" }, + { name = "azure-ai-documentintelligence" }, { name = "azure-identity" }, { name = "azure-storage-blob" }, { name = "click" }, @@ -431,7 +421,7 @@ wheels = [ [[package]] name = "blind-charging-api" -version = "0.12.9" +version = "0.13.0b2" source = { virtual = "." 
} dependencies = [ { name = "aiohttp" }, @@ -495,7 +485,7 @@ requires-dist = [ { name = "argon2-cffi", specifier = ">=23.1.0,<24.0.0" }, { name = "azure-monitor-opentelemetry", specifier = ">=1.8.7,<2.0.0" }, { name = "azure-storage-blob", specifier = ">=12.20.0,<13.0.0" }, - { name = "bc2", git = "https://github.com/comppolicylab/bc2.git?rev=ff6f2b4" }, + { name = "bc2", git = "https://github.com/comppolicylab/bc2.git" }, { name = "celery", specifier = "==5.4" }, { name = "certifi", specifier = ">=2024.12.14,<2025.0.0" }, { name = "fakeredis", specifier = "==2.24.1" }, @@ -1646,7 +1636,7 @@ wheels = [ [[package]] name = "openai" -version = "2.20.0" +version = "2.32.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1658,9 +1648,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6e/5a/f495777c02625bfa18212b6e3b73f1893094f2bf660976eb4bc6f43a1ca2/openai-2.20.0.tar.gz", hash = "sha256:2654a689208cd0bf1098bb9462e8d722af5cbe961e6bba54e6f19fb843d88db1", size = 642355, upload-time = "2026-02-10T19:02:54.145Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/59/bdcc6b759b8c42dd73afaf5bf8f902c04b37987a5514dbc1c64dba390fef/openai-2.32.0.tar.gz", hash = "sha256:c54b27a9e4cb8d51f0dd94972ffd1a04437efeb259a9e60d8922b8bd26fe55e0", size = 693286, upload-time = "2026-04-15T22:28:19.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/a0/cf4297aa51bbc21e83ef0ac018947fa06aea8f2364aad7c96cbf148590e6/openai-2.20.0-py3-none-any.whl", hash = "sha256:38d989c4b1075cd1f76abc68364059d822327cf1a932531d429795f4fc18be99", size = 1098479, upload-time = "2026-02-10T19:02:52.157Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c1/d6e64ccd0536bf616556f0cad2b6d94a8125f508d25cfd814b1d2db4e2f1/openai-2.32.0-py3-none-any.whl", hash = "sha256:4dcc9badeb4bf54ad0d187453742f290226d30150890b7890711bda4f32f192f", size = 1162570, upload-time = 
"2026-04-15T22:28:17.714Z" }, ] [[package]] @@ -2266,15 +2256,16 @@ wheels = [ [[package]] name = "pydantic-settings" -version = "2.5.2" +version = "2.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, + { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/68/27/0bed9dd26b93328b60a1402febc780e7be72b42847fa8b5c94b7d0aeb6d1/pydantic_settings-2.5.2.tar.gz", hash = "sha256:f90b139682bee4d2065273d5185d71d37ea46cfe57e1b5ae184fc6a0b2484ca0", size = 70938, upload-time = "2024-09-11T09:08:08.489Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/8d/29e82e333f32d9e2051c10764b906c2a6cd140992910b5f49762790911ba/pydantic_settings-2.5.2-py3-none-any.whl", hash = "sha256:2c912e55fd5794a59bf8c832b9de832dcfdf4778d79ff79b708744eed499a907", size = 26864, upload-time = "2024-09-11T09:08:07.242Z" }, + { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, ] [[package]] @@ -2316,18 +2307,18 @@ wheels = [ [[package]] name = "pymupdf" -version = "1.27.2" +version = "1.27.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a4/fb/d80374ab091ab7ad5a5e7981a45c877ae094db668c1ab4d30f1109a4ec6a/pymupdf-1.27.2.tar.gz", hash = "sha256:37fc9cedeafb40839f86a074d4d9feab725144bdd4bbfd20308ff8957e2b10af", size = 85353104, upload-time = "2026-03-10T12:53:01.697Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f1/32/f6b645c51d79a188a4844140c5dabca7b487ad56c4be69c4bc782d0d11a9/pymupdf-1.27.2.2.tar.gz", hash = "sha256:ea8fdc3ab6671ca98f629d5ec3032d662c8cf1796b146996b7ad306ac7ed3335", size = 85354380, upload-time = "2026-03-20T09:47:58.386Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/98/ee/2c10b6bde83ee42f5150b690ace952a802a7e632776dadd42bbfe5b68601/pymupdf-1.27.2-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:a60ff9010d7025428e31d92ac2c9b4218c7c4844409d0b31a050565ea0a955fd", size = 23987468, upload-time = "2026-03-10T12:37:06.593Z" }, - { url = "https://files.pythonhosted.org/packages/44/06/c8cc8c8ade83f5a75ac0f543edc2bc3c52d8c38c1d55d1e0713558258540/pymupdf-1.27.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:5095efb242cfe1c46fec1c864a13f000098564829c98366582dde7ad9e61aa32", size = 23262964, upload-time = "2026-03-10T12:37:23.915Z" }, - { url = "https://files.pythonhosted.org/packages/1a/8e/df2ab91a680a77c82bc4501cdca60767b3758d75552e4d2849647a16cbc0/pymupdf-1.27.2-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1081235fcfad268d801cd73a7b69c629939e2c46ed4d97035cb1bb7b5b90dc54", size = 24318675, upload-time = "2026-03-10T12:37:42.249Z" }, - { url = "https://files.pythonhosted.org/packages/ab/56/c6c16fa2dcfe2476ec28a9aaaca773dc35c593699e81e573211c91442770/pymupdf-1.27.2-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:917f4dd52daea504d5c60e1430c17d637b5014a43e66d068b4b356effe087dba", size = 24947974, upload-time = "2026-03-10T12:38:00.779Z" }, - { url = "https://files.pythonhosted.org/packages/7b/4f/1659f1d80b5d2f5aad134c2ca63894c63daf47a3ffb7e18987fe25e49097/pymupdf-1.27.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9617d5e71c334937c804544fa201946c5f73d0a97b5842b96857bdabfefbc343", size = 25169417, upload-time = "2026-03-10T12:38:18.912Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/23/e34d704f7242885dd1d67cfbe1040051a04b4b7e2cf1cbd27af9bd4500a3/pymupdf-1.27.2-cp310-abi3-win32.whl", hash = "sha256:6deef49e06c9a5d8670bf5835a911ab887dac4b3ed4bd60ab7d93da6aa8ff6f1", size = 18008725, upload-time = "2026-03-10T12:38:31.915Z" }, - { url = "https://files.pythonhosted.org/packages/f5/fb/a3f1f8813f6e93c65d1f7ebca6530a889f1ae109229b537f7a617b2aab57/pymupdf-1.27.2-cp310-abi3-win_amd64.whl", hash = "sha256:acdfdb7329882246545a0f6bc85f91739e2773ed81f9301c1687cffb826470f3", size = 19237944, upload-time = "2026-03-10T12:38:45.603Z" }, - { url = "https://files.pythonhosted.org/packages/e6/a4/e9257882f0569a21d51207a58f7586a799e76dc6b4008029a04f2329194c/pymupdf-1.27.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:261c916915cede4c546559810d3210277f86f31b52dd3de138f1e12d95a4c6b6", size = 24985149, upload-time = "2026-03-10T12:39:02.636Z" }, + { url = "https://files.pythonhosted.org/packages/90/88/d01992a50165e22dec057a1129826846c547feb4ba07f42720ac030ce438/pymupdf-1.27.2.2-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:800f43e60a6f01f644343c2213b8613db02eaf4f4ba235b417b3351fa99e01c0", size = 23987563, upload-time = "2026-03-19T12:35:42.989Z" }, + { url = "https://files.pythonhosted.org/packages/6d/0e/9f526bc1d49d8082eff0d1547a69d541a0c5a052e71da625559efaba46a6/pymupdf-1.27.2.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e2e4299ef1ac0c9dff9be096cbd22783699673abecfa7c3f73173ae06421d73", size = 23263089, upload-time = "2026-03-20T09:44:16.982Z" }, + { url = "https://files.pythonhosted.org/packages/42/be/984f0d6343935b5dd30afaed6be04fc753146bf55709e63ef28bf9ef7497/pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5e3d54922db1c7da844f1208ac1db05704770988752311f81dd36694ae0a07b", size = 24318817, upload-time = "2026-03-20T09:44:33.209Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/8e/85e9d9f11dbf34036eb1df283805ef6b885f2005a56d6533bb58ab0b8a11/pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:892698c9768457eb0991c102c96a856c0a7062539371df5e6bee0816f3ef498e", size = 24948135, upload-time = "2026-03-20T09:44:51.012Z" }, + { url = "https://files.pythonhosted.org/packages/db/e6/386edb017e5b93f1ab0bf6653ae32f3dd8dfc834ed770212e10ca62f4af9/pymupdf-1.27.2.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b4bbfa6ef347fade678771a93f6364971c51a2cdc44cd2400dc4eeed1ddb4e6", size = 25169585, upload-time = "2026-03-20T09:45:05.393Z" }, + { url = "https://files.pythonhosted.org/packages/ba/fd/f1ebe24fcd31aaea8b85b3a7ac4c3fc96e20388be5466ace27c9a3c546d9/pymupdf-1.27.2.2-cp310-abi3-win32.whl", hash = "sha256:0b8e924433b7e0bd46be820899300259235997d5a747638471fb2762baa8ee30", size = 18008861, upload-time = "2026-03-20T09:45:21.353Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b6/2a9a8556000199bbf80a5915dcd15d550d1e5288894316445c54726aaf53/pymupdf-1.27.2.2-cp310-abi3-win_amd64.whl", hash = "sha256:09bb53f9486ccb5297030cbc2dbdae845ba1c3c5126e96eb2d16c4f118de0b5b", size = 19238032, upload-time = "2026-03-20T09:45:37.941Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c6/e3e11c42f09b9c34ec332c0f37b817671b59ef4001895b854f0494092105/pymupdf-1.27.2.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6cebfbbdfd219ebdebf4d8e3914624b2e3d3a844c43f4f76935822dd9b13cc12", size = 24985299, upload-time = "2026-03-20T09:45:53.26Z" }, ] [[package]] @@ -2352,11 +2343,11 @@ wheels = [ [[package]] name = "pypdf" -version = "6.10.0" +version = "6.10.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b8/9f/ca96abf18683ca12602065e4ed2bec9050b672c87d317f1079abc7b6d993/pypdf-6.10.0.tar.gz", hash = "sha256:4c5a48ba258c37024ec2505f7e8fd858525f5502784a2e1c8d415604af29f6ef", size = 5314833, upload-time = "2026-04-10T09:34:57.102Z" 
} +sdist = { url = "https://files.pythonhosted.org/packages/7b/3f/9f2167401c2e94833ca3b69535bad89e533b5de75fefe4197a2c224baec2/pypdf-6.10.2.tar.gz", hash = "sha256:7d09ce108eff6bf67465d461b6ef352dcb8d84f7a91befc02f904455c6eea11d", size = 5315679, upload-time = "2026-04-15T16:37:36.978Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/55/f2/7ebe366f633f30a6ad105f650f44f24f98cb1335c4157d21ae47138b3482/pypdf-6.10.0-py3-none-any.whl", hash = "sha256:90005e959e1596c6e6c84c8b0ad383285b3e17011751cedd17f2ce8fcdfc86de", size = 334459, upload-time = "2026-04-10T09:34:54.966Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d6/1d5c60cc17bbdf37c1552d9c03862fc6d32c5836732a0415b2d637edc2d0/pypdf-6.10.2-py3-none-any.whl", hash = "sha256:aa53be9826655b51c96741e5d7983ca224d898ac0a77896e64636810517624aa", size = 336308, upload-time = "2026-04-15T16:37:34.851Z" }, ] [[package]] @@ -2568,65 +2559,65 @@ wheels = [ [[package]] name = "rapidfuzz" -version = "3.14.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d3/28/9d808fe62375b9aab5ba92fa9b29371297b067c2790b2d7cda648b1e2f8d/rapidfuzz-3.14.3.tar.gz", hash = "sha256:2491937177868bc4b1e469087601d53f925e8d270ccc21e07404b4b5814b7b5f", size = 57863900, upload-time = "2025-11-01T11:54:52.321Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/8e/3c215e860b458cfbedb3ed73bc72e98eb7e0ed72f6b48099604a7a3260c2/rapidfuzz-3.14.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:685c93ea961d135893b5984a5a9851637d23767feabe414ec974f43babbd8226", size = 1945306, upload-time = "2025-11-01T11:53:06.452Z" }, - { url = "https://files.pythonhosted.org/packages/36/d9/31b33512015c899f4a6e6af64df8dfe8acddf4c8b40a4b3e0e6e1bcd00e5/rapidfuzz-3.14.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fa7c8f26f009f8c673fbfb443792f0cf8cf50c4e18121ff1e285b5e08a94fbdb", size = 1390788, upload-time = "2025-11-01T11:53:08.721Z" }, - { url = 
"https://files.pythonhosted.org/packages/a9/67/2ee6f8de6e2081ccd560a571d9c9063184fe467f484a17fa90311a7f4a2e/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57f878330c8d361b2ce76cebb8e3e1dc827293b6abf404e67d53260d27b5d941", size = 1374580, upload-time = "2025-11-01T11:53:10.164Z" }, - { url = "https://files.pythonhosted.org/packages/30/83/80d22997acd928eda7deadc19ccd15883904622396d6571e935993e0453a/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c5f545f454871e6af05753a0172849c82feaf0f521c5ca62ba09e1b382d6382", size = 3154947, upload-time = "2025-11-01T11:53:12.093Z" }, - { url = "https://files.pythonhosted.org/packages/5b/cf/9f49831085a16384695f9fb096b99662f589e30b89b4a589a1ebc1a19d34/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:07aa0b5d8863e3151e05026a28e0d924accf0a7a3b605da978f0359bb804df43", size = 1223872, upload-time = "2025-11-01T11:53:13.664Z" }, - { url = "https://files.pythonhosted.org/packages/c8/0f/41ee8034e744b871c2e071ef0d360686f5ccfe5659f4fd96c3ec406b3c8b/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73b07566bc7e010e7b5bd490fb04bb312e820970180df6b5655e9e6224c137db", size = 2392512, upload-time = "2025-11-01T11:53:15.109Z" }, - { url = "https://files.pythonhosted.org/packages/da/86/280038b6b0c2ccec54fb957c732ad6b41cc1fd03b288d76545b9cf98343f/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6de00eb84c71476af7d3110cf25d8fe7c792d7f5fa86764ef0b4ca97e78ca3ed", size = 2521398, upload-time = "2025-11-01T11:53:17.146Z" }, - { url = "https://files.pythonhosted.org/packages/fa/7b/05c26f939607dca0006505e3216248ae2de631e39ef94dd63dbbf0860021/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d7843a1abf0091773a530636fdd2a49a41bcae22f9910b86b4f903e76ddc82dc", size = 4259416, upload-time = "2025-11-01T11:53:19.34Z" }, - { url = 
"https://files.pythonhosted.org/packages/40/eb/9e3af4103d91788f81111af1b54a28de347cdbed8eaa6c91d5e98a889aab/rapidfuzz-3.14.3-cp312-cp312-win32.whl", hash = "sha256:dea97ac3ca18cd3ba8f3d04b5c1fe4aa60e58e8d9b7793d3bd595fdb04128d7a", size = 1709527, upload-time = "2025-11-01T11:53:20.949Z" }, - { url = "https://files.pythonhosted.org/packages/b8/63/d06ecce90e2cf1747e29aeab9f823d21e5877a4c51b79720b2d3be7848f8/rapidfuzz-3.14.3-cp312-cp312-win_amd64.whl", hash = "sha256:b5100fd6bcee4d27f28f4e0a1c6b5127bc8ba7c2a9959cad9eab0bf4a7ab3329", size = 1538989, upload-time = "2025-11-01T11:53:22.428Z" }, - { url = "https://files.pythonhosted.org/packages/fc/6d/beee32dcda64af8128aab3ace2ccb33d797ed58c434c6419eea015fec779/rapidfuzz-3.14.3-cp312-cp312-win_arm64.whl", hash = "sha256:4e49c9e992bc5fc873bd0fff7ef16a4405130ec42f2ce3d2b735ba5d3d4eb70f", size = 811161, upload-time = "2025-11-01T11:53:23.811Z" }, - { url = "https://files.pythonhosted.org/packages/e4/4f/0d94d09646853bd26978cb3a7541b6233c5760687777fa97da8de0d9a6ac/rapidfuzz-3.14.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dbcb726064b12f356bf10fffdb6db4b6dce5390b23627c08652b3f6e49aa56ae", size = 1939646, upload-time = "2025-11-01T11:53:25.292Z" }, - { url = "https://files.pythonhosted.org/packages/b6/eb/f96aefc00f3bbdbab9c0657363ea8437a207d7545ac1c3789673e05d80bd/rapidfuzz-3.14.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1704fc70d214294e554a2421b473779bcdeef715881c5e927dc0f11e1692a0ff", size = 1385512, upload-time = "2025-11-01T11:53:27.594Z" }, - { url = "https://files.pythonhosted.org/packages/26/34/71c4f7749c12ee223dba90017a5947e8f03731a7cc9f489b662a8e9e643d/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc65e72790ddfd310c2c8912b45106e3800fefe160b0c2ef4d6b6fec4e826457", size = 1373571, upload-time = "2025-11-01T11:53:29.096Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/00/ec8597a64f2be301ce1ee3290d067f49f6a7afb226b67d5f15b56d772ba5/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e38c1305cffae8472572a0584d4ffc2f130865586a81038ca3965301f7c97c", size = 3156759, upload-time = "2025-11-01T11:53:30.777Z" }, - { url = "https://files.pythonhosted.org/packages/61/d5/b41eeb4930501cc899d5a9a7b5c9a33d85a670200d7e81658626dcc0ecc0/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:e195a77d06c03c98b3fc06b8a28576ba824392ce40de8c708f96ce04849a052e", size = 1222067, upload-time = "2025-11-01T11:53:32.334Z" }, - { url = "https://files.pythonhosted.org/packages/2a/7d/6d9abb4ffd1027c6ed837b425834f3bed8344472eb3a503ab55b3407c721/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1b7ef2f4b8583a744338a18f12c69693c194fb6777c0e9ada98cd4d9e8f09d10", size = 2394775, upload-time = "2025-11-01T11:53:34.24Z" }, - { url = "https://files.pythonhosted.org/packages/15/ce/4f3ab4c401c5a55364da1ffff8cc879fc97b4e5f4fa96033827da491a973/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a2135b138bcdcb4c3742d417f215ac2d8c2b87bde15b0feede231ae95f09ec41", size = 2526123, upload-time = "2025-11-01T11:53:35.779Z" }, - { url = "https://files.pythonhosted.org/packages/c1/4b/54f804975376a328f57293bd817c12c9036171d15cf7292032e3f5820b2d/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:33a325ed0e8e1aa20c3e75f8ab057a7b248fdea7843c2a19ade0008906c14af0", size = 4262874, upload-time = "2025-11-01T11:53:37.866Z" }, - { url = "https://files.pythonhosted.org/packages/e9/b6/958db27d8a29a50ee6edd45d33debd3ce732e7209183a72f57544cd5fe22/rapidfuzz-3.14.3-cp313-cp313-win32.whl", hash = "sha256:8383b6d0d92f6cd008f3c9216535be215a064b2cc890398a678b56e6d280cb63", size = 1707972, upload-time = "2025-11-01T11:53:39.442Z" }, - { url = 
"https://files.pythonhosted.org/packages/07/75/fde1f334b0cec15b5946d9f84d73250fbfcc73c236b4bc1b25129d90876b/rapidfuzz-3.14.3-cp313-cp313-win_amd64.whl", hash = "sha256:e6b5e3036976f0fde888687d91be86d81f9ac5f7b02e218913c38285b756be6c", size = 1537011, upload-time = "2025-11-01T11:53:40.92Z" }, - { url = "https://files.pythonhosted.org/packages/2e/d7/d83fe001ce599dc7ead57ba1debf923dc961b6bdce522b741e6b8c82f55c/rapidfuzz-3.14.3-cp313-cp313-win_arm64.whl", hash = "sha256:7ba009977601d8b0828bfac9a110b195b3e4e79b350dcfa48c11269a9f1918a0", size = 810744, upload-time = "2025-11-01T11:53:42.723Z" }, - { url = "https://files.pythonhosted.org/packages/92/13/a486369e63ff3c1a58444d16b15c5feb943edd0e6c28a1d7d67cb8946b8f/rapidfuzz-3.14.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0a28add871425c2fe94358c6300bbeb0bc2ed828ca003420ac6825408f5a424", size = 1967702, upload-time = "2025-11-01T11:53:44.554Z" }, - { url = "https://files.pythonhosted.org/packages/f1/82/efad25e260b7810f01d6b69122685e355bed78c94a12784bac4e0beb2afb/rapidfuzz-3.14.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010e12e2411a4854b0434f920e72b717c43f8ec48d57e7affe5c42ecfa05dd0e", size = 1410702, upload-time = "2025-11-01T11:53:46.066Z" }, - { url = "https://files.pythonhosted.org/packages/ba/1a/34c977b860cde91082eae4a97ae503f43e0d84d4af301d857679b66f9869/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cfc3d57abd83c734d1714ec39c88a34dd69c85474918ebc21296f1e61eb5ca8", size = 1382337, upload-time = "2025-11-01T11:53:47.62Z" }, - { url = "https://files.pythonhosted.org/packages/88/74/f50ea0e24a5880a9159e8fd256b84d8f4634c2f6b4f98028bdd31891d907/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:89acb8cbb52904f763e5ac238083b9fc193bed8d1f03c80568b20e4cef43a519", size = 3165563, upload-time = "2025-11-01T11:53:49.216Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/7a/e744359404d7737049c26099423fc54bcbf303de5d870d07d2fb1410f567/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_31_armv7l.whl", hash = "sha256:7d9af908c2f371bfb9c985bd134e295038e3031e666e4b2ade1e7cb7f5af2f1a", size = 1214727, upload-time = "2025-11-01T11:53:50.883Z" }, - { url = "https://files.pythonhosted.org/packages/d3/2e/87adfe14ce75768ec6c2b8acd0e05e85e84be4be5e3d283cdae360afc4fe/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1f1925619627f8798f8c3a391d81071336942e5fe8467bc3c567f982e7ce2897", size = 2403349, upload-time = "2025-11-01T11:53:52.322Z" }, - { url = "https://files.pythonhosted.org/packages/70/17/6c0b2b2bff9c8b12e12624c07aa22e922b0c72a490f180fa9183d1ef2c75/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:152555187360978119e98ce3e8263d70dd0c40c7541193fc302e9b7125cf8f58", size = 2507596, upload-time = "2025-11-01T11:53:53.835Z" }, - { url = "https://files.pythonhosted.org/packages/c3/d1/87852a7cbe4da7b962174c749a47433881a63a817d04f3e385ea9babcd9e/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52619d25a09546b8db078981ca88939d72caa6b8701edd8b22e16482a38e799f", size = 4273595, upload-time = "2025-11-01T11:53:55.961Z" }, - { url = "https://files.pythonhosted.org/packages/c1/ab/1d0354b7d1771a28fa7fe089bc23acec2bdd3756efa2419f463e3ed80e16/rapidfuzz-3.14.3-cp313-cp313t-win32.whl", hash = "sha256:489ce98a895c98cad284f0a47960c3e264c724cb4cfd47a1430fa091c0c25204", size = 1757773, upload-time = "2025-11-01T11:53:57.628Z" }, - { url = "https://files.pythonhosted.org/packages/0b/0c/71ef356adc29e2bdf74cd284317b34a16b80258fa0e7e242dd92cc1e6d10/rapidfuzz-3.14.3-cp313-cp313t-win_amd64.whl", hash = "sha256:656e52b054d5b5c2524169240e50cfa080b04b1c613c5f90a2465e84888d6f15", size = 1576797, upload-time = "2025-11-01T11:53:59.455Z" }, - { url = 
"https://files.pythonhosted.org/packages/fe/d2/0e64fc27bb08d4304aa3d11154eb5480bcf5d62d60140a7ee984dc07468a/rapidfuzz-3.14.3-cp313-cp313t-win_arm64.whl", hash = "sha256:c7e40c0a0af02ad6e57e89f62bef8604f55a04ecae90b0ceeda591bbf5923317", size = 829940, upload-time = "2025-11-01T11:54:01.1Z" }, - { url = "https://files.pythonhosted.org/packages/32/6f/1b88aaeade83abc5418788f9e6b01efefcd1a69d65ded37d89cd1662be41/rapidfuzz-3.14.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:442125473b247227d3f2de807a11da6c08ccf536572d1be943f8e262bae7e4ea", size = 1942086, upload-time = "2025-11-01T11:54:02.592Z" }, - { url = "https://files.pythonhosted.org/packages/a0/2c/b23861347436cb10f46c2bd425489ec462790faaa360a54a7ede5f78de88/rapidfuzz-3.14.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ec0c8c0c3d4f97ced46b2e191e883f8c82dbbf6d5ebc1842366d7eff13cd5a6", size = 1386993, upload-time = "2025-11-01T11:54:04.12Z" }, - { url = "https://files.pythonhosted.org/packages/83/86/5d72e2c060aa1fbdc1f7362d938f6b237dff91f5b9fc5dd7cc297e112250/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2dc37bc20272f388b8c3a4eba4febc6e77e50a8f450c472def4751e7678f55e4", size = 1379126, upload-time = "2025-11-01T11:54:05.777Z" }, - { url = "https://files.pythonhosted.org/packages/c9/bc/ef2cee3e4d8b3fc22705ff519f0d487eecc756abdc7c25d53686689d6cf2/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dee362e7e79bae940a5e2b3f6d09c6554db6a4e301cc68343886c08be99844f1", size = 3159304, upload-time = "2025-11-01T11:54:07.351Z" }, - { url = "https://files.pythonhosted.org/packages/a0/36/dc5f2f62bbc7bc90be1f75eeaf49ed9502094bb19290dfb4747317b17f12/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:4b39921df948388a863f0e267edf2c36302983459b021ab928d4b801cbe6a421", size = 1218207, upload-time = "2025-11-01T11:54:09.641Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/7e/8f4be75c1bc62f47edf2bbbe2370ee482fae655ebcc4718ac3827ead3904/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:beda6aa9bc44d1d81242e7b291b446be352d3451f8217fcb068fc2933927d53b", size = 2401245, upload-time = "2025-11-01T11:54:11.543Z" }, - { url = "https://files.pythonhosted.org/packages/05/38/f7c92759e1bb188dd05b80d11c630ba59b8d7856657baf454ff56059c2ab/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:6a014ba09657abfcfeed64b7d09407acb29af436d7fc075b23a298a7e4a6b41c", size = 2518308, upload-time = "2025-11-01T11:54:13.134Z" }, - { url = "https://files.pythonhosted.org/packages/c7/ac/85820f70fed5ecb5f1d9a55f1e1e2090ef62985ef41db289b5ac5ec56e28/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:32eeafa3abce138bb725550c0e228fc7eaeec7059aa8093d9cbbec2b58c2371a", size = 4265011, upload-time = "2025-11-01T11:54:15.087Z" }, - { url = "https://files.pythonhosted.org/packages/46/a9/616930721ea9835c918af7cde22bff17f9db3639b0c1a7f96684be7f5630/rapidfuzz-3.14.3-cp314-cp314-win32.whl", hash = "sha256:adb44d996fc610c7da8c5048775b21db60dd63b1548f078e95858c05c86876a3", size = 1742245, upload-time = "2025-11-01T11:54:17.19Z" }, - { url = "https://files.pythonhosted.org/packages/06/8a/f2fa5e9635b1ccafda4accf0e38246003f69982d7c81f2faa150014525a4/rapidfuzz-3.14.3-cp314-cp314-win_amd64.whl", hash = "sha256:f3d15d8527e2b293e38ce6e437631af0708df29eafd7c9fc48210854c94472f9", size = 1584856, upload-time = "2025-11-01T11:54:18.764Z" }, - { url = "https://files.pythonhosted.org/packages/ef/97/09e20663917678a6d60d8e0e29796db175b1165e2079830430342d5298be/rapidfuzz-3.14.3-cp314-cp314-win_arm64.whl", hash = "sha256:576e4b9012a67e0bf54fccb69a7b6c94d4e86a9540a62f1a5144977359133583", size = 833490, upload-time = "2025-11-01T11:54:20.753Z" }, - { url = 
"https://files.pythonhosted.org/packages/03/1b/6b6084576ba87bf21877c77218a0c97ba98cb285b0c02eaaee3acd7c4513/rapidfuzz-3.14.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:cec3c0da88562727dd5a5a364bd9efeb535400ff0bfb1443156dd139a1dd7b50", size = 1968658, upload-time = "2025-11-01T11:54:22.25Z" }, - { url = "https://files.pythonhosted.org/packages/38/c0/fb02a0db80d95704b0a6469cc394e8c38501abf7e1c0b2afe3261d1510c2/rapidfuzz-3.14.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d1fa009f8b1100e4880868137e7bf0501422898f7674f2adcd85d5a67f041296", size = 1410742, upload-time = "2025-11-01T11:54:23.863Z" }, - { url = "https://files.pythonhosted.org/packages/a4/72/3fbf12819fc6afc8ec75a45204013b40979d068971e535a7f3512b05e765/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b86daa7419b5e8b180690efd1fdbac43ff19230803282521c5b5a9c83977655", size = 1382810, upload-time = "2025-11-01T11:54:25.571Z" }, - { url = "https://files.pythonhosted.org/packages/0f/18/0f1991d59bb7eee28922a00f79d83eafa8c7bfb4e8edebf4af2a160e7196/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7bd1816db05d6c5ffb3a4df0a2b7b56fb8c81ef584d08e37058afa217da91b1", size = 3166349, upload-time = "2025-11-01T11:54:27.195Z" }, - { url = "https://files.pythonhosted.org/packages/0d/f0/baa958b1989c8f88c78bbb329e969440cf330b5a01a982669986495bb980/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:33da4bbaf44e9755b0ce192597f3bde7372fe2e381ab305f41b707a95ac57aa7", size = 1214994, upload-time = "2025-11-01T11:54:28.821Z" }, - { url = "https://files.pythonhosted.org/packages/e4/a0/cd12ec71f9b2519a3954febc5740291cceabc64c87bc6433afcb36259f3b/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3fecce764cf5a991ee2195a844196da840aba72029b2612f95ac68a8b74946bf", size = 2403919, upload-time = "2025-11-01T11:54:30.393Z" }, - { url = 
"https://files.pythonhosted.org/packages/0b/ce/019bd2176c1644098eced4f0595cb4b3ef52e4941ac9a5854f209d0a6e16/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:ecd7453e02cf072258c3a6b8e930230d789d5d46cc849503729f9ce475d0e785", size = 2508346, upload-time = "2025-11-01T11:54:32.048Z" }, - { url = "https://files.pythonhosted.org/packages/23/f8/be16c68e2c9e6c4f23e8f4adbb7bccc9483200087ed28ff76c5312da9b14/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ea188aa00e9bcae8c8411f006a5f2f06c4607a02f24eab0d8dc58566aa911f35", size = 4274105, upload-time = "2025-11-01T11:54:33.701Z" }, - { url = "https://files.pythonhosted.org/packages/a1/d1/5ab148e03f7e6ec8cd220ccf7af74d3aaa4de26dd96df58936beb7cba820/rapidfuzz-3.14.3-cp314-cp314t-win32.whl", hash = "sha256:7ccbf68100c170e9a0581accbe9291850936711548c6688ce3bfb897b8c589ad", size = 1793465, upload-time = "2025-11-01T11:54:35.331Z" }, - { url = "https://files.pythonhosted.org/packages/cd/97/433b2d98e97abd9fff1c470a109b311669f44cdec8d0d5aa250aceaed1fb/rapidfuzz-3.14.3-cp314-cp314t-win_amd64.whl", hash = "sha256:9ec02e62ae765a318d6de38df609c57fc6dacc65c0ed1fd489036834fd8a620c", size = 1623491, upload-time = "2025-11-01T11:54:38.085Z" }, - { url = "https://files.pythonhosted.org/packages/e2/f6/e2176eb94f94892441bce3ddc514c179facb65db245e7ce3356965595b19/rapidfuzz-3.14.3-cp314-cp314t-win_arm64.whl", hash = "sha256:e805e52322ae29aa945baf7168b6c898120fbc16d2b8f940b658a5e9e3999253", size = 851487, upload-time = "2025-11-01T11:54:40.176Z" }, +version = "3.14.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/21/ef6157213316e85790041254259907eb722e00b03480256c0545d98acd33/rapidfuzz-3.14.5.tar.gz", hash = "sha256:ba10ac57884ce82112f7ed910b67e7fb6072d8ef2c06e30dc63c0f604a112e0e", size = 57901753, upload-time = "2026-04-07T11:16:31.931Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d3/e3/574435c6aafb80254c191ef40d7aca2cb2bb97a095ec9395e9fa59ac307a/rapidfuzz-3.14.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0d3378f471ef440473a396ce2f8e97ee12f89a78b495540e0a5617bbfe895638", size = 1944601, upload-time = "2026-04-07T11:14:18.771Z" }, + { url = "https://files.pythonhosted.org/packages/d0/1f/fbad3102a255ecc112ce9a7e779bacab7fd14398217be8868dc9082ba363/rapidfuzz-3.14.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e910eebca9fd0eba245c0555e764597e8a0cccb673a92da2dc2397050725f48", size = 1164293, upload-time = "2026-04-07T11:14:20.534Z" }, + { url = "https://files.pythonhosted.org/packages/88/37/a3eb7ff6121ed3a5f199a8c38cc86c8e481816f879cb0e0b738b078c9a7e/rapidfuzz-3.14.5-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01550fe5f60fd176aa66b7611289d46dc4aa4b1b904874c7b6d1d54e581c5ec1", size = 1371999, upload-time = "2026-04-07T11:14:22.63Z" }, + { url = "https://files.pythonhosted.org/packages/79/72/97a9728c711c7c1b06e107d3f0623880fb4ef90e147ed13c551a1730e7cc/rapidfuzz-3.14.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:48bee0b91bebfaec41e1081e351000659ab7570cc4598d617aa04d5bf827f9e6", size = 3145715, upload-time = "2026-04-07T11:14:24.508Z" }, + { url = "https://files.pythonhosted.org/packages/ed/54/d5caabbea233ac90c286c87c260e49d7641467e87438a18d858e41c82e91/rapidfuzz-3.14.5-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:7e580cb04ad849ae9b786fa21383c6b994b6e6c1444ad1cb9f22392759d72741", size = 1456304, upload-time = "2026-04-07T11:14:26.515Z" }, + { url = "https://files.pythonhosted.org/packages/fc/a7/2d1a81250ac8c01a0100c026018e76f0e7a097ff63e4c553e02a6938c6fb/rapidfuzz-3.14.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:09d6c9ba091854f07817055d795d604179c12a8f308ba4c7d56f3719dfea1646", size = 2389089, upload-time = "2026-04-07T11:14:28.635Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/0d/c47c3872203ae88e6506997c0b576ad731f5261daa25d559be09c9756658/rapidfuzz-3.14.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:1e989f86113be66574113b9c7bdf4793f3f863d248e47d911b355e05ca6b6b10", size = 2493404, upload-time = "2026-04-07T11:14:30.577Z" }, + { url = "https://files.pythonhosted.org/packages/8f/2f/71e0a5a3130792146c8a200a2dd1e52aa16f7c1074012e17f2601eea9a90/rapidfuzz-3.14.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ebd1a18e2e47bc0b292a07e6ed9c3642f8aaa672d12253885f599b50807a4f9", size = 4251709, upload-time = "2026-04-07T11:14:32.451Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/d39874901abacef325adb5b34ae416817c8486dfb4fb87c7a9b74ec5b072/rapidfuzz-3.14.5-cp312-cp312-win32.whl", hash = "sha256:9981d38a703b86f0e315a3cd229fd1906fe1d91c989ed121fb975b3c849f89f5", size = 1710069, upload-time = "2026-04-07T11:14:34.37Z" }, + { url = "https://files.pythonhosted.org/packages/85/0b/f65572c53de8a1c704bda707f63a447b67bdbe95d7cdc70d18885e191df5/rapidfuzz-3.14.5-cp312-cp312-win_amd64.whl", hash = "sha256:d8375e3da319593389727c3187ccaf3e0e84199accc530866b8e0f2b79af05e9", size = 1540630, upload-time = "2026-04-07T11:14:36.287Z" }, + { url = "https://files.pythonhosted.org/packages/5e/c3/143be3a578f989758cae516f3270d5cbb49783a7bfdf57cc27a670e00456/rapidfuzz-3.14.5-cp312-cp312-win_arm64.whl", hash = "sha256:478b59bb018a6780d73f33e38d0b3ec5e968a6c1ed42876b993dd456b7aa20e8", size = 813137, upload-time = "2026-04-07T11:14:38.289Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/252803f2010ba699618cdc048b6e1f7cc1f433c08b4a9a17579b92ab0142/rapidfuzz-3.14.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ebd8fd343bf8492a1e60bcb6dc99f90f74f65d98d8241a6b3e1fed225b76ecd6", size = 1940205, upload-time = "2026-04-07T11:14:40.319Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/59/b2afd98e41af9cd54554a4c1c423d84cdd60e6b1c0a09496f033b55f60ec/rapidfuzz-3.14.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6737b35d5af7479c5bf9710f7b17edd9d2c43128d974d25fb4ea653e42c64609", size = 1159639, upload-time = "2026-04-07T11:14:42.52Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/7aa7e62c4c516a7af322ed0c4f0774208b72d457d0cfec808bad0df12f4a/rapidfuzz-3.14.5-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b002c7994cc9f2bc9d9856f0fbaee6e8072c983873846c92f25cefba5b2a925f", size = 1367194, upload-time = "2026-04-07T11:14:44.25Z" }, + { url = "https://files.pythonhosted.org/packages/90/79/2fc252a63bc91d3c3b234d0a3a6ad4ebc460037a23cdcdaf9285f986e6c9/rapidfuzz-3.14.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17a34330cd2a538c1ce5d400b61ba358c5b72c654b928ff87b362e88f8b864c7", size = 3151805, upload-time = "2026-04-07T11:14:46.21Z" }, + { url = "https://files.pythonhosted.org/packages/17/54/0c83508f2683ea70e2d05f8527eb07328acf7bb1e9d97a3bece5702378e7/rapidfuzz-3.14.5-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:95d937e74c1a7a1287dfb03b62a827be08ede10a155cf1af73bbf47f2b73ee6e", size = 1455667, upload-time = "2026-04-07T11:14:47.991Z" }, + { url = "https://files.pythonhosted.org/packages/71/1b/070175e873177814d58850a01ebe80e20ae11e93eb4da894d563988660fa/rapidfuzz-3.14.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:46b92a9970dcc34f0096901c792644094cab49554ac3547f35e3aebbdf0a3610", size = 2388246, upload-time = "2026-04-07T11:14:50.098Z" }, + { url = "https://files.pythonhosted.org/packages/c9/dd/77caf7aaf9c2be050ad1f128d7c24ff0f59079aa62c5f62f9df41c0af45e/rapidfuzz-3.14.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e012177c8e8a8a0754ae0d6027d63042aa5ff036d9f40f07cb3466a6082e21b8", size = 2494333, upload-time = "2026-04-07T11:14:52.303Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/e2/dd7e1f2aa31a8fbbfc16b0610af1d770ffaf1287490f3c8c5b1c52da264f/rapidfuzz-3.14.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a2ae6f53f99c9a0eca7a0afc5b4e45fc73bc1dd4ac74c00509031d76df80ed98", size = 4258579, upload-time = "2026-04-07T11:14:54.538Z" }, + { url = "https://files.pythonhosted.org/packages/9c/0a/ac99e1ba347ba0e85e0bb60b74231d55fb93c0eff43f2920ccb413d0be08/rapidfuzz-3.14.5-cp313-cp313-win32.whl", hash = "sha256:4a60f0057231188e3bd30216f7b4e0f279b11fa4ec818bb6c1d9f014d1562fbc", size = 1709231, upload-time = "2026-04-07T11:14:56.524Z" }, + { url = "https://files.pythonhosted.org/packages/cf/cb/0e251d731b3166378644238e8f0cf9e89858c024e19f75ca9f7e3ae83fd5/rapidfuzz-3.14.5-cp313-cp313-win_amd64.whl", hash = "sha256:11bfc2ed8fbe4ab86bd516fadefab126f90e6dcadffa761739fcb304707dfd35", size = 1538519, upload-time = "2026-04-07T11:14:58.635Z" }, + { url = "https://files.pythonhosted.org/packages/30/6f/4548132acc947db6d5346a248e44a8b3a22d608ef30e770fb578caaf2d00/rapidfuzz-3.14.5-cp313-cp313-win_arm64.whl", hash = "sha256:b486b5218808f6f4dc471b114b1054e63553db69705c97da0271f47bd706aedd", size = 812628, upload-time = "2026-04-07T11:15:00.552Z" }, + { url = "https://files.pythonhosted.org/packages/00/60/69b177577290c5eab892c6f75fe89c3aff3f9ae80298a78d9372b1cecb9a/rapidfuzz-3.14.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:39ef8658aaf67d51667e7bdaf7096f432333377d8302ac43c70b5df8a4cf89b8", size = 1970231, upload-time = "2026-04-07T11:15:02.603Z" }, + { url = "https://files.pythonhosted.org/packages/48/38/2fd790052659cc4e2907b63c25433f0987864b445c1aeec1a302ef5ad948/rapidfuzz-3.14.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ad37a0be705b544af6296da8edddc260d10a8ae5462530fc9991f66498bb1f9", size = 1194394, upload-time = "2026-04-07T11:15:04.572Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/f4/28430ad8472fc3536e8ebd51a864a226e979cfe924c6e3f83d111373aa74/rapidfuzz-3.14.5-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d45e06f60729e07d9b20c205f7e5cff90b6ef2584e852eecf46e045aea69627d", size = 1377051, upload-time = "2026-04-07T11:15:06.728Z" }, + { url = "https://files.pythonhosted.org/packages/77/7e/9aeacabcfd1e77397968362e5b98fe14248b8307011136b17daf99752a8e/rapidfuzz-3.14.5-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e52da10236aa6212de71b9e170bace65b64b129c0dea7fc243d6c9ce976f5074", size = 3160565, upload-time = "2026-04-07T11:15:08.667Z" }, + { url = "https://files.pythonhosted.org/packages/56/f4/db4dd7be0cd2f2022117ac5407d905f435d60e48baaea313a567ad27e865/rapidfuzz-3.14.5-cp313-cp313t-manylinux_2_39_riscv64.whl", hash = "sha256:440d30faaf682ca496170a7f0cc5453ec942e3e079f0fd802c9a7f938dfb50a3", size = 1442113, upload-time = "2026-04-07T11:15:11.138Z" }, + { url = "https://files.pythonhosted.org/packages/a4/99/0e9f6aa57f3e32a767216f797e56dc96b720fcecfb9d8ee907ecc82f8d66/rapidfuzz-3.14.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:56227a61fd3d17b0cd9793132431f3a3d07c8654be96794ba9f89fe0fc8b2d09", size = 2396618, upload-time = "2026-04-07T11:15:13.154Z" }, + { url = "https://files.pythonhosted.org/packages/60/94/44a78e39ffce17cbdd3e2b53b696acc751d5d153be0f499d052b07a4d904/rapidfuzz-3.14.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:2e83cd2e25bb4edd97b689d9979d9c3acccdaaf26ceac08212ceece202febcfa", size = 2478220, upload-time = "2026-04-07T11:15:15.193Z" }, + { url = "https://files.pythonhosted.org/packages/dd/df/454311469a09a507e9d784a35796742bec22e4cebe75551e2da4e0e290fd/rapidfuzz-3.14.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:af3b859726cd3374287e405e14b9634563c078c5531a4f62375508addebddad1", size = 4265027, upload-time = "2026-04-07T11:15:17.28Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/01/175465a9ab3e3b70ba669058372f009d1d49c1746e2dcd56b69df188d3a5/rapidfuzz-3.14.5-cp313-cp313t-win32.whl", hash = "sha256:8ce1d850b3c0178440efde9e884d98421b5e87ff925f364d6d79e23910d7593f", size = 1766814, upload-time = "2026-04-07T11:15:19.687Z" }, + { url = "https://files.pythonhosted.org/packages/1b/a0/a9b84a47af06ebed94a1439eb2f02adebfb8628bcd30af1fe3e02f5ef56c/rapidfuzz-3.14.5-cp313-cp313t-win_amd64.whl", hash = "sha256:c84af70bcf34e99aee894e46a0f1ac77f17d0ef828179c387407642e2466d28a", size = 1582448, upload-time = "2026-04-07T11:15:21.98Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f1/5937800238b3f8248e70860d79f69ba8f73e764fff47e36bc9e2f26dbcc6/rapidfuzz-3.14.5-cp313-cp313t-win_arm64.whl", hash = "sha256:aac0ad28c686a5e72b81668b906c030ee28050b244544b8af68e12fb32543895", size = 832932, upload-time = "2026-04-07T11:15:24.358Z" }, + { url = "https://files.pythonhosted.org/packages/81/41/aa3ffb3355e62e1bf91f6599b3092e866bc88487a07c524004943c7676df/rapidfuzz-3.14.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:1a31cc6d7d03e7318a0974c038959c59e19c752b81115f2e9138b3331cd64d45", size = 1943327, upload-time = "2026-04-07T11:15:26.266Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e1/c2141f1840a41e07ad2db6f724945f8f8ff3065463899a22939152dd6e09/rapidfuzz-3.14.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0298d357e2bc59d572da4db0bc631009b6f8f6c9bc8c11e99a12b833f16b6575", size = 1161755, upload-time = "2026-04-07T11:15:28.659Z" }, + { url = "https://files.pythonhosted.org/packages/ca/07/66e753eeaa353161d1d331b7dd517bb349b0bacfebe8496d7b26be26f81f/rapidfuzz-3.14.5-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:59b3dba758661a318995655435c6ab20a04ade79fa51e75bc8dc107cac8df280", size = 1376571, upload-time = "2026-04-07T11:15:31.225Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/85/9535df0b78ba51f478c9ce7eb6d1f85535cc31fe356773b48fd9d3e563ca/rapidfuzz-3.14.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4900143d82071bdda533b00300c40b14b963ff826b3642cc463b6dd0f036585e", size = 3156468, upload-time = "2026-04-07T11:15:33.428Z" }, + { url = "https://files.pythonhosted.org/packages/81/ee/b667eb93bba6dc4e0de658edd778e1619dc4d6aab68fa5e5c7f075152735/rapidfuzz-3.14.5-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:feedf219672eef83ea6be6f3bb093bba396a8560fc75be85ba225f082903df0a", size = 1458311, upload-time = "2026-04-07T11:15:35.557Z" }, + { url = "https://files.pythonhosted.org/packages/7d/ce/479074f5624364a48df3403c538797ef22d3ac49c19dc76c3f79fcdcc70c/rapidfuzz-3.14.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:419e4397a36e2665ec992d8d64c20ba4b2a42500c76ecadeca78a4f19cb9cc32", size = 2398228, upload-time = "2026-04-07T11:15:37.669Z" }, + { url = "https://files.pythonhosted.org/packages/0b/15/a8982f649150fffbdcd6f17565974501f6ab33b2795267bffbd4a7ba905b/rapidfuzz-3.14.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:97131ab2be39043054ee28d99e09efe316e6d53449b7e962dfcf3c2de8b2b246", size = 2497226, upload-time = "2026-04-07T11:15:39.857Z" }, + { url = "https://files.pythonhosted.org/packages/19/52/5267c03ef6759831b7d4625a0c9c06e87baa2fae084b61ac9c388858317b/rapidfuzz-3.14.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:593c00dac4e30231c35bf3b4f1da8ec0998762e9e94425586a5d636fcd57f9d0", size = 4262283, upload-time = "2026-04-07T11:15:42.279Z" }, + { url = "https://files.pythonhosted.org/packages/71/c0/2579f343a97f5254c43bb5853baccc01488357dcb64a27bcb869b7888a4a/rapidfuzz-3.14.5-cp314-cp314-win32.whl", hash = "sha256:0084b687b02b4e569b46d8d6d4ad25659528e6081cd6d067ca453a69035f07e4", size = 1744614, upload-time = "2026-04-07T11:15:44.498Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/eb/8edfed1e80119dc9c35b11df4bc701eea85622ad681fff0263b6961d3224/rapidfuzz-3.14.5-cp314-cp314-win_amd64.whl", hash = "sha256:5dfa89d78f22cd773054caff44827b846161a29f2dcf7e78b8f90d086621e502", size = 1588971, upload-time = "2026-04-07T11:15:46.86Z" }, + { url = "https://files.pythonhosted.org/packages/f6/04/5676df93c85cfa57a3045d8047318df9f3cd58c7b8a99340dd95f874795e/rapidfuzz-3.14.5-cp314-cp314-win_arm64.whl", hash = "sha256:67f3f9d2b444268ab53e47d31bab89954888d23c04c6789f2c727e51fe4b1d13", size = 834985, upload-time = "2026-04-07T11:15:49.411Z" }, + { url = "https://files.pythonhosted.org/packages/f7/0d/4a8988cea658fe335048ddef8c876addff1b6daa3c9ca8ad65a5a2196e69/rapidfuzz-3.14.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:77eac0526899b3c3ad1454bb2b03cdb491d67358ec8ef0c9c48bd61b632b431d", size = 1972517, upload-time = "2026-04-07T11:15:51.819Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a3/f5cfd9965a9d9a9e32249159797c47b5d6299ea6d1629f9126b25f1c10a3/rapidfuzz-3.14.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b9c6bd754d11f6e78ac54e3d86b4b11dc1ba2f13e5fc958899574532897f5a99", size = 1196056, upload-time = "2026-04-07T11:15:54.292Z" }, + { url = "https://files.pythonhosted.org/packages/64/07/561c2e40cfd10e6630a7b0ac5a2a813aef50d944bcd1f3d260319d659d5b/rapidfuzz-3.14.5-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:738c96944d076deeaff70e92b65696ab4f7ecb8081d7791c5403a3257dfaf8ff", size = 1374732, upload-time = "2026-04-07T11:15:56.584Z" }, + { url = "https://files.pythonhosted.org/packages/c2/39/123bb94fee40e2fb3b7c49b80827c7ef42d838e18def3fc2fef5a3cf817a/rapidfuzz-3.14.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4c1bca487a17fe4226b4ffb2d30e799d2b274d692cffa76bd0746f56235fca3", size = 3166902, upload-time = "2026-04-07T11:15:58.768Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/0a/45716fafc9fd2e028cf20b5ac5bc704887081cd312f84edb0e325599414b/rapidfuzz-3.14.5-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:af6a90a4ed2a48fa1a2d17e9d824e6c7c950bea5bad0b707c77fd55751e6bfef", size = 1452130, upload-time = "2026-04-07T11:16:01.453Z" }, + { url = "https://files.pythonhosted.org/packages/ca/49/4e96c413114398481c0a5b0086af32c364a18613c9a2ea578d17c4bea4ee/rapidfuzz-3.14.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bf5018938208d4597b2e679a4f8cff9fd252f1df53583130ae56281a21801b64", size = 2396308, upload-time = "2026-04-07T11:16:03.588Z" }, + { url = "https://files.pythonhosted.org/packages/89/b7/49fea9fc6878d59bd259d01dd1972d9b86117992b1c66d9b16f0a65273c3/rapidfuzz-3.14.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:c0919d1f89ddf91129906705723118ea09754171e4116f5a5dbc667c7bc9b261", size = 2488210, upload-time = "2026-04-07T11:16:05.871Z" }, + { url = "https://files.pythonhosted.org/packages/0c/44/a1f732b93ffacbdad077b7c801149549b2938e1bece6addb5ad85ed74df8/rapidfuzz-3.14.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:93d8da883a35116d6813432177f35e570db5b0a5e30ecb0cbd7cb39c815735df", size = 4270621, upload-time = "2026-04-07T11:16:08.483Z" }, + { url = "https://files.pythonhosted.org/packages/bb/ce/ff942d19fce5385054650bb71a58495ddda299d94661ccc4e6e7fa44868b/rapidfuzz-3.14.5-cp314-cp314t-win32.whl", hash = "sha256:0f23e37019ec07712d58976b1ab2b889f8649a7f7c2f626a2f34ea9139e79279", size = 1803950, upload-time = "2026-04-07T11:16:10.873Z" }, + { url = "https://files.pythonhosted.org/packages/5c/0f/9aafc63f9661222b819b391c187eed29fc90ad5935f9690e5ecc2d2047a4/rapidfuzz-3.14.5-cp314-cp314t-win_amd64.whl", hash = "sha256:7d5ca9c7832e6879a707296d1463685f7c243a27846227044504741640caec66", size = 1632357, upload-time = "2026-04-07T11:16:13.1Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/a6/51fc1b0e61e3326e1c68a61cfd0c6b3c34c843681c4b1eefbf0596f59162/rapidfuzz-3.14.5-cp314-cp314t-win_arm64.whl", hash = "sha256:3e91dcd2549b8f8d843f98ba03a17e01f3d8b72ce942adbbb6761bc58ffce813", size = 855409, upload-time = "2026-04-07T11:16:15.787Z" }, ] [[package]] @@ -3092,14 +3083,14 @@ wheels = [ [[package]] name = "tqdm" -version = "4.66.5" +version = "4.67.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/58/83/6ba9844a41128c62e810fddddd72473201f3eacde02046066142a2d96cc5/tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad", size = 169504, upload-time = "2024-08-03T22:35:40.339Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/5d/acf5905c36149bbaec41ccf7f2b68814647347b72075ac0b1fe3022fdc73/tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd", size = 78351, upload-time = "2024-08-03T22:35:36.644Z" }, + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] [[package]]