Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bc2/core/analyze/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from typing import Union

from .azuredi import AzureDIAnalyzeConfig

# Union of every supported analyze-engine config. Azure DI is the only member
# right now; typing collapses a one-member Union to that member itself, so
# AnalyzeConfig currently evaluates to AzureDIAnalyzeConfig.
# NOTE(review): the trailing comma presumably leaves room for adding further
# engines on their own lines — confirm intent.
AnalyzeConfig = Union[AzureDIAnalyzeConfig,]
2 changes: 1 addition & 1 deletion bc2/core/analyze/azuredi.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
class AzureDIAnalyzeConfig(BaseModel):
"""Azure DI Analyze config."""

engine: Literal["analyze:azuredi"]
engine: Literal["analyze:azuredi"] = "analyze:azuredi"
endpoint: str
api_key: str
# Todo: Add api_version, since we'll need to match what's on GovCloud,
Expand Down
6 changes: 6 additions & 0 deletions bc2/core/common/all.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import typing

from ..analyze import AnalyzeConfig
from ..extract import ExtractConfig
from ..input import InputConfig
from ..inspect import InspectConfig
from ..ontology import OntologyConfig
Comment thread
jnu marked this conversation as resolved.
from ..output import OutputConfig
from ..paint import PaintConfig
from ..parse import ParseConfig
from ..redact import RedactConfig
from ..render import RenderConfig
Expand All @@ -14,7 +17,10 @@
]

AnyProcessingConfig = typing.Union[
AnalyzeConfig,
ExtractConfig,
OntologyConfig,
PaintConfig,
RedactConfig,
InspectConfig,
ParseConfig,
Expand Down
5 changes: 5 additions & 0 deletions bc2/core/ontology/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from typing import Union

from .openai import OpenAIOntologyConfig

# Union of every supported ontology-engine config. OpenAI is the only member
# right now; typing collapses a one-member Union to that member itself, so
# OntologyConfig currently evaluates to OpenAIOntologyConfig.
OntologyConfig = Union[OpenAIOntologyConfig,]
6 changes: 5 additions & 1 deletion bc2/core/ontology/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
from typing import Generic, TypeVar

from ..common.context import Context
from ..common.file import MemoryFile
from ..common.ontology import PoliceReportParseResult
from ..common.preprocess import PreprocessMixin
Expand All @@ -15,7 +16,7 @@ class EmptyOntologyError(Exception):


class BaseOntologyDriver(ABC, Generic[T], PreprocessMixin[T]):
def __call__(self, file: MemoryFile) -> MemoryFile:
def __call__(self, file: MemoryFile, context: Context) -> MemoryFile:
"""Extract a structured police report ontology from a file."""
data = self.preprocess(file)
result = self.extract(data)
Expand All @@ -24,6 +25,9 @@ def __call__(self, file: MemoryFile) -> MemoryFile:
"No source chunks found in ontology extraction result."
)

# Save the extracted ontology in context.
context.ontology = result

# Serialize for transport.
f = MemoryFile(
content=result.model_dump_json().encode("utf-8"),
Expand Down
4 changes: 2 additions & 2 deletions bc2/core/ontology/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from functools import cached_property
from typing import Literal

from azure.ai.formrecognizer import AnalyzeResult
from azure.ai.documentintelligence.models import AnalyzeResult
from openai import OpenAI

from ..common.file import MemoryFile
Expand All @@ -24,7 +24,7 @@
class OpenAIOntologyConfig(OpenAIConfig):
"""OpenAI Ontology config."""

engine: Literal["ontology:openai"]
engine: Literal["ontology:openai"] = "ontology:openai"
generator: OpenAIChatConfig[PoliceReport]

@cached_property
Expand Down
5 changes: 5 additions & 0 deletions bc2/core/paint/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from typing import Union

from .ontology import OntologyPainterConfig

# Union of every supported paint-engine config. The ontology painter is the
# only member right now; typing collapses a one-member Union to that member
# itself, so PaintConfig currently evaluates to OntologyPainterConfig.
PaintConfig = Union[OntologyPainterConfig,]
2 changes: 1 addition & 1 deletion bc2/core/paint/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
T = TypeVar("T")


class BasePainter(ABC, Generic[T], PreprocessMixin[T]):
class BasePainterDriver(ABC, Generic[T], PreprocessMixin[T]):
def __call__(self, file: MemoryFile, context: Context) -> MemoryFile:
"""Paint a file, returning an annotated version.

Expand Down
12 changes: 10 additions & 2 deletions bc2/core/paint/ontology.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from functools import cached_property
from typing import Literal

import pymupdf
Expand All @@ -8,7 +9,7 @@
from ..common.ontopainter import OntoPainter, OntoPainterFieldConfig, OntoPainterMark
from ..common.palette import Palette
from ..common.preprocess import register_preprocessor
from .base import BasePainter
from .base import BasePainterDriver

painter = OntoPainter(
fields=[
Expand Down Expand Up @@ -151,8 +152,15 @@
class OntologyPainterConfig(BaseModel):
    """Configuration for the ontology painter.

    The only field is ``engine``, which is restricted to the literal
    ``"paint:ontology"`` and defaults to that value.
    """

    engine: Literal["paint:ontology"] = "paint:ontology"

    @cached_property
    def driver(self) -> "OntologyPainterDriver":
        """Return an ``OntologyPainterDriver`` constructed from this config.

        Declared with ``functools.cached_property``, so the driver is built
        on first access and the same object is returned on later accesses.
        """
        # The return annotation is a string because OntologyPainterDriver is
        # defined further down in the module.
        return OntologyPainterDriver(self)


class OntologyPainterDriver(BasePainterDriver[PoliceReportParseResult]):
def __init__(self, config: OntologyPainterConfig):
self.config = config

class OntologyPainter(BasePainter[PoliceReportParseResult]):
@register_preprocessor(r"application/x-ontology")
def preprocess_ontology(self, file: MemoryFile) -> PoliceReportParseResult:
"""Deserialize an ontology MemoryFile into a PoliceReportParseResult."""
Expand Down
Loading