Skip to content

Commit c611354

Browse files
committed
feat: added langextract audit and guardrails
1 parent 49dca81 commit c611354

File tree

10 files changed

+1132
-96
lines changed

10 files changed

+1132
-96
lines changed

.env.example

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,25 @@ EXTRACTION_CACHE_BACKEND=redis
4141
TASK_TIME_LIMIT=3600
4242
TASK_SOFT_TIME_LIMIT=3300
4343
RESULT_EXPIRES=86400
44+
45+
# ── Audit logging (langextract-audit) ────────────────────────────────────────
46+
# Enable structured audit logging for every LLM inference call
47+
AUDIT_ENABLED=false
48+
# Sink type: logging (stdlib), jsonfile (NDJSON file), otel (OpenTelemetry)
49+
AUDIT_SINK=logging
50+
# Path for the NDJSON audit file (only used when AUDIT_SINK=jsonfile)
51+
AUDIT_LOG_PATH=audit.jsonl
52+
# Max length of the truncated prompt/response samples stored in audit records (unset = disabled)
53+
# AUDIT_SAMPLE_LENGTH=200
54+
55+
# ── Guardrails / output validation (langextract-guardrails) ──────────────────
56+
# Enable LLM output validation with automatic retry & corrective prompting
57+
GUARDRAILS_ENABLED=false
58+
# Maximum retry attempts when validation fails (default 3)
GUARDRAILS_MAX_RETRIES=3
# Presumably limits concurrent guardrails validation calls — see GUARDRAILS_MAX_CONCURRENCY in app/core/config.py (unset = no limit)
# GUARDRAILS_MAX_CONCURRENCY=4
60+
# Include invalid output in correction prompt (set false to save tokens)
61+
GUARDRAILS_INCLUDE_OUTPUT_IN_CORRECTION=true
62+
# Truncate original prompt in correction prompts (unset = no limit)
63+
# GUARDRAILS_MAX_CORRECTION_PROMPT_LENGTH=2000
64+
# Truncate invalid output in correction prompts (unset = no limit)
65+
# GUARDRAILS_MAX_CORRECTION_OUTPUT_LENGTH=1000

app/core/config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,20 @@ class Settings(BaseSettings):
9797
EXTRACTION_CACHE_TTL: int = 86400 # seconds (24 h)
9898
EXTRACTION_CACHE_BACKEND: str = "redis" # redis | disk | none
9999

100+
# ── Audit logging ───────────────────────────────────────────────
101+
AUDIT_ENABLED: bool = False
102+
AUDIT_SINK: str = "logging" # logging | jsonfile | otel
103+
AUDIT_LOG_PATH: str = "audit.jsonl"
104+
AUDIT_SAMPLE_LENGTH: int | None = None
105+
106+
# ── Guardrails (output validation) ──────────────────────────────
107+
GUARDRAILS_ENABLED: bool = False
108+
GUARDRAILS_MAX_RETRIES: int = 3
109+
GUARDRAILS_MAX_CONCURRENCY: int | None = None
110+
GUARDRAILS_INCLUDE_OUTPUT_IN_CORRECTION: bool = True
111+
GUARDRAILS_MAX_CORRECTION_PROMPT_LENGTH: int | None = None
112+
GUARDRAILS_MAX_CORRECTION_OUTPUT_LENGTH: int | None = None
113+
100114
@field_validator("CORS_ORIGINS", mode="before")
101115
@classmethod
102116
def _parse_cors(cls, v: str | list[str]) -> list[str]:

app/schemas/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@
1313
from app.schemas.enums import TaskState
1414
from app.schemas.health import CeleryHealthResponse, HealthResponse
1515
from app.schemas.requests import (
16+
AuditConfig,
1617
BatchExtractionRequest,
1718
ExtractionConfig,
1819
ExtractionRequest,
20+
GuardrailsConfig,
1921
Provider,
2022
)
2123
from app.schemas.responses import (
@@ -31,6 +33,7 @@
3133
)
3234

3335
__all__ = [
36+
"AuditConfig",
3437
"BatchExtractionRequest",
3538
"BatchTaskSubmitResponse",
3639
"CeleryHealthResponse",
@@ -39,6 +42,7 @@
3942
"ExtractionMetadata",
4043
"ExtractionRequest",
4144
"ExtractionResult",
45+
"GuardrailsConfig",
4246
"HealthResponse",
4347
"Provider",
4448
"TaskRevokeResponse",

app/schemas/requests.py

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,98 @@
7979
# ── Extraction configuration model ─────────────────────────
8080

8181

82+
class GuardrailsConfig(BaseModel):
    """Request-scoped guardrails settings.

    Drives LLM output validation, retry, and corrective prompting through
    ``langextract-guardrails``. Supplying this inside ``extraction_config``
    takes precedence over the global ``GUARDRAILS_*`` settings for the
    request in question. Fields left as ``None`` defer to those globals.
    """

    # Master switch; None defers to the global GUARDRAILS_ENABLED setting.
    enabled: bool | None = Field(
        default=None,
        description="Enable output validation with retry. When ``None``, falls back to the global ``GUARDRAILS_ENABLED`` setting.",
    )
    # Providing a schema implicitly creates a JsonSchemaValidator.
    json_schema: dict[str, Any] | None = Field(
        default=None,
        description="JSON Schema dict to validate LLM output against. When set, a ``JsonSchemaValidator`` is created automatically.",
    )
    # Providing a pattern implicitly creates a RegexValidator.
    regex_pattern: str | None = Field(
        default=None,
        description="Regex pattern the LLM output must match. When set, a ``RegexValidator`` is created.",
    )
    # Only meaningful alongside regex_pattern; surfaces in correction text.
    regex_description: str | None = Field(
        default=None,
        description="Human-readable description of the regex pattern, used in corrective error messages.",
    )
    # Bounded to [0, 10]; None defers to GUARDRAILS_MAX_RETRIES.
    max_retries: int | None = Field(
        default=None,
        ge=0,
        le=10,
        description="Maximum retry attempts on validation failure. Overrides ``GUARDRAILS_MAX_RETRIES``.",
    )
    # False enables error-only correction prompts (cheaper in tokens).
    include_output_in_correction: bool | None = Field(
        default=None,
        description="Include the invalid output in the correction prompt. Set ``False`` for error-only mode to save tokens.",
    )
    # Strict mode rejects properties not declared in the schema.
    json_schema_strict: bool = Field(
        default=True,
        description="When ``True``, additional properties not in the schema cause validation failure.",
    )
class AuditConfig(BaseModel):
    """Request-scoped audit-logging settings.

    Drives structured audit logging through ``langextract-audit``.
    Supplying this inside ``extraction_config`` takes precedence over the
    global ``AUDIT_*`` settings for the request in question. Fields left
    as ``None`` defer to those globals.
    """

    # Master switch; None defers to the global AUDIT_ENABLED setting.
    enabled: bool | None = Field(
        default=None,
        description="Enable audit logging. When ``None``, falls back to the global ``AUDIT_ENABLED`` setting.",
    )
    # Non-negative truncation length; None defers to AUDIT_SAMPLE_LENGTH.
    sample_length: int | None = Field(
        default=None,
        ge=0,
        description="Store truncated prompt/response samples in audit records for debugging. Overrides ``AUDIT_SAMPLE_LENGTH``.",
    )
82174
class ExtractionConfig(BaseModel):
83175
"""Typed extraction configuration overrides.
84176
@@ -158,14 +250,38 @@ class ExtractionConfig(BaseModel):
158250
"prompt-only extraction."
159251
),
160252
)
253+
guardrails: GuardrailsConfig | None = Field(
254+
default=None,
255+
description=(
256+
"Output validation and retry configuration via "
257+
"langextract-guardrails. When unset, falls back "
258+
"to the global ``GUARDRAILS_*`` settings."
259+
),
260+
)
261+
audit: AuditConfig | None = Field(
262+
default=None,
263+
description=(
264+
"Audit logging configuration via "
265+
"langextract-audit. When unset, falls back "
266+
"to the global ``AUDIT_*`` settings."
267+
),
268+
)
161269

162270
def to_flat_dict(self) -> dict[str, Any]:
163271
"""Return a dict with only non-None values.
164272
273+
Nested models (``guardrails``, ``audit``) are serialized
274+
to plain dicts so the result is JSON-serializable and
275+
safe for Celery task arguments.
276+
165277
Returns:
166278
Flat dict suitable for ``run_extraction``.
167279
"""
168-
return {k: v for k, v in self.model_dump().items() if v is not None}
280+
data: dict[str, Any] = {}
281+
for k, v in self.model_dump().items():
282+
if v is not None:
283+
data[k] = v
284+
return data
169285

170286

171287
# ── Request models ──────────────────────────────────────────

app/services/extractor.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
ExtractionCache,
4545
build_cache_key,
4646
)
47+
from app.services.model_wrappers import apply_model_wrappers
4748
from app.services.provider_manager import ProviderManager
4849
from app.services.providers import is_openai_model, resolve_api_key
4950
from app.services.structured_output import (
@@ -349,6 +350,13 @@ def run_extraction(
349350
response_format=response_format,
350351
)
351352

353+
# ── Step 3b: Apply guardrails & audit wrappers ──────────
354+
cached_model = apply_model_wrappers(
355+
cached_model,
356+
provider,
357+
extraction_config,
358+
)
359+
352360
extract_kwargs: dict[str, Any] = {
353361
"text_or_documents": text_input,
354362
"prompt_description": prompt_description,
@@ -625,6 +633,13 @@ async def async_run_extraction(
625633
response_format=response_format_async,
626634
)
627635

636+
# ── Step 3b: Apply guardrails & audit wrappers (async) ───
637+
cached_model = apply_model_wrappers(
638+
cached_model,
639+
provider,
640+
extraction_config,
641+
)
642+
628643
extract_kwargs: dict[str, Any] = {
629644
"text_or_documents": text_input,
630645
"prompt_description": prompt_description,

0 commit comments

Comments
 (0)