|
79 | 79 | # ── Extraction configuration model ───────────────────────── |
80 | 80 |
|
81 | 81 |
|
class GuardrailsConfig(BaseModel):
    """Guardrails overrides that apply to a single request.

    Governs LLM output validation, retry, and corrective
    prompting through ``langextract-guardrails``. Supplying this
    model in ``extraction_config`` overrides the global
    ``GUARDRAILS_*`` settings for that request.

    Attributes:
        enabled: Switch for output validation with retry;
            ``None`` defers to ``GUARDRAILS_ENABLED``.
        json_schema: JSON Schema the LLM output is validated
            against.
        regex_pattern: Regex the LLM output must match.
        regex_description: Human-readable explanation of the
            regex, used in corrective error messages.
        max_retries: Retry attempts on validation failure,
            constrained to 0-10; overrides
            ``GUARDRAILS_MAX_RETRIES``.
        include_output_in_correction: Whether the invalid output
            is included in the correction prompt.
        json_schema_strict: Whether properties missing from the
            schema cause validation failure; defaults to ``True``.
    """

    # Master switch; left unset it defers to the global setting.
    enabled: bool | None = Field(
        None,
        description=(
            "Enable output validation with retry. When ``None``, "
            "falls back to the global ``GUARDRAILS_ENABLED`` setting."
        ),
    )

    # Validator sources: a JSON Schema and/or a regex.
    json_schema: dict[str, Any] | None = Field(
        None,
        description=(
            "JSON Schema dict to validate LLM output against. When set, "
            "a ``JsonSchemaValidator`` is created automatically."
        ),
    )
    regex_pattern: str | None = Field(
        None,
        description=(
            "Regex pattern the LLM output must match. When set, "
            "a ``RegexValidator`` is created."
        ),
    )
    regex_description: str | None = Field(
        None,
        description=(
            "Human-readable description of the regex pattern, used in "
            "corrective error messages."
        ),
    )

    # Retry and correction-prompt behaviour.
    max_retries: int | None = Field(
        None,
        ge=0,
        le=10,
        description=(
            "Maximum retry attempts on validation failure. Overrides "
            "``GUARDRAILS_MAX_RETRIES``."
        ),
    )
    include_output_in_correction: bool | None = Field(
        None,
        description=(
            "Include the invalid output in the correction prompt. Set "
            "``False`` for error-only mode to save tokens."
        ),
    )

    # Unlike the fields above, this one has a concrete default (True).
    json_schema_strict: bool = Field(
        True,
        description=(
            "When ``True``, additional properties not in the schema "
            "cause validation failure."
        ),
    )
| 145 | + |
| 146 | + |
class AuditConfig(BaseModel):
    """Audit overrides that apply to a single request.

    Governs structured audit logging through
    ``langextract-audit``. Supplying this model in
    ``extraction_config`` overrides the global ``AUDIT_*``
    settings for that request.

    Attributes:
        enabled: Switch for audit logging; ``None`` defers to
            ``AUDIT_ENABLED``.
        sample_length: Non-negative value controlling the
            truncated prompt/response samples stored in audit
            records; overrides ``AUDIT_SAMPLE_LENGTH``.
    """

    # Master switch; left unset it defers to the global setting.
    enabled: bool | None = Field(
        None,
        description=(
            "Enable audit logging. When ``None``, falls back to the "
            "global ``AUDIT_ENABLED`` setting."
        ),
    )

    # Constrained to be >= 0 by the ``ge`` bound.
    sample_length: int | None = Field(
        None,
        ge=0,
        description=(
            "Store truncated prompt/response samples in audit records "
            "for debugging. Overrides ``AUDIT_SAMPLE_LENGTH``."
        ),
    )
| 172 | + |
| 173 | + |
82 | 174 | class ExtractionConfig(BaseModel): |
83 | 175 | """Typed extraction configuration overrides. |
84 | 176 |
|
@@ -158,14 +250,38 @@ class ExtractionConfig(BaseModel): |
158 | 250 | "prompt-only extraction." |
159 | 251 | ), |
160 | 252 | ) |
| 253 | + guardrails: GuardrailsConfig | None = Field( |
| 254 | + default=None, |
| 255 | + description=( |
| 256 | + "Output validation and retry configuration via " |
| 257 | + "langextract-guardrails. When unset, falls back " |
| 258 | + "to the global ``GUARDRAILS_*`` settings." |
| 259 | + ), |
| 260 | + ) |
| 261 | + audit: AuditConfig | None = Field( |
| 262 | + default=None, |
| 263 | + description=( |
| 264 | + "Audit logging configuration via " |
| 265 | + "langextract-audit. When unset, falls back " |
| 266 | + "to the global ``AUDIT_*`` settings." |
| 267 | + ), |
| 268 | + ) |
161 | 269 |
|
def to_flat_dict(self) -> dict[str, Any]:
    """Dump the model, dropping top-level fields that are ``None``.

    Serialization is delegated to ``model_dump()``, which is
    expected to turn nested models (``guardrails``, ``audit``)
    into plain dicts so the result can be passed as a Celery
    task argument. Only the top level is filtered: falsy values
    such as ``0`` or ``False`` are kept, and ``None`` values
    inside a retained nested dict are left untouched.

    Returns:
        Flat dict suitable for ``run_extraction``.
    """
    dumped = self.model_dump()
    # Identity check on purpose: only ``None`` means "unset".
    return {name: value for name, value in dumped.items() if value is not None}
169 | 285 |
|
170 | 286 |
|
171 | 287 | # ── Request models ────────────────────────────────────────── |
|
0 commit comments