Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 48 additions & 2 deletions .github/workflows/deploy-environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,37 @@ on:
required: false
type: string
default: "15"
description: Wait sleep for model registration
description: Sleep seconds between model availability checks
smoke_models_wait_attempts:
required: false
type: string
default: "1"
description: Number of attempts to wait for models to become available
include_aoai_host_check:
required: false
type: boolean
default: false
description: Include AOAI endpoint host validation
environment:
required: false
type: string
default: ""
description: GitHub environment to use
secrets:
AZURE_CLIENT_ID:
required: true
AZURE_TENANT_ID:
required: true
AZURE_SUBSCRIPTION_ID:
required: true
TF_BACKEND_RG:
required: true
TF_BACKEND_SA:
required: true
TF_BACKEND_CONTAINER:
required: true
EXPECTED_AOAI_ENDPOINT_HOST:
required: false
AZURE_OPENAI_ENDPOINT:
required: true
AZURE_OPENAI_API_KEY:
Expand All @@ -49,8 +73,25 @@ on:
required: true
AIGATEWAY_KEY:
required: true
STATE_SERVICE_CONTAINER_IMAGE:
required: false
STATE_SERVICE_SHARED_TOKEN:
required: false
STATE_SERVICE_REGISTRY_PASSWORD:
required: false
DASHBOARD_CONTAINER_IMAGE:
required: false
GRAFANA_URL:
required: false

env:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
TF_VAR_env: ${{ inputs.env_name }}
TF_VAR_projname: "aigateway"
TF_VAR_location: "southafricanorth"
Expand All @@ -64,10 +105,15 @@ env:
TF_VAR_codex_api_version: ${{ inputs.codex_api_version }}
TF_VAR_embedding_deployment: "text-embedding-3-large"
TF_VAR_embeddings_api_version: "2024-02-01"
TF_VAR_state_service_container_image: ${{ secrets.STATE_SERVICE_CONTAINER_IMAGE }}
TF_VAR_secrets_expiration_date: "2027-03-31T00:00:00Z"
TF_VAR_dashboard_container_image: ${{ secrets.DASHBOARD_CONTAINER_IMAGE || 'ghcr.io/phoenixvc/ai-gateway-dashboard:latest' }}
TF_VAR_grafana_url: ${{ secrets.GRAFANA_URL }}

jobs:
deploy:
runs-on: ubuntu-latest
environment: ${{ inputs.environment || inputs.env_name }}
defaults:
run:
working-directory: ${{ inputs.terraform_working_directory }}
Expand Down Expand Up @@ -208,7 +254,7 @@ jobs:
aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
max_attempts: "3"
retry_sleep: ${{ inputs.smoke_retry_sleep }}
models_wait_attempts: ${{ if(inputs.env_name == 'prod', '3', '1') }}
models_wait_attempts: ${{ inputs.smoke_models_wait_attempts }}
models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }}

- name: Smoke test shared state API (dashboard proxy)
Expand Down
45 changes: 42 additions & 3 deletions .github/workflows/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ jobs:
name: Deploy dev
needs: plan
if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev'
environment: dev
uses: ./.github/workflows/deploy-environment.yaml
with:
env_name: dev
Expand All @@ -160,19 +159,32 @@ jobs:
terraform_working_directory: infra/env/dev
smoke_retry_sleep: "10"
smoke_models_wait_sleep: "15"
smoke_models_wait_attempts: "1"
include_aoai_host_check: false
environment: dev
secrets:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
GRAFANA_URL: ${{ secrets.GRAFANA_URL }}

deploy-staging:
name: Deploy staging
needs: plan
if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging')
environment: staging
uses: ./.github/workflows/deploy-environment.yaml
with:
env_name: staging
Expand All @@ -182,19 +194,32 @@ jobs:
terraform_working_directory: infra/env/staging
smoke_retry_sleep: "10"
smoke_models_wait_sleep: "15"
smoke_models_wait_attempts: "1"
include_aoai_host_check: false
environment: staging
secrets:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
GRAFANA_URL: ${{ secrets.GRAFANA_URL }}

deploy-prod:
name: Deploy prod
needs: plan
if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
environment: prod
uses: ./.github/workflows/deploy-environment.yaml
with:
env_name: prod
Expand All @@ -204,12 +229,26 @@ jobs:
terraform_working_directory: infra/env/prod
smoke_retry_sleep: "15"
smoke_models_wait_sleep: "30"
smoke_models_wait_attempts: "3"
include_aoai_host_check: true
environment: prod
secrets:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
GRAFANA_URL: ${{ secrets.GRAFANA_URL }}

# Legacy inline deployments removed - see deploy-environment.yaml
3 changes: 3 additions & 0 deletions docs/architecture/02-container-architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ flowchart TB
G4[Budget Router]
G5[Semantic Cache]
G6[Escalation Judge]
W[Webhook Auth]
end

subgraph Mesh
Expand Down Expand Up @@ -57,6 +58,8 @@ flowchart TB

C1 --> G1
C2 --> G1
C3 --> W
W --> G1
C4 --> G1

G1 --> G2
Expand Down
27 changes: 26 additions & 1 deletion docs/architecture/04-observability-telemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ flowchart TB

subgraph Ingest
I1[OpenTelemetry]
I2[Azure Monitor]
I2[Application Insights]
I3[Blob Export]
I4[Prometheus]
end

subgraph Analytics
Expand All @@ -62,15 +63,39 @@ flowchart TB
S4 --> I1
S5 --> I2
S6 --> I3
S5 --> I4

I1 --> A1
I2 --> A1
I3 --> A1
I4 --> V1

A1 --> V1
V1 --> V2
```

### Telemetry Sinks

LiteLLM emits Prometheus metrics when `"prometheus"` is listed in both its `success_callback` and `failure_callback` settings. The Prometheus exporter then exposes a `/metrics` endpoint, which Prometheus scrapes to collect application metrics. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.

The primary telemetry sinks are:

- **OpenTelemetry**: Traces and spans
- **Application Insights**: Azure Monitor implementation; the OTEL exporter is configured through the `APPLICATIONINSIGHTS_CONNECTION_STRING` environment variable
- **Blob Export**: Raw event storage
- **Prometheus**: Application metrics via `/metrics` endpoint

## Retention Policies

Application Insights retention defaults:

- **Production**: 90 days
- **Non-production (dev/staging)**: 30 days

These are environment-specific settings configured in the Application Insights resource. Operators can adjust retention in the Azure Portal under Application Insights resource settings.

Document these retention periods in operational runbooks so that cost and data-availability expectations stay aligned.

## Key Metrics

### Gateway
Expand Down
20 changes: 15 additions & 5 deletions docs/architecture/reference/matrix-gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,26 @@ flowchart TD

```json
{
"intent": "code_review",
"request_id": "req_abc123",
"label": "code_review",
"complexity": "medium",
"tool_candidate": true,
"recommended_target": "codeflow-engine",
"recommended_model_tier": "small",
"escalation_required": false,
"recommended_tier": "slm",
"cacheable": true,
"confidence": 0.93
}
```

> **Migration Note (v1.0.0)**: The response contract has been updated. Legacy field names `intent`, `recommended_target`, `recommended_model_tier`, and `escalation_required` are deprecated. Update clients to use the new fields:
>
> - `intent` → `label`
> - `recommended_target` → removed (use `recommended_tier` for routing)
> - `recommended_model_tier` → `recommended_tier`
> - `escalation_required` → derive from `confidence < 0.75` threshold
> - `cacheable` is a new field (previously not returned)
>
> **Deprecation window**: Legacy fields will be removed in v1.2.0, so clients should migrate before that release. For backwards compatibility in the meantime, implement fallback logic that checks both the new and the legacy field names.

## Contract Shapes

```typescript
Expand Down Expand Up @@ -91,7 +101,7 @@ interface PolicyScreenOutput {
| Condition | Action |
| -------------------------------- | ---------------------- |
| `policy-screen.allowed == false` | Block or redact |
| `confidence < 0.70` | Escalate to LLM |
| `confidence < 0.75` | Escalate to LLM |
| Tool suggested but no mapping | Send to general LLM |
| Tagging fails | Mark telemetry partial |

Expand Down
4 changes: 2 additions & 2 deletions docs/architecture/reference/matrix-rooivalk.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ flowchart TD

## CRITICAL: SLM is for Reporting Only

```
```text
┌─────────────────────────────────────────────────────────┐
│ IMPORTANT - SAFETY BOUNDARY │
├─────────────────────────────────────────────────────────┤
Expand Down Expand Up @@ -109,7 +109,7 @@ interface SuggestSopOutput {
```typescript
const DEFAULT_THRESHOLDS = {
operator_summary: { direct_use: 0.8, facts_only: 0.65 },
sop_suggestion: { direct_suggest: 0.78, manual_lookup: 0.65 },
sop_suggestion: { direct_suggest: 0.8, manual_lookup: 0.65 },
};
```

Expand Down
8 changes: 4 additions & 4 deletions docs/architecture/reference/slm-implementation-matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ This document provides a repo-by-repo implementation matrix showing SLM endpoint

## Documentation Structure

```
```text
reference/
├── slm-implementation-matrix.md # This file
├── matrix-gateway.md # AI Gateway details
├── matrix-cognitive-mesh.md # Cognitive Mesh details
├── matrix-codeflow.md # CodeFlow Engine details
├── matrix-agentkit.md # AgentKit Forge details
├── matrix-codeflow.md # CodeFlow Engine details
├── matrix-agentkit.md # AgentKit Forge details
├── matrix-rooivalk.md # PhoenixRooivalk details
└── matrix-mystira.md # Mystira details
```
Expand Down Expand Up @@ -251,7 +251,7 @@ This is a practical role map, not a vendor mandate.

### Standard Fallback Pattern

```
```text
1. SLM timeout → Deterministic rules
2. Low confidence → LLM escalation
3. Safety critical → Block immediately
Expand Down
8 changes: 4 additions & 4 deletions docs/architecture/reference/slm-management-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Maintain a tiered model portfolio:

Implement cost controls at each layer:

```
```text
Cost Control Layers
┌─────────────────────────────────────┐
│ 1. Budget caps per project │
Expand Down Expand Up @@ -172,7 +172,7 @@ async def security_pipeline(request: Request) -> SecurityResult:

### Fallback Hierarchy

```
```text
Request
▼ Primary SLM
Expand Down Expand Up @@ -216,7 +216,7 @@ Request

### Model Lifecycle

```
```text
Discovery → Testing → Staging → Production → Deprecated → Retired
│ │ │ │ │
▼ ▼ ▼ ▼ ▼
Expand Down Expand Up @@ -271,4 +271,4 @@ Discovery → Testing → Staging → Production → Deprecated → Retired
6. [ ] Define fallback hierarchies
7. [ ] Implement observability stack
8. [ ] Document model lifecycle process
9. [ ] **Add explicit safety boundary for PhoenixRooivalk**
9. [x] Add explicit safety boundary for PhoenixRooivalk
Loading
Loading