Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 45 additions & 4 deletions .github/workflows/deploy-environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,36 @@ on:
type: string
default: "10"
description: Retry sleep for smoke tests
smoke_models_wait_sleep:
smoke_models_wait_attempts:
required: false
type: string
default: "15"
description: Wait sleep for model registration
default: "1"
description: Number of attempts to wait for models to become available
include_aoai_host_check:
required: false
type: boolean
default: false
description: Include AOAI endpoint host validation
environment:
required: false
type: string
default: ""
description: GitHub environment to use
secrets:
AZURE_CLIENT_ID:
required: true
AZURE_TENANT_ID:
required: true
AZURE_SUBSCRIPTION_ID:
required: true
TF_BACKEND_RG:
required: true
TF_BACKEND_SA:
required: true
TF_BACKEND_CONTAINER:
required: true
EXPECTED_AOAI_ENDPOINT_HOST:
required: false
AZURE_OPENAI_ENDPOINT:
required: true
AZURE_OPENAI_API_KEY:
Expand All @@ -49,8 +68,25 @@ on:
required: true
AIGATEWAY_KEY:
required: true
STATE_SERVICE_CONTAINER_IMAGE:
required: false
STATE_SERVICE_SHARED_TOKEN:
required: false
STATE_SERVICE_REGISTRY_PASSWORD:
required: false
DASHBOARD_CONTAINER_IMAGE:
required: false
GRAFANA_URL:
required: false

env:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
TF_VAR_env: ${{ inputs.env_name }}
TF_VAR_projname: "aigateway"
TF_VAR_location: "southafricanorth"
Expand All @@ -64,10 +100,15 @@ env:
TF_VAR_codex_api_version: ${{ inputs.codex_api_version }}
TF_VAR_embedding_deployment: "text-embedding-3-large"
TF_VAR_embeddings_api_version: "2024-02-01"
TF_VAR_state_service_container_image: ${{ secrets.STATE_SERVICE_CONTAINER_IMAGE }}
TF_VAR_secrets_expiration_date: "2027-03-31T00:00:00Z"
TF_VAR_dashboard_container_image: ${{ secrets.DASHBOARD_CONTAINER_IMAGE || 'ghcr.io/phoenixvc/ai-gateway-dashboard:latest' }}
TF_VAR_grafana_url: ${{ secrets.GRAFANA_URL }}

jobs:
deploy:
runs-on: ubuntu-latest
environment: ${{ inputs.environment || inputs.env_name }}
defaults:
run:
working-directory: ${{ inputs.terraform_working_directory }}
Expand Down Expand Up @@ -208,7 +249,7 @@ jobs:
aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
max_attempts: "3"
retry_sleep: ${{ inputs.smoke_retry_sleep }}
models_wait_attempts: ${{ if(inputs.env_name == 'prod', '3', '1') }}
models_wait_attempts: ${{ inputs.smoke_models_wait_attempts }}
models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }}

- name: Smoke test shared state API (dashboard proxy)
Expand Down
45 changes: 42 additions & 3 deletions .github/workflows/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ jobs:
name: Deploy dev
needs: plan
if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev'
environment: dev
uses: ./.github/workflows/deploy-environment.yaml
with:
env_name: dev
Expand All @@ -160,19 +159,32 @@ jobs:
terraform_working_directory: infra/env/dev
smoke_retry_sleep: "10"
smoke_models_wait_sleep: "15"
smoke_models_wait_attempts: "1"
include_aoai_host_check: false
environment: dev
secrets:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
GRAFANA_URL: ${{ secrets.GRAFANA_URL }}

deploy-staging:
name: Deploy staging
needs: plan
if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging')
environment: staging
uses: ./.github/workflows/deploy-environment.yaml
with:
env_name: staging
Expand All @@ -182,19 +194,32 @@ jobs:
terraform_working_directory: infra/env/staging
smoke_retry_sleep: "10"
smoke_models_wait_sleep: "15"
smoke_models_wait_attempts: "1"
include_aoai_host_check: false
environment: staging
secrets:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
GRAFANA_URL: ${{ secrets.GRAFANA_URL }}

deploy-prod:
name: Deploy prod
needs: plan
if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
environment: prod
uses: ./.github/workflows/deploy-environment.yaml
with:
env_name: prod
Expand All @@ -204,12 +229,26 @@ jobs:
terraform_working_directory: infra/env/prod
smoke_retry_sleep: "15"
smoke_models_wait_sleep: "30"
smoke_models_wait_attempts: "3"
include_aoai_host_check: true
environment: prod
secrets:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
GRAFANA_URL: ${{ secrets.GRAFANA_URL }}

# Legacy inline deployments removed - see deploy-environment.yaml
1 change: 1 addition & 0 deletions docs/architecture/02-container-architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ flowchart TB

C1 --> G1
C2 --> G1
C3 --> G1
C4 --> G1

G1 --> G2
Expand Down
27 changes: 26 additions & 1 deletion docs/architecture/04-observability-telemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ flowchart TB

subgraph Ingest
I1[OpenTelemetry]
I2[Azure Monitor]
I2[Application Insights]
I3[Blob Export]
I4[Prometheus]
end

subgraph Analytics
Expand All @@ -62,15 +63,39 @@ flowchart TB
S4 --> I1
S5 --> I2
S6 --> I3
S5 --> I4

I1 --> A1
I2 --> A1
I3 --> A1
I4 --> V1

A1 --> V1
V1 --> V2
```

### Telemetry Sinks

LiteLLM enables Prometheus metrics when `success_callback` and `failure_callback` include "prometheus". The Prometheus exporter then exposes application metrics on a `/metrics` endpoint for scraping. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.

The primary telemetry sinks are:

- **OpenTelemetry**: Traces and spans
- **Application Insights**: Azure Monitor backend; the OTEL exporter is configured through the `APPLICATIONINSIGHTS_CONNECTION_STRING` environment variable
- **Blob Export**: Raw event storage
- **Prometheus**: Application metrics via `/metrics` endpoint

## Retention Policies

Application Insights retention defaults:

- **Production**: 90 days
- **Non-production (dev/staging)**: 30 days

These are environment-specific settings configured in the Application Insights resource. Operators can adjust retention in the Azure Portal under Application Insights resource settings.

Document these retention periods in operational runbooks so that cost and data-availability expectations stay aligned.

## Key Metrics

### Gateway
Expand Down
10 changes: 5 additions & 5 deletions docs/architecture/reference/matrix-gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ flowchart TD

```json
{
"intent": "code_review",
"request_id": "req_abc123",
"label": "code_review",
"complexity": "medium",
"tool_candidate": true,
"recommended_target": "codeflow-engine",
"recommended_model_tier": "small",
"escalation_required": false,
"recommended_tier": "slm",
"cacheable": true,
"confidence": 0.93
}
```
Expand Down Expand Up @@ -91,7 +91,7 @@ interface PolicyScreenOutput {
| Condition | Action |
| -------------------------------- | ---------------------- |
| `policy-screen.allowed == false` | Block or redact |
| `confidence < 0.70` | Escalate to LLM |
| `confidence < 0.75` | Escalate to LLM |
| Tool suggested but no mapping | Send to general LLM |
| Tagging fails | Mark telemetry partial |

Expand Down
4 changes: 2 additions & 2 deletions docs/architecture/reference/matrix-rooivalk.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ flowchart TD

## CRITICAL: SLM is for Reporting Only

```
```text
┌─────────────────────────────────────────────────────────┐
│ IMPORTANT - SAFETY BOUNDARY │
├─────────────────────────────────────────────────────────┤
Expand Down Expand Up @@ -109,7 +109,7 @@ interface SuggestSopOutput {
```typescript
const DEFAULT_THRESHOLDS = {
operator_summary: { direct_use: 0.8, facts_only: 0.65 },
sop_suggestion: { direct_suggest: 0.78, manual_lookup: 0.65 },
sop_suggestion: { direct_suggest: 0.8, manual_lookup: 0.65 },
};
```

Expand Down
8 changes: 4 additions & 4 deletions docs/architecture/reference/slm-implementation-matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ This document provides a repo-by-repo implementation matrix showing SLM endpoint

## Documentation Structure

```
```text
reference/
├── slm-implementation-matrix.md # This file
├── matrix-gateway.md # AI Gateway details
├── matrix-cognitive-mesh.md # Cognitive Mesh details
├── matrix-codeflow.md # CodeFlow Engine details
├── matrix-agentkit.md # AgentKit Forge details
├── matrix-codeflow.md # CodeFlow Engine details
├── matrix-agentkit.md # AgentKit Forge details
├── matrix-rooivalk.md # PhoenixRooivalk details
└── matrix-mystira.md # Mystira details
```
Expand Down Expand Up @@ -251,7 +251,7 @@ This is a practical role map, not a vendor mandate.

### Standard Fallback Pattern

```
```text
1. SLM timeout → Deterministic rules
2. Low confidence → LLM escalation
3. Safety critical → Block immediately
Expand Down
8 changes: 4 additions & 4 deletions docs/architecture/reference/slm-management-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Maintain a tiered model portfolio:

Implement cost controls at each layer:

```
```text
Cost Control Layers
┌─────────────────────────────────────┐
│ 1. Budget caps per project │
Expand Down Expand Up @@ -172,7 +172,7 @@ async def security_pipeline(request: Request) -> SecurityResult:

### Fallback Hierarchy

```
```text
Request
▼ Primary SLM
Expand Down Expand Up @@ -216,7 +216,7 @@ Request

### Model Lifecycle

```
```text
Discovery → Testing → Staging → Production → Deprecated → Retired
│ │ │ │ │
▼ ▼ ▼ ▼ ▼
Expand Down Expand Up @@ -271,4 +271,4 @@ Discovery → Testing → Staging → Production → Deprecated → Retired
6. [ ] Define fallback hierarchies
7. [ ] Implement observability stack
8. [ ] Document model lifecycle process
9. [ ] **Add explicit safety boundary for PhoenixRooivalk**
9. [x] Add explicit safety boundary for PhoenixRooivalk
16 changes: 8 additions & 8 deletions docs/architecture/reference/strategic/07-deployment-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ flowchart TD

## Decision Matrix

| System | Best SLM Jobs | Less Suitable |
| --------------- | -------------------------- | ------------------------------ |
| AI Gateway | routing, screening, cost | Nuanced synthesis |
| Cognitive Mesh | routing, decomposition | Final judgment |
| CodeFlow | PR triage, log analysis | Root cause across dependencies |
| AgentKit | tool selection, extraction | Multi-step planning |
| PhoenixRooivalk | summaries, alerts | Sole threat authority |
| Mystira | safety, continuity | Rich narrative |
| System | Best SLM Jobs | Less Suitable |
| --------------- | ------------------------------------------------------------- | ------------------------------ |
| AI Gateway | routing, screening, cost | Nuanced synthesis |
| Cognitive Mesh | routing, decomposition | Final judgment |
| CodeFlow | PR classification, CI failure triage, release-note extraction | Root cause across dependencies |
| AgentKit | tool selection, extraction | Multi-step planning |
| PhoenixRooivalk | summaries, alerts | Sole threat authority |
| Mystira | safety, continuity | Rich narrative |

## Practical Gateway Flow

Expand Down
2 changes: 1 addition & 1 deletion docs/architecture/systems/agentkit-forge.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ AgentKit Forge builds AI agents and orchestration workflows. SLMs help when agen

## Architecture

```
```text
Agent Task
Expand Down
Loading