# Workflow file for the run of:
# "docs: update planning doc with implementation status and infrastructure decisions" (#100)

# Plans and applies the AI Gateway Terraform stacks for the dev, uat and prod environments.
name: Deploy AI Gateway

# Triggers: manual dispatch, pushes to main, and pull requests targeting dev or main.
on:
  workflow_dispatch:
  push:
    branches: [main]
  pull_request:
    branches: [dev, main]

# Least-privilege token: read the repository and request an OIDC id-token
# (presumably consumed by azure/login for federated sign-in - confirm).
permissions:
  contents: read
  id-token: write

# Workflow-wide environment, inherited by every job in this workflow.
env:
  # Azure identity handed to azure/login.
  AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
  AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
  AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

  # Terraform remote-state backend coordinates (passed to `terraform init -backend-config`).
  TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
  TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
  TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}

  # Terraform input variables shared by all environments (TF_VAR_<name> convention).
  TF_VAR_secrets_expiration_date: '2027-03-31T00:00:00Z'

  # Dashboard image and optional Grafana URL. Set DASHBOARD_CONTAINER_IMAGE as a
  # repository/environment variable to pin a specific digest; otherwise the
  # :latest tag is used.
  TF_VAR_dashboard_container_image: ${{ vars.DASHBOARD_CONTAINER_IMAGE || 'ghcr.io/phoenixvc/ai-gateway-dashboard:latest' }}

  # State service settings; each one falls back to an empty string (or the
  # repository owner for the registry username) when not configured.
  TF_VAR_state_service_container_image: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE || '' }}
  TF_VAR_state_service_shared_token: ${{ secrets.STATE_SERVICE_SHARED_TOKEN || '' }}
  TF_VAR_state_service_registry_username: ${{ vars.STATE_SERVICE_REGISTRY_USERNAME || github.repository_owner }}
  TF_VAR_state_service_registry_password: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD || '' }}

  TF_VAR_grafana_url: ${{ secrets.GRAFANA_URL || '' }}
jobs:
# Plan job: runs `terraform plan` for exactly one environment per trigger and
# uploads the resulting plan file as a short-lived artifact.
plan:
  # Environment routing:
  #   PR into dev                        -> dev
  #   PR into main + label 'run-uat'     -> uat
  #   push to main / workflow_dispatch   -> prod
  # PRs from forks are skipped (no repo secrets; avoids AADSTS700213).
  # Runtime UAT toggle: add the PR label 'run-uat' to enable UAT on PRs into main.
  # The label is matched as an exact element of the label-name array, so labels
  # that merely contain the text (e.g. 'dont-run-uat') do not trigger UAT.
  if: |
    (github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false) &&
    (
      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
      (github.event_name == 'workflow_dispatch') ||
      (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev') ||
      (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(github.event.pull_request.labels.*.name, 'run-uat'))
    )
  name: Plan ${{ matrix.environment }}
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      # Single-element matrix chosen with the same routing rules as the job `if`;
      # the trailing fallback keeps the expression a valid array in every case.
      environment: >-
        ${{
        (github.event_name == 'workflow_dispatch' && fromJSON('["prod"]')) ||
        (github.event_name == 'push' && github.ref == 'refs/heads/main' && fromJSON('["prod"]')) ||
        (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev' && fromJSON('["dev"]')) ||
        (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(github.event.pull_request.labels.*.name, 'run-uat') && fromJSON('["uat"]')) ||
        fromJSON('["prod"]')
        }}
  # Binds the job to the matching GitHub environment so per-environment secrets apply.
  environment: ${{ matrix.environment }}
  defaults:
    run:
      working-directory: infra/env/${{ matrix.environment }}
  env:
    # Terraform Variables (Environment Specific)
    TF_VAR_env: "${{ matrix.environment }}"
    TF_VAR_projname: "aigateway"
    TF_VAR_location: "southafricanorth"
    TF_VAR_location_short: "san"
    # Terraform Variables (Secrets & Config) - sourced from per-environment GitHub secrets
    TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
    TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }}
    TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
    TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
    TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }}
    # Model Configuration (environment-specific to match deploy jobs)
    TF_VAR_codex_model: ${{ matrix.environment == 'prod' && 'gpt-4o' || 'gpt-5.3-codex' }}
    TF_VAR_codex_api_version: ${{ matrix.environment == 'prod' && '2025-01-01-preview' || '2025-04-01-preview' }}
    TF_VAR_embedding_deployment: "text-embedding-3-large"
    TF_VAR_embeddings_api_version: "2024-02-01"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Fails fast, listing every missing required value, before any Azure or Terraform call.
    - name: Quickcheck required secrets and config
      shell: bash
      run: |
        set -euo pipefail
        missing=0
        required=(
          AZURE_CLIENT_ID
          AZURE_TENANT_ID
          AZURE_SUBSCRIPTION_ID
          TF_BACKEND_RG
          TF_BACKEND_SA
          TF_BACKEND_CONTAINER
          TF_VAR_azure_openai_endpoint
          TF_VAR_azure_openai_api_key
          TF_VAR_gateway_key
        )
        for v in "${required[@]}"; do
          # ${!v} is bash indirect expansion: the value of the variable named by $v.
          if [ -z "${!v:-}" ]; then
            echo "::error::Missing required value: ${v}"
            missing=1
          else
            echo "${v}=SET"
          fi
        done
        echo "TF_VAR_env=${TF_VAR_env:-unset}"
        echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}"
        echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}"
        if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then
          echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}"
          # Reduce the endpoint URL to its host part for the log.
          endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#')
          echo "Azure OpenAI endpoint host=${endpoint_host}"
        fi
        if [ "${missing}" -ne 0 ]; then
          exit 1
        fi

    - name: Azure Login
      uses: azure/login@v2
      with:
        client-id: ${{ env.AZURE_CLIENT_ID }}
        tenant-id: ${{ env.AZURE_TENANT_ID }}
        subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3
      with:
        terraform_version: "1.14.6"

    # One state file per environment inside the shared backend container.
    - name: Terraform Init
      run: |
        terraform init \
          -backend-config="resource_group_name=${TF_BACKEND_RG}" \
          -backend-config="storage_account_name=${TF_BACKEND_SA}" \
          -backend-config="container_name=${TF_BACKEND_CONTAINER}" \
          -backend-config="key=${{ matrix.environment }}.terraform.tfstate"

    - name: Terraform Plan
      run: |
        terraform plan -out=tfplan

    # NOTE(review): no job in this workflow downloads this artifact (the deploy
    # jobs re-plan), and a saved plan file may embed sensitive variable values -
    # confirm the upload is still wanted.
    - name: Upload Plan
      uses: actions/upload-artifact@v4
      with:
        name: tfplan-${{ matrix.environment }}
        path: infra/env/${{ matrix.environment }}/tfplan
        retention-days: 1
# Deploy job for dev: runs after `plan`, only for pull requests targeting the
# dev branch. Re-plans, applies, then runs diagnostics and smoke tests.
deploy-dev:
  name: Deploy dev
  needs: plan
  runs-on: ubuntu-latest
  if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev'
  environment: dev
  defaults:
    run:
      working-directory: infra/env/dev
  env:
    TF_VAR_env: "dev"
    TF_VAR_projname: "aigateway"
    TF_VAR_location: "southafricanorth"
    TF_VAR_location_short: "san"
    TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
    TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }}
    TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
    TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
    TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }}
    TF_VAR_codex_model: "gpt-5.3-codex"
    TF_VAR_codex_api_version: "2025-04-01-preview"
    TF_VAR_embedding_deployment: "text-embedding-3-large"
    TF_VAR_embeddings_api_version: "2024-02-01"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Fails fast, listing every missing required value, before any Azure or Terraform call.
    - name: Quickcheck required secrets and config
      shell: bash
      run: |
        set -euo pipefail
        missing=0
        required=(
          AZURE_CLIENT_ID
          AZURE_TENANT_ID
          AZURE_SUBSCRIPTION_ID
          TF_BACKEND_RG
          TF_BACKEND_SA
          TF_BACKEND_CONTAINER
          TF_VAR_azure_openai_endpoint
          TF_VAR_azure_openai_api_key
          TF_VAR_gateway_key
        )
        for v in "${required[@]}"; do
          # ${!v} is bash indirect expansion: the value of the variable named by $v.
          if [ -z "${!v:-}" ]; then
            echo "::error::Missing required value: ${v}"
            missing=1
          else
            echo "${v}=SET"
          fi
        done
        echo "TF_VAR_env=${TF_VAR_env:-unset}"
        echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}"
        echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}"
        if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then
          echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}"
          # Reduce the endpoint URL to its host part for the log.
          endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#')
          echo "Azure OpenAI endpoint host=${endpoint_host}"
        fi
        if [ "${missing}" -ne 0 ]; then
          exit 1
        fi

    - name: Azure Login
      uses: azure/login@v2
      with:
        client-id: ${{ env.AZURE_CLIENT_ID }}
        tenant-id: ${{ env.AZURE_TENANT_ID }}
        subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3
      with:
        terraform_version: "1.14.6"

    - name: Terraform Init
      run: |
        terraform init \
          -backend-config="resource_group_name=${TF_BACKEND_RG}" \
          -backend-config="storage_account_name=${TF_BACKEND_SA}" \
          -backend-config="container_name=${TF_BACKEND_CONTAINER}" \
          -backend-config="key=dev.terraform.tfstate"

    # Repository-local composite action; its behaviour is defined outside this file.
    - name: Import existing Container App into Terraform state
      uses: ./.github/actions/import-container-app
      with:
        projname: ${{ env.TF_VAR_projname }}
        env: ${{ env.TF_VAR_env }}
        location_short: ${{ env.TF_VAR_location_short }}
        subscription_id: ${{ env.AZURE_SUBSCRIPTION_ID }}
        terraform_working_directory: infra/env/dev

    - name: Terraform Plan
      run: |
        terraform plan -out=tfplan

    - name: Terraform Apply
      run: |
        terraform apply -auto-approve tfplan

    # The output is captured in its own assignment so that a failing
    # `terraform output` fails this step instead of silently yielding an empty URL.
    - name: Get gateway URL
      id: gw
      run: |
        url="$(terraform output -raw gateway_url)"
        echo "url=${url}" >> "$GITHUB_OUTPUT"

    # Best-effort: the dashboard output is optional, so a missing output yields an empty URL.
    - name: Get dashboard URL
      id: db
      run: echo "url=$(terraform output -raw dashboard_url 2>/dev/null || true)" >> "$GITHUB_OUTPUT"

    # Read-only `az containerapp` queries that log the deployed revision and configuration.
    - name: Runtime diagnostics (Container App config)
      shell: bash
      env:
        # Passed via env rather than interpolated into the script text.
        GATEWAY_URL: ${{ steps.gw.outputs.url }}
      run: |
        set -euo pipefail
        RG_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-rg-${TF_VAR_location_short}"
        CA_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-ca-${TF_VAR_location_short}"
        echo "Resource Group: ${RG_NAME}"
        echo "Container App: ${CA_NAME}"
        echo "Gateway URL (terraform output): ${GATEWAY_URL}"
        echo "Latest revision:"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.latestRevisionName" -o tsv
        echo "Active revisions (name, active, created):"
        az containerapp revision list -g "${RG_NAME}" -n "${CA_NAME}" --query "[].{name:name,active:properties.active,created:properties.createdTime}" -o table
        echo "Configured env vars for LiteLLM secret refs:"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_AZURE_OPENAI_API_KEY' || name=='LITELLM_GATEWAY_KEY']" -o json
        echo "Configured secret sources (names + key vault URLs):"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.configuration.secrets[].{name:name,keyVaultUrl:keyVaultUrl}" -o table
        echo "LITELLM_CONFIG_CONTENT excerpt (first 2000 chars):"
        # `|| true`: with pipefail, `head` closing the pipe early must not fail the step.
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_CONFIG_CONTENT'].value | [0]" -o tsv | head -c 2000 || true
        echo

    # Runs from the repository root, not the Terraform directory.
    - name: Integration test (Azure OpenAI backend)
      shell: bash
      env:
        AZURE_OPENAI_ENDPOINT: ${{ env.TF_VAR_azure_openai_endpoint }}
        AZURE_OPENAI_API_KEY: ${{ env.TF_VAR_azure_openai_api_key }}
        AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ env.TF_VAR_azure_openai_embedding_endpoint }}
        AZURE_OPENAI_EMBEDDING_API_KEY: ${{ env.TF_VAR_azure_openai_embedding_api_key }}
        AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ env.TF_VAR_embedding_deployment }}
        AZURE_OPENAI_API_VERSION: ${{ env.TF_VAR_embeddings_api_version }}
        AZURE_OPENAI_CHAT_DEPLOYMENT: "gpt-4.1"
        AZURE_OPENAI_CHAT_API_VERSION: ${{ env.TF_VAR_codex_api_version }}
        AZURE_OPENAI_CODEX_MODEL: ${{ env.TF_VAR_codex_model }}
      working-directory: ${{ github.workspace }}
      run: python3 scripts/integration_test.py

    - name: Smoke test gateway (embeddings + responses)
      uses: ./.github/actions/smoke-test-gateway
      with:
        gateway_url: ${{ steps.gw.outputs.url }}
        gateway_key: ${{ secrets.AIGATEWAY_KEY }}
        embedding_model: ${{ env.TF_VAR_embedding_deployment }}
        codex_model: ${{ env.TF_VAR_codex_model }}
        aoai_endpoint: ${{ env.TF_VAR_azure_openai_endpoint }}
        aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
        max_attempts: "3"
        retry_sleep: "10"

    # Only runs when a state service image is configured. Writes a selection for
    # a CI test user through the dashboard proxy and reads it back.
    - name: Smoke test shared state API (dashboard proxy)
      if: env.TF_VAR_state_service_container_image != ''
      shell: bash
      env:
        DASHBOARD_URL: ${{ steps.db.outputs.url }}
      run: |
        set -euo pipefail
        # The dashboard URL lookup is best-effort, so fail with a clear message
        # here rather than letting curl choke on a host-less URL.
        if [ -z "${DASHBOARD_URL}" ]; then
          echo "::error::Terraform output 'dashboard_url' is empty; cannot smoke test the shared state API."
          exit 1
        fi
        TEST_USER="ci-smoke-${TF_VAR_env}"
        # Build the JSON body with jq so the model name is always correctly escaped.
        payload=$(jq -nc --arg model "${TF_VAR_codex_model}" '{enabled: true, selected_model: $model}')
        curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/catalog" > /tmp/catalog.json
        curl -fsS --connect-timeout 5 --max-time 15 -X PUT "${DASHBOARD_URL}/api/state/selection" \
          -H "Content-Type: application/json" \
          -H "X-User-Id: ${TEST_USER}" \
          -d "${payload}" > /tmp/selection-put.json
        curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/selection" \
          -H "X-User-Id: ${TEST_USER}" > /tmp/selection-get.json
        jq -e '.enabled == true' /tmp/selection-get.json > /dev/null
# Deploy job for uat: runs after `plan`, only for pull requests targeting main
# that carry the 'run-uat' label. Re-plans, applies, then runs diagnostics and
# smoke tests.
deploy-uat:
  name: Deploy uat
  needs: plan
  runs-on: ubuntu-latest
  # The label is matched as an exact element of the label-name array, so labels
  # that merely contain the text (e.g. 'dont-run-uat') do not trigger a deploy.
  if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(github.event.pull_request.labels.*.name, 'run-uat')
  environment: uat
  defaults:
    run:
      working-directory: infra/env/uat
  env:
    TF_VAR_env: "uat"
    TF_VAR_projname: "aigateway"
    TF_VAR_location: "southafricanorth"
    TF_VAR_location_short: "san"
    TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
    TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }}
    TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
    TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
    TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }}
    TF_VAR_codex_model: "gpt-5.3-codex"
    TF_VAR_codex_api_version: "2025-04-01-preview"
    TF_VAR_embedding_deployment: "text-embedding-3-large"
    TF_VAR_embeddings_api_version: "2024-02-01"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Fails fast, listing every missing required value, before any Azure or Terraform call.
    - name: Quickcheck required secrets and config
      shell: bash
      run: |
        set -euo pipefail
        missing=0
        required=(
          AZURE_CLIENT_ID
          AZURE_TENANT_ID
          AZURE_SUBSCRIPTION_ID
          TF_BACKEND_RG
          TF_BACKEND_SA
          TF_BACKEND_CONTAINER
          TF_VAR_azure_openai_endpoint
          TF_VAR_azure_openai_api_key
          TF_VAR_gateway_key
        )
        for v in "${required[@]}"; do
          # ${!v} is bash indirect expansion: the value of the variable named by $v.
          if [ -z "${!v:-}" ]; then
            echo "::error::Missing required value: ${v}"
            missing=1
          else
            echo "${v}=SET"
          fi
        done
        echo "TF_VAR_env=${TF_VAR_env:-unset}"
        echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}"
        echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}"
        if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then
          echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}"
          # Reduce the endpoint URL to its host part for the log.
          endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#')
          echo "Azure OpenAI endpoint host=${endpoint_host}"
        fi
        if [ "${missing}" -ne 0 ]; then
          exit 1
        fi

    - name: Azure Login
      uses: azure/login@v2
      with:
        client-id: ${{ env.AZURE_CLIENT_ID }}
        tenant-id: ${{ env.AZURE_TENANT_ID }}
        subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3
      with:
        terraform_version: "1.14.6"

    - name: Terraform Init
      run: |
        terraform init \
          -backend-config="resource_group_name=${TF_BACKEND_RG}" \
          -backend-config="storage_account_name=${TF_BACKEND_SA}" \
          -backend-config="container_name=${TF_BACKEND_CONTAINER}" \
          -backend-config="key=uat.terraform.tfstate"

    # Repository-local composite action; its behaviour is defined outside this file.
    - name: Import existing Container App into Terraform state
      uses: ./.github/actions/import-container-app
      with:
        projname: ${{ env.TF_VAR_projname }}
        env: ${{ env.TF_VAR_env }}
        location_short: ${{ env.TF_VAR_location_short }}
        subscription_id: ${{ env.AZURE_SUBSCRIPTION_ID }}
        terraform_working_directory: infra/env/uat

    - name: Terraform Plan
      run: |
        terraform plan -out=tfplan

    - name: Terraform Apply
      run: |
        terraform apply -auto-approve tfplan

    # The output is captured in its own assignment so that a failing
    # `terraform output` fails this step instead of silently yielding an empty URL.
    - name: Get gateway URL
      id: gw
      run: |
        url="$(terraform output -raw gateway_url)"
        echo "url=${url}" >> "$GITHUB_OUTPUT"

    # Best-effort: the dashboard output is optional, so a missing output yields an empty URL.
    - name: Get dashboard URL
      id: db
      run: echo "url=$(terraform output -raw dashboard_url 2>/dev/null || true)" >> "$GITHUB_OUTPUT"

    # Read-only `az containerapp` queries that log the deployed revision and configuration.
    - name: Runtime diagnostics (Container App config)
      shell: bash
      env:
        # Passed via env rather than interpolated into the script text.
        GATEWAY_URL: ${{ steps.gw.outputs.url }}
      run: |
        set -euo pipefail
        RG_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-rg-${TF_VAR_location_short}"
        CA_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-ca-${TF_VAR_location_short}"
        echo "Resource Group: ${RG_NAME}"
        echo "Container App: ${CA_NAME}"
        echo "Gateway URL (terraform output): ${GATEWAY_URL}"
        echo "Latest revision:"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.latestRevisionName" -o tsv
        echo "Active revisions (name, active, created):"
        az containerapp revision list -g "${RG_NAME}" -n "${CA_NAME}" --query "[].{name:name,active:properties.active,created:properties.createdTime}" -o table
        echo "Configured env vars for LiteLLM secret refs:"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_AZURE_OPENAI_API_KEY' || name=='LITELLM_GATEWAY_KEY']" -o json
        echo "Configured secret sources (names + key vault URLs):"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.configuration.secrets[].{name:name,keyVaultUrl:keyVaultUrl}" -o table
        echo "LITELLM_CONFIG_CONTENT excerpt (first 2000 chars):"
        # `|| true`: with pipefail, `head` closing the pipe early must not fail the step.
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_CONFIG_CONTENT'].value | [0]" -o tsv | head -c 2000 || true
        echo

    # Runs from the repository root, not the Terraform directory.
    - name: Integration test (Azure OpenAI backend)
      shell: bash
      env:
        AZURE_OPENAI_ENDPOINT: ${{ env.TF_VAR_azure_openai_endpoint }}
        AZURE_OPENAI_API_KEY: ${{ env.TF_VAR_azure_openai_api_key }}
        AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ env.TF_VAR_azure_openai_embedding_endpoint }}
        AZURE_OPENAI_EMBEDDING_API_KEY: ${{ env.TF_VAR_azure_openai_embedding_api_key }}
        AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ env.TF_VAR_embedding_deployment }}
        AZURE_OPENAI_API_VERSION: ${{ env.TF_VAR_embeddings_api_version }}
        AZURE_OPENAI_CHAT_DEPLOYMENT: "gpt-4.1"
        AZURE_OPENAI_CHAT_API_VERSION: ${{ env.TF_VAR_codex_api_version }}
        AZURE_OPENAI_CODEX_MODEL: ${{ env.TF_VAR_codex_model }}
      working-directory: ${{ github.workspace }}
      run: python3 scripts/integration_test.py

    - name: Smoke test gateway (embeddings + responses)
      uses: ./.github/actions/smoke-test-gateway
      with:
        gateway_url: ${{ steps.gw.outputs.url }}
        gateway_key: ${{ secrets.AIGATEWAY_KEY }}
        embedding_model: ${{ env.TF_VAR_embedding_deployment }}
        codex_model: ${{ env.TF_VAR_codex_model }}
        aoai_endpoint: ${{ env.TF_VAR_azure_openai_endpoint }}
        aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
        max_attempts: "3"
        retry_sleep: "10"

    # Only runs when a state service image is configured. Writes a selection for
    # a CI test user through the dashboard proxy and reads it back.
    - name: Smoke test shared state API (dashboard proxy)
      if: env.TF_VAR_state_service_container_image != ''
      shell: bash
      env:
        DASHBOARD_URL: ${{ steps.db.outputs.url }}
      run: |
        set -euo pipefail
        # The dashboard URL lookup is best-effort, so fail with a clear message
        # here rather than letting curl choke on a host-less URL.
        if [ -z "${DASHBOARD_URL}" ]; then
          echo "::error::Terraform output 'dashboard_url' is empty; cannot smoke test the shared state API."
          exit 1
        fi
        TEST_USER="ci-smoke-${TF_VAR_env}"
        # Build the JSON body with jq so the model name is always correctly escaped.
        payload=$(jq -nc --arg model "${TF_VAR_codex_model}" '{enabled: true, selected_model: $model}')
        curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/catalog" > /tmp/catalog.json
        curl -fsS --connect-timeout 5 --max-time 15 -X PUT "${DASHBOARD_URL}/api/state/selection" \
          -H "Content-Type: application/json" \
          -H "X-User-Id: ${TEST_USER}" \
          -d "${payload}" > /tmp/selection-put.json
        curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/selection" \
          -H "X-User-Id: ${TEST_USER}" > /tmp/selection-get.json
        jq -e '.enabled == true' /tmp/selection-get.json > /dev/null
# Deploy job for prod: runs after `plan` on manual dispatch or on pushes to
# main. Re-plans, applies, then runs diagnostics and smoke tests.
deploy-prod:
  name: Deploy prod
  needs: plan
  runs-on: ubuntu-latest
  if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
  environment: prod
  defaults:
    run:
      working-directory: infra/env/prod
  env:
    TF_VAR_env: "prod"
    TF_VAR_projname: "aigateway"
    TF_VAR_location: "southafricanorth"
    TF_VAR_location_short: "san"
    TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
    TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }}
    TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
    TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
    TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }}
    TF_VAR_codex_model: "gpt-4o"
    TF_VAR_codex_api_version: "2025-01-01-preview"
    TF_VAR_embedding_deployment: "text-embedding-3-large"
    TF_VAR_embeddings_api_version: "2024-02-01"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Fails fast, listing every missing required value, before any Azure or
    # Terraform call. In prod it can additionally pin the Azure OpenAI endpoint
    # host to an expected value.
    - name: Quickcheck required secrets and config
      shell: bash
      env:
        # Optional guard: set the EXPECTED_AOAI_ENDPOINT_HOST repository or
        # environment variable to enforce the host check below. Without this
        # mapping the script variable is never defined and the check is dead.
        # When the variable is unset the check is skipped.
        EXPECTED_AOAI_ENDPOINT_HOST: ${{ vars.EXPECTED_AOAI_ENDPOINT_HOST || '' }}
      run: |
        set -euo pipefail
        missing=0
        required=(
          AZURE_CLIENT_ID
          AZURE_TENANT_ID
          AZURE_SUBSCRIPTION_ID
          TF_BACKEND_RG
          TF_BACKEND_SA
          TF_BACKEND_CONTAINER
          TF_VAR_azure_openai_endpoint
          TF_VAR_azure_openai_api_key
          TF_VAR_gateway_key
        )
        for v in "${required[@]}"; do
          # ${!v} is bash indirect expansion: the value of the variable named by $v.
          if [ -z "${!v:-}" ]; then
            echo "::error::Missing required value: ${v}"
            missing=1
          else
            echo "${v}=SET"
          fi
        done
        echo "TF_VAR_env=${TF_VAR_env:-unset}"
        echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}"
        echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}"
        if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then
          echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}"
          # Reduce the endpoint URL to its host part for the log and the guard.
          endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#')
          echo "Azure OpenAI endpoint host=${endpoint_host}"
          if [ -n "${EXPECTED_AOAI_ENDPOINT_HOST:-}" ] && [ "${endpoint_host}" != "${EXPECTED_AOAI_ENDPOINT_HOST}" ]; then
            echo "::error::Prod AOAI endpoint host mismatch. Expected '${EXPECTED_AOAI_ENDPOINT_HOST}', got '${endpoint_host}'. Check environment secret AZURE_OPENAI_ENDPOINT."
            missing=1
          fi
        fi
        if [ "${missing}" -ne 0 ]; then
          exit 1
        fi

    - name: Azure Login
      uses: azure/login@v2
      with:
        client-id: ${{ env.AZURE_CLIENT_ID }}
        tenant-id: ${{ env.AZURE_TENANT_ID }}
        subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3
      with:
        terraform_version: "1.14.6"

    - name: Terraform Init
      run: |
        terraform init \
          -backend-config="resource_group_name=${TF_BACKEND_RG}" \
          -backend-config="storage_account_name=${TF_BACKEND_SA}" \
          -backend-config="container_name=${TF_BACKEND_CONTAINER}" \
          -backend-config="key=prod.terraform.tfstate"

    # Repository-local composite action; its behaviour is defined outside this file.
    - name: Import existing Container App into Terraform state
      uses: ./.github/actions/import-container-app
      with:
        projname: ${{ env.TF_VAR_projname }}
        env: ${{ env.TF_VAR_env }}
        location_short: ${{ env.TF_VAR_location_short }}
        subscription_id: ${{ env.AZURE_SUBSCRIPTION_ID }}
        terraform_working_directory: infra/env/prod

    - name: Terraform Plan
      run: |
        terraform plan -out=tfplan

    - name: Terraform Apply
      run: |
        terraform apply -auto-approve tfplan

    # The output is captured in its own assignment so that a failing
    # `terraform output` fails this step instead of silently yielding an empty URL.
    - name: Get gateway URL
      id: gw
      run: |
        url="$(terraform output -raw gateway_url)"
        echo "url=${url}" >> "$GITHUB_OUTPUT"

    # Best-effort: the dashboard output is optional, so a missing output yields an empty URL.
    - name: Get dashboard URL
      id: db
      run: echo "url=$(terraform output -raw dashboard_url 2>/dev/null || true)" >> "$GITHUB_OUTPUT"

    # Read-only `az containerapp` queries that log the deployed revision and configuration.
    - name: Runtime diagnostics (Container App config)
      shell: bash
      env:
        # Passed via env rather than interpolated into the script text.
        GATEWAY_URL: ${{ steps.gw.outputs.url }}
      run: |
        set -euo pipefail
        RG_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-rg-${TF_VAR_location_short}"
        CA_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-ca-${TF_VAR_location_short}"
        echo "Resource Group: ${RG_NAME}"
        echo "Container App: ${CA_NAME}"
        echo "Gateway URL (terraform output): ${GATEWAY_URL}"
        echo "Latest revision:"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.latestRevisionName" -o tsv
        echo "Active revisions (name, active, created):"
        az containerapp revision list -g "${RG_NAME}" -n "${CA_NAME}" --query "[].{name:name,active:properties.active,created:properties.createdTime}" -o table
        echo "Configured env vars for LiteLLM secret refs:"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_AZURE_OPENAI_API_KEY' || name=='LITELLM_GATEWAY_KEY']" -o json
        echo "Configured secret sources (names + key vault URLs):"
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.configuration.secrets[].{name:name,keyVaultUrl:keyVaultUrl}" -o table
        echo "LITELLM_CONFIG_CONTENT excerpt (first 2000 chars):"
        # `|| true`: with pipefail, `head` closing the pipe early must not fail the step.
        az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_CONFIG_CONTENT'].value | [0]" -o tsv | head -c 2000 || true
        echo

    # Runs from the repository root, not the Terraform directory.
    - name: Integration test (Azure OpenAI backend)
      shell: bash
      env:
        AZURE_OPENAI_ENDPOINT: ${{ env.TF_VAR_azure_openai_endpoint }}
        AZURE_OPENAI_API_KEY: ${{ env.TF_VAR_azure_openai_api_key }}
        AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ env.TF_VAR_azure_openai_embedding_endpoint }}
        AZURE_OPENAI_EMBEDDING_API_KEY: ${{ env.TF_VAR_azure_openai_embedding_api_key }}
        AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ env.TF_VAR_embedding_deployment }}
        AZURE_OPENAI_API_VERSION: ${{ env.TF_VAR_embeddings_api_version }}
        AZURE_OPENAI_CHAT_DEPLOYMENT: "gpt-4.1"
        AZURE_OPENAI_CHAT_API_VERSION: ${{ env.TF_VAR_codex_api_version }}
        AZURE_OPENAI_CODEX_MODEL: ${{ env.TF_VAR_codex_model }}
      working-directory: ${{ github.workspace }}
      run: python3 scripts/integration_test.py

    - name: Smoke test gateway (embeddings + responses)
      uses: ./.github/actions/smoke-test-gateway
      with:
        gateway_url: ${{ steps.gw.outputs.url }}
        gateway_key: ${{ secrets.AIGATEWAY_KEY }}
        embedding_model: ${{ env.TF_VAR_embedding_deployment }}
        codex_model: ${{ env.TF_VAR_codex_model }}
        aoai_endpoint: ${{ env.TF_VAR_azure_openai_endpoint }}
        aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
        max_attempts: "3"
        retry_sleep: "15"  # prod: longer cold-start; allow more time between retries
        models_wait_attempts: "3"  # prod: wait longer for LiteLLM to register healthy deployments
        models_wait_sleep: "30"

    # Only runs when a state service image is configured. Writes a selection for
    # a CI test user through the dashboard proxy and reads it back.
    - name: Smoke test shared state API (dashboard proxy)
      if: env.TF_VAR_state_service_container_image != ''
      shell: bash
      env:
        DASHBOARD_URL: ${{ steps.db.outputs.url }}
      run: |
        set -euo pipefail
        # The dashboard URL lookup is best-effort, so fail with a clear message
        # here rather than letting curl choke on a host-less URL.
        if [ -z "${DASHBOARD_URL}" ]; then
          echo "::error::Terraform output 'dashboard_url' is empty; cannot smoke test the shared state API."
          exit 1
        fi
        TEST_USER="ci-smoke-${TF_VAR_env}"
        # Build the JSON body with jq so the model name is always correctly escaped.
        payload=$(jq -nc --arg model "${TF_VAR_codex_model}" '{enabled: true, selected_model: $model}')
        curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/catalog" > /tmp/catalog.json
        curl -fsS --connect-timeout 5 --max-time 15 -X PUT "${DASHBOARD_URL}/api/state/selection" \
          -H "Content-Type: application/json" \
          -H "X-User-Id: ${TEST_USER}" \
          -d "${payload}" > /tmp/selection-put.json
        curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/selection" \
          -H "X-User-Id: ${TEST_USER}" > /tmp/selection-get.json
        jq -e '.enabled == true' /tmp/selection-get.json > /dev/null