# Captured from the GitHub Actions web UI for run:
#   "Merge pull request #72 from mariusiordan/staging" (#72)

# .github/workflows/deploy.yml
# Trigger: push to main branch
#
# Pipeline 3 - Production Deployment (Blue/Green Strategy)
#
# This pipeline does NOT rebuild images - it promotes the image
# already built, tested, and validated in the Staging pipeline.
# Tests run in test.yml on PR - no need to repeat them here.
#
# Flow:
# promote-image
# └── approve (manual)
#     └── detect-environment
#         └── backup-database
#             └── deploy-to-idle
#                 └── smoke-tests
#                     └── switch-traffic
#                         └── monitor (10 min)
#                             ├── production-health-check  (stable=true)
#                             └── rollback                 (stable=false)
# aws-update (suspended - activate manually for DR demo)
name: Deploy

on:
  push:
    branches:
      - main

jobs:
# ============================================================
# JOB 1 - Promote Staging Image to Production
# The image was already built and tested in the Staging pipeline
# This job retags the validated ':staging' image with a production tag
# No rebuild occurs - we deploy exactly what was tested on staging
# ============================================================
promote-image:
name: Promote Staging Image to Production
runs-on: ubuntu-24.04
permissions:
contents: read
packages: write
outputs:
image_tag: ${{ steps.tag.outputs.tag }}
steps:
- uses: actions/checkout@v4
# deploy.yml - Generate production image tag
- name: Generate production image tag
id: tag
run: |
# Application version — must match the version set in staging.yml
# Always update both files together when releasing a new version
#
# This tag is applied when promoting the ':staging' image to production
# The same image built in staging.yml is retagged here — no rebuild occurs
# After a successful 10-minute health check, this tag is promoted to ':latest'
VERSION="v1.0"
# Git SHA — links this deployment directly to the source commit
# Allows instant traceability: image tag → git commit → code changes
SHA=$(git rev-parse --short HEAD)
# Final tag format: v{MAJOR}.{MINOR}-sha-{git-sha}
# Example: v1.0-sha-abc1234
TAG="${VERSION}-sha-${SHA}"
echo "tag=${TAG}" >> $GITHUB_OUTPUT
echo "Production tag: ${TAG}"
- name: Login to GitHub Container Registry
run: |
echo "Authenticating with GitHub Container Registry..."
echo "${{ secrets.GHCR_TOKEN }}" | docker login ghcr.io -u mariusiordan --password-stdin
echo "✅ OK - Authentication successful"
- name: Promote frontend image from staging to production
run: |
echo "Promoting frontend image..."
echo "Source: ghcr.io/mariusiordan/silverbank-frontend:staging"
echo "Target: ghcr.io/mariusiordan/silverbank-frontend:${{ steps.tag.outputs.tag }}"
docker buildx imagetools create \
-t ghcr.io/mariusiordan/silverbank-frontend:${{ steps.tag.outputs.tag }} \
ghcr.io/mariusiordan/silverbank-frontend:staging
echo "✅ OK - Frontend image promoted"
- name: Promote backend image from staging to production
run: |
echo "Promoting backend image..."
echo "Source: ghcr.io/mariusiordan/silverbank-backend:staging"
echo "Target: ghcr.io/mariusiordan/silverbank-backend:${{ steps.tag.outputs.tag }}"
docker buildx imagetools create \
-t ghcr.io/mariusiordan/silverbank-backend:${{ steps.tag.outputs.tag }} \
ghcr.io/mariusiordan/silverbank-backend:staging
echo "✅ OK - Backend image promoted"
- name: Promotion summary
run: |
echo "✅ OK - Images promoted from staging to production"
echo "Frontend: ghcr.io/mariusiordan/silverbank-frontend:${{ steps.tag.outputs.tag }}"
echo "Backend: ghcr.io/mariusiordan/silverbank-backend:${{ steps.tag.outputs.tag }}"
echo "Note: ':latest' will only be applied after successful 10-minute health check"
# ============================================================
# JOB 2 - Manual Approval Gate
# Requires a release manager to approve before production deploy
# This is the last checkpoint before touching production
# ============================================================
approve:
name: Manual Approval
runs-on: ubuntu-24.04
needs: promote-image
environment: production
steps:
- name: Approval granted
run: |
echo "✅ OK - Manual approval granted"
echo "Release manager has approved production deployment"
echo "Proceeding with Blue/Green deployment..."
# ============================================================
# JOB 3 - Detect Active Environment
# Reads /opt/current-env on edge nginx to determine which
# environment (Blue or Green) is currently serving traffic
# The idle environment will receive the new deployment
# ============================================================
detect-environment:
name: Detect Active Environment
runs-on: self-hosted
needs: approve
outputs:
active_env: ${{ steps.detect.outputs.active_env }}
idle_env: ${{ steps.detect.outputs.idle_env }}
steps:
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Detect active environment
id: detect
run: |
echo "Reading active environment from edge nginx..."
ACTIVE=$(ssh [email protected] "cat /opt/current-env 2>/dev/null || echo 'green'")
if [ "$ACTIVE" = "blue" ]; then
IDLE="green"
else
IDLE="blue"
fi
echo "active_env=${ACTIVE}" >> $GITHUB_OUTPUT
echo "idle_env=${IDLE}" >> $GITHUB_OUTPUT
echo "✅ OK - Active environment: ${ACTIVE}"
echo "✅ OK - Idle environment (deploy target): ${IDLE}"
# ============================================================
# JOB 4 - Backup Database to S3
# pg_dump before touching production
# Backup is linked to the image tag for traceability
# Stored in S3 — survives a full Proxmox failure
# ============================================================
backup-database:
name: Backup Database to S3
runs-on: self-hosted
needs: [promote-image, detect-environment]
steps:
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
echo "✅ OK - SSH configured"
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
- name: pg_dump and upload to S3
run: |
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
TAG="${{ needs.promote-image.outputs.image_tag }}"
BACKUP_FILE="backup_${TIMESTAMP}_${TAG}.sql"
echo "Creating database backup..."
echo "Tag: ${TAG}"
echo "File: ${BACKUP_FILE}"
ssh [email protected] \
"docker exec postgres pg_dump -U devop_db appdb > /tmp/${BACKUP_FILE}"
echo "✅ OK - pg_dump complete"
scp [email protected]:/tmp/${BACKUP_FILE} /tmp/${BACKUP_FILE}
echo "✅ OK - Backup copied to runner"
/usr/local/bin/aws s3 cp /tmp/${BACKUP_FILE} \
s3://silverbank-tfstate-mariusiordan/db-backups/${BACKUP_FILE}
echo "✅ OK - Backup uploaded to S3"
echo "Location: s3://silverbank-tfstate-mariusiordan/db-backups/${BACKUP_FILE}"
ssh [email protected] "rm /tmp/${BACKUP_FILE}"
rm /tmp/${BACKUP_FILE}
echo "✅ OK - Cleanup complete"
# ============================================================
# JOB 5 - Deploy to Idle Environment
# Deploys the promoted production image to the idle environment
# Traffic is still flowing to the active environment at this point
# No user impact during this step
# ============================================================
deploy-to-idle:
name: Deploy to Idle Environment
runs-on: self-hosted
needs: [promote-image, detect-environment, backup-database]
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
echo "Installing Ansible..."
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
echo "✅ OK - Ansible installed"
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
echo "✅ OK - SSH configured"
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
echo "✅ OK - Vault password configured"
- name: Deploy to idle environment
run: |
echo "Deploying to idle environment: ${{ needs.detect-environment.outputs.idle_env }}"
echo "Image tag: ${{ needs.promote-image.outputs.image_tag }}"
echo "Active environment keeps serving traffic during this step..."
cd proxmox-silverbank/ansible
ansible-playbook playbooks/deploy-idle.yml \
-e "app_tag=${{ needs.promote-image.outputs.image_tag }}" \
-e "idle_env=${{ needs.detect-environment.outputs.idle_env }}" \
-i inventory.ini
echo "✅ OK - Deployment to ${{ needs.detect-environment.outputs.idle_env }} complete"
# ============================================================
# JOB 6 - Smoke Tests
# Runs health checks directly on the idle environment
# Bypasses nginx - traffic still flows to active environment
# If smoke tests fail - deployment stops, no traffic switch occurs
# ============================================================
smoke-tests:
name: Smoke Tests
runs-on: self-hosted
needs: [promote-image, detect-environment, deploy-to-idle]
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
- name: Run smoke tests on idle environment
run: |
echo "Running smoke tests on idle environment: ${{ needs.detect-environment.outputs.idle_env }}"
echo "Bypassing nginx - testing directly on idle VM..."
cd proxmox-silverbank/ansible
ansible-playbook playbooks/smoke-tests.yml \
-e "idle_env=${{ needs.detect-environment.outputs.idle_env }}" \
-i inventory.ini
echo "✅ OK - Smoke tests passed on ${{ needs.detect-environment.outputs.idle_env }}"
# ============================================================
# JOB 7 - Switch Traffic
# Change nginx config to point to the newly deployed environment
# ============================================================
switch-traffic:
name: Switch Traffic
runs-on: self-hosted
needs: [promote-image, detect-environment, smoke-tests]
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
- name: Switch nginx traffic
run: |
echo "Switching traffic to: ${{ needs.detect-environment.outputs.idle_env }}"
cd proxmox-silverbank/ansible
ansible-playbook playbooks/switch-traffic.yml \
-e "idle_env=${{ needs.detect-environment.outputs.idle_env }}" \
-i inventory.ini
echo "✅ OK - Traffic switched to ${{ needs.detect-environment.outputs.idle_env }}"
# ============================================================
# JOB 8 - Monitor
# Monitor 10 minute after switch
# Output: stable=true/false for conditional rollback
# ============================================================
monitor:
name: Monitor (10 min)
runs-on: self-hosted
needs: [promote-image, detect-environment, switch-traffic]
outputs:
stable: ${{ steps.run-monitor.outputs.stable }}
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
- name: Run monitoring
id: run-monitor
run: |
cd proxmox-silverbank/ansible
if ansible-playbook playbooks/rollback.yml \
-e "app_tag=${{ needs.promote-image.outputs.image_tag }}" \
-e "new_env=${{ needs.detect-environment.outputs.idle_env }}" \
-e "previous_env=${{ needs.detect-environment.outputs.active_env }}" \
-i inventory.ini; then
echo "stable=true" >> $GITHUB_OUTPUT
echo "✅ OK - Deployment stable"
else
echo "stable=false" >> $GITHUB_OUTPUT
echo "❌ Monitoring failed - rollback needed"
fi
# ============================================================
# JOB 9 - Production Health Check
# Runs only if monitor detects stability (stable=true)
# ============================================================
production-health-check:
name: Production Health Check
runs-on: self-hosted
needs: monitor
if: needs.monitor.outputs.stable == 'true'
steps:
- name: Final health check
run: |
sleep 10
curl -f http://192.168.7.50/api/health || exit 1
echo "✅ OK - Production is healthy"
echo "✅ OK - Deployment complete"
# ============================================================
# JOB 10 - Rollback
# Runs only if monitor detects instability (stable=false)
# Switches nginx back to the previous environment
# Verifies rollback success with a health check
# ============================================================
rollback:
name: Rollback to Previous Environment
runs-on: self-hosted
needs: [detect-environment, monitor]
if: needs.monitor.outputs.stable == 'false'
steps:
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Rollback nginx to previous environment
run: |
echo "Rolling back to: ${{ needs.detect-environment.outputs.active_env }}"
ssh [email protected] \
"sudo /opt/switch-backend.sh ${{ needs.detect-environment.outputs.active_env }}"
echo "✅ OK - Rolled back to ${{ needs.detect-environment.outputs.active_env }}"
- name: Verify rollback
run: |
sleep 10
curl -f http://192.168.7.50/api/health || exit 1
echo "✅ OK - Previous environment healthy after rollback"
- name: Fail pipeline
run: |
echo "❌ Deployment failed — rolled back to ${{ needs.detect-environment.outputs.active_env }}"
echo "❌ :latest was NOT updated"
exit 1
# ============================================================
# JOB 11 - Update AWS Disaster Recovery (if active) - suspended
# Checks if the AWS DR environment is running
# If active - pulls ':latest' image and redeploys
# If not active - skips silently
# AWS IPs are read dynamically from Terraform state in S3
# ============================================================
# ============================================================
# JOB 11 - Update AWS Disaster Recovery (suspended)
# Automatically updates AWS DR environment after every successful
# production deployment when AWS infrastructure is active.
#
# Suspended reason: AWS DR is activated manually for demo/DR purposes only.
# Cost optimization — NAT Gateway costs ~$33/month when running.
#
# To re-enable:
# 1. Uncomment this job
# 2. Ensure AWS infrastructure is provisioned (terraform apply)
# 3. Ensure GitHub secrets are set: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
#
# Flow when active:
# - Reads edge IP dynamically from Terraform state in S3
# - Checks if AWS DR is responding on /api/health
# - If active → pulls ':latest' image and redeploys via Ansible
# - If not active → skips silently, no action required
# ============================================================
# aws-update:
# name: Update AWS DR (if active)
# runs-on: self-hosted
# needs: production-health-check
# steps:
# - name: Checkout infrastructure repo
# uses: actions/checkout@v4
# with:
# repository: mariusiordan/DevOps-final-project
# token: ${{ secrets.GHCR_TOKEN }}
#
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v4
# with:
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# aws-region: eu-west-2
#
# - name: Install unzip
# run: |
# sudo apt-get install -y unzip
# echo "✅ OK - unzip installed"
#
# - name: Setup Terraform
# uses: hashicorp/setup-terraform@v3
# with:
# terraform_wrapper: false
#
# - name: Get AWS IPs from Terraform state
# id: tf
# working-directory: aws-silverbank/terraform
# run: |
# echo "Reading infrastructure IPs from Terraform state in S3..."
# terraform init -reconfigure
# echo "edge_ip=$(terraform output -raw edge_elastic_ip)" >> $GITHUB_OUTPUT
# echo "blue_ip=$(terraform output -raw blue_private_ip)" >> $GITHUB_OUTPUT
# echo "db_ip=$(terraform output -raw db_private_ip)" >> $GITHUB_OUTPUT
# echo "✅ OK - IPs retrieved from Terraform state"
#
# - name: Check if AWS DR is active
# id: aws_check
# run: |
# echo "Checking if AWS Disaster Recovery environment is active..."
# if curl -sf --max-time 10 http://${{ steps.tf.outputs.edge_ip }}/api/health > /dev/null 2>&1; then
# echo "active=true" >> $GITHUB_OUTPUT
# echo "✅ OK - AWS DR is active - update will proceed"
# else
# echo "active=false" >> $GITHUB_OUTPUT
# echo "INFO - AWS DR is not active - skipping update"
# fi
#
# - name: Install Ansible
# if: steps.aws_check.outputs.active == 'true'
# run: |
# pip install ansible --break-system-packages
# echo "$HOME/.local/bin" >> $GITHUB_PATH
# echo "✅ OK - Ansible installed"
#
# - name: Setup SSH key
# if: steps.aws_check.outputs.active == 'true'
# run: |
# mkdir -p ~/.ssh
# echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
# chmod 600 ~/.ssh/id_ed25519
# echo "StrictHostKeyChecking no" >> ~/.ssh/config
# echo "✅ OK - SSH configured"
#
# - name: Setup Ansible vault password
# if: steps.aws_check.outputs.active == 'true'
# run: |
# echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
# chmod 600 ~/.vault-password
# echo "✅ OK - Vault password configured"
#
# - name: Generate AWS inventory
# if: steps.aws_check.outputs.active == 'true'
# run: |
# echo "Generating Ansible inventory from Terraform outputs..."
# cat > aws-silverbank/ansible/inventory-aws.ini << EOF
# [edge]
# edge-nginx ansible_host=${{ steps.tf.outputs.edge_ip }} ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/id_ed25519 ansible_ssh_common_args='-o StrictHostKeyChecking=no'
#
# [prod]
# prod-vm1-BLUE ansible_host=${{ steps.tf.outputs.blue_ip }}
#
# [prod:vars]
# ansible_user=ubuntu
# ansible_ssh_private_key_file=~/.ssh/id_ed25519
# ansible_ssh_common_args='-o StrictHostKeyChecking=no -o ProxyJump=ubuntu@${{ steps.tf.outputs.edge_ip }}'
#
# [db]
# db-postgresql ansible_host=${{ steps.tf.outputs.db_ip }}
#
# [db:vars]
# ansible_user=ubuntu
# ansible_ssh_private_key_file=~/.ssh/id_ed25519
# ansible_ssh_common_args='-o StrictHostKeyChecking=no -o ProxyJump=ubuntu@${{ steps.tf.outputs.edge_ip }}'
# EOF
# echo "✅ OK - Inventory generated"
#
# - name: Update app on AWS DR
# if: steps.aws_check.outputs.active == 'true'
# working-directory: aws-silverbank/ansible
# run: |
# echo "Deploying ':latest' image to AWS DR environment..."
# echo "This image was just promoted after a successful monitoring window"
# ansible-playbook playbooks/deploy-production.yml \
# -i inventory-aws.ini \
# -e "app_tag=latest" \
# --vault-password-file ~/.vault-password
#
# - name: Health check AWS DR
# if: steps.aws_check.outputs.active == 'true'
# run: |
# echo "Running health check on AWS DR environment..."
# sleep 30
# curl -f http://${{ steps.tf.outputs.edge_ip }}/api/health || exit 1
# echo "✅ OK - AWS DR updated and healthy"
# echo "✅ OK - Production and DR environments are now in sync"