# Captured from the GitHub Actions web UI for run:
#   "Merge pull request #72 from mariusiordan/staging" (#72)

# .github/workflows/deploy.yml
# Trigger: push to main branch
#
# Pipeline 3 - Production Deployment (Blue/Green Strategy)
#
# This pipeline does NOT rebuild images - it promotes the image
# already built, tested, and validated in the Staging pipeline.
# Tests run in test.yml on PR - no need to repeat them here.
#
# Flow:
# promote-image
# └── approve (manual)
#     └── detect-environment
#         └── backup-database
#             └── deploy-to-idle
#                 └── smoke-tests
#                     └── switch-traffic
#                         └── monitor (10 min)
#                             ├── production-health-check  (stable=true)
#                             └── rollback                 (stable=false)
# aws-update (suspended - activate manually for DR demo)
name: Deploy

on:
  push:
    branches:
      - main

jobs:
# ============================================================
# JOB 1 - Promote Staging Image to Production
# The image was already built and tested in the Staging pipeline
# This job retags the validated ':staging' image with a production tag
# No rebuild occurs - we deploy exactly what was tested on staging
# ============================================================
promote-image:
name: Promote Staging Image to Production
runs-on: ubuntu-24.04
permissions:
contents: read
packages: write
outputs:
image_tag: ${{ steps.tag.outputs.tag }}
steps:
- uses: actions/checkout@v4
# deploy.yml - Generate production image tag
- name: Generate production image tag
id: tag
run: |
# Application version — must match the version set in staging.yml
# Always update both files together when releasing a new version
#
# This tag is applied when promoting the ':staging' image to production
# The same image built in staging.yml is retagged here — no rebuild occurs
# After a successful 10-minute health check, this tag is promoted to ':latest'
VERSION="v1.0"
# Git SHA — links this deployment directly to the source commit
# Allows instant traceability: image tag → git commit → code changes
SHA=$(git rev-parse --short HEAD)
# Final tag format: v{MAJOR}.{MINOR}-sha-{git-sha}
# Example: v1.0-sha-abc1234
TAG="${VERSION}-sha-${SHA}"
echo "tag=${TAG}" >> $GITHUB_OUTPUT
echo "Production tag: ${TAG}"
- name: Login to GitHub Container Registry
run: |
echo "Authenticating with GitHub Container Registry..."
echo "${{ secrets.GHCR_TOKEN }}" | docker login ghcr.io -u mariusiordan --password-stdin
echo "✅ OK - Authentication successful"
- name: Promote frontend image from staging to production
run: |
echo "Promoting frontend image..."
echo "Source: ghcr.io/mariusiordan/silverbank-frontend:staging"
echo "Target: ghcr.io/mariusiordan/silverbank-frontend:${{ steps.tag.outputs.tag }}"
docker buildx imagetools create \
-t ghcr.io/mariusiordan/silverbank-frontend:${{ steps.tag.outputs.tag }} \
ghcr.io/mariusiordan/silverbank-frontend:staging
echo "✅ OK - Frontend image promoted"
- name: Promote backend image from staging to production
run: |
echo "Promoting backend image..."
echo "Source: ghcr.io/mariusiordan/silverbank-backend:staging"
echo "Target: ghcr.io/mariusiordan/silverbank-backend:${{ steps.tag.outputs.tag }}"
docker buildx imagetools create \
-t ghcr.io/mariusiordan/silverbank-backend:${{ steps.tag.outputs.tag }} \
ghcr.io/mariusiordan/silverbank-backend:staging
echo "✅ OK - Backend image promoted"
- name: Promotion summary
run: |
echo "✅ OK - Images promoted from staging to production"
echo "Frontend: ghcr.io/mariusiordan/silverbank-frontend:${{ steps.tag.outputs.tag }}"
echo "Backend: ghcr.io/mariusiordan/silverbank-backend:${{ steps.tag.outputs.tag }}"
echo "Note: ':latest' will only be applied after successful 10-minute health check"
# ============================================================
# JOB 2 - Manual Approval Gate
# Requires a release manager to approve before production deploy
# This is the last checkpoint before touching production
# ============================================================
approve:
name: Manual Approval
runs-on: ubuntu-24.04
needs: promote-image
environment: production
steps:
- name: Approval granted
run: |
echo "✅ OK - Manual approval granted"
echo "Release manager has approved production deployment"
echo "Proceeding with Blue/Green deployment..."
# ============================================================
# JOB 3 - Detect Active Environment
# Reads /opt/current-env on edge nginx to determine which
# environment (Blue or Green) is currently serving traffic
# The idle environment will receive the new deployment
# ============================================================
detect-environment:
name: Detect Active Environment
runs-on: self-hosted
needs: approve
outputs:
active_env: ${{ steps.detect.outputs.active_env }}
idle_env: ${{ steps.detect.outputs.idle_env }}
steps:
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Detect active environment
id: detect
run: |
echo "Reading active environment from edge nginx..."
ACTIVE=$(ssh [email protected] "cat /opt/current-env 2>/dev/null || echo 'green'")
if [ "$ACTIVE" = "blue" ]; then
IDLE="green"
else
IDLE="blue"
fi
echo "active_env=${ACTIVE}" >> $GITHUB_OUTPUT
echo "idle_env=${IDLE}" >> $GITHUB_OUTPUT
echo "✅ OK - Active environment: ${ACTIVE}"
echo "✅ OK - Idle environment (deploy target): ${IDLE}"
# ============================================================
# JOB 4 - Backup Database to S3
# pg_dump before touching production
# Backup is linked to the image tag for traceability
# Stored in S3 — survives a full Proxmox failure
# ============================================================
backup-database:
name: Backup Database to S3
runs-on: self-hosted
needs: [promote-image, detect-environment]
steps:
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
echo "✅ OK - SSH configured"
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
- name: pg_dump and upload to S3
run: |
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
TAG="${{ needs.promote-image.outputs.image_tag }}"
BACKUP_FILE="backup_${TIMESTAMP}_${TAG}.sql"
echo "Creating database backup..."
echo "Tag: ${TAG}"
echo "File: ${BACKUP_FILE}"
ssh [email protected] \
"docker exec postgres pg_dump -U devop_db appdb > /tmp/${BACKUP_FILE}"
echo "✅ OK - pg_dump complete"
scp [email protected]:/tmp/${BACKUP_FILE} /tmp/${BACKUP_FILE}
echo "✅ OK - Backup copied to runner"
/usr/local/bin/aws s3 cp /tmp/${BACKUP_FILE} \
s3://silverbank-tfstate-mariusiordan/db-backups/${BACKUP_FILE}
echo "✅ OK - Backup uploaded to S3"
echo "Location: s3://silverbank-tfstate-mariusiordan/db-backups/${BACKUP_FILE}"
ssh [email protected] "rm /tmp/${BACKUP_FILE}"
rm /tmp/${BACKUP_FILE}
echo "✅ OK - Cleanup complete"
# ============================================================
# JOB 5 - Deploy to Idle Environment
# Deploys the promoted production image to the idle environment
# Traffic is still flowing to the active environment at this point
# No user impact during this step
# ============================================================
deploy-to-idle:
name: Deploy to Idle Environment
runs-on: self-hosted
needs: [promote-image, detect-environment, backup-database]
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
echo "Installing Ansible..."
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
echo "✅ OK - Ansible installed"
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
echo "✅ OK - SSH configured"
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
echo "✅ OK - Vault password configured"
- name: Deploy to idle environment
run: |
echo "Deploying to idle environment: ${{ needs.detect-environment.outputs.idle_env }}"
echo "Image tag: ${{ needs.promote-image.outputs.image_tag }}"
echo "Active environment keeps serving traffic during this step..."
cd proxmox-silverbank/ansible
ansible-playbook playbooks/deploy-idle.yml \
-e "app_tag=${{ needs.promote-image.outputs.image_tag }}" \
-e "idle_env=${{ needs.detect-environment.outputs.idle_env }}" \
-i inventory.ini
echo "✅ OK - Deployment to ${{ needs.detect-environment.outputs.idle_env }} complete"
# ============================================================
# JOB 6 - Smoke Tests
# Runs health checks directly on the idle environment
# Bypasses nginx - traffic still flows to active environment
# If smoke tests fail - deployment stops, no traffic switch occurs
# ============================================================
smoke-tests:
name: Smoke Tests
runs-on: self-hosted
needs: [promote-image, detect-environment, deploy-to-idle]
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
- name: Run smoke tests on idle environment
run: |
echo "Running smoke tests on idle environment: ${{ needs.detect-environment.outputs.idle_env }}"
echo "Bypassing nginx - testing directly on idle VM..."
cd proxmox-silverbank/ansible
ansible-playbook playbooks/smoke-tests.yml \
-e "idle_env=${{ needs.detect-environment.outputs.idle_env }}" \
-i inventory.ini
echo "✅ OK - Smoke tests passed on ${{ needs.detect-environment.outputs.idle_env }}"
# ============================================================
# JOB 7 - Switch Traffic
# Change nginx config to point to the newly deployed environment
# ============================================================
switch-traffic:
name: Switch Traffic
runs-on: self-hosted
needs: [promote-image, detect-environment, smoke-tests]
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
- name: Switch nginx traffic
run: |
echo "Switching traffic to: ${{ needs.detect-environment.outputs.idle_env }}"
cd proxmox-silverbank/ansible
ansible-playbook playbooks/switch-traffic.yml \
-e "idle_env=${{ needs.detect-environment.outputs.idle_env }}" \
-i inventory.ini
echo "✅ OK - Traffic switched to ${{ needs.detect-environment.outputs.idle_env }}"
# ============================================================
# JOB 8 - Monitor
# Monitor 10 minute after switch
# Output: stable=true/false for conditional rollback
# ============================================================
monitor:
name: Monitor (10 min)
runs-on: self-hosted
needs: [promote-image, detect-environment, switch-traffic]
outputs:
stable: ${{ steps.run-monitor.outputs.stable }}
steps:
- name: Checkout infrastructure repo
uses: actions/checkout@v4
with:
repository: mariusiordan/DevOps-final-project
token: ${{ secrets.GHCR_TOKEN }}
- name: Install Ansible
run: |
pip install ansible --break-system-packages
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Setup Ansible vault password
run: |
echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
chmod 600 ~/.vault-password
- name: Run monitoring
id: run-monitor
run: |
cd proxmox-silverbank/ansible
if ansible-playbook playbooks/rollback.yml \
-e "app_tag=${{ needs.promote-image.outputs.image_tag }}" \
-e "new_env=${{ needs.detect-environment.outputs.idle_env }}" \
-e "previous_env=${{ needs.detect-environment.outputs.active_env }}" \
-i inventory.ini; then
echo "stable=true" >> $GITHUB_OUTPUT
echo "✅ OK - Deployment stable"
else
echo "stable=false" >> $GITHUB_OUTPUT
echo "❌ Monitoring failed - rollback needed"
fi
# ============================================================
# JOB 9 - Production Health Check
# Runs only if monitor detects stability (stable=true)
# ============================================================
production-health-check:
name: Production Health Check
runs-on: self-hosted
needs: monitor
if: needs.monitor.outputs.stable == 'true'
steps:
- name: Final health check
run: |
sleep 10
curl -f http://192.168.7.50/api/health || exit 1
echo "✅ OK - Production is healthy"
echo "✅ OK - Deployment complete"
# ============================================================
# JOB 10 - Rollback
# Runs only if monitor detects instability (stable=false)
# Switches nginx back to the previous environment
# Verifies rollback success with a health check
# ============================================================
rollback:
name: Rollback to Previous Environment
runs-on: self-hosted
needs: [detect-environment, monitor]
if: needs.monitor.outputs.stable == 'false'
steps:
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
echo "StrictHostKeyChecking no" >> ~/.ssh/config
- name: Rollback nginx to previous environment
run: |
echo "Rolling back to: ${{ needs.detect-environment.outputs.active_env }}"
ssh [email protected] \
"sudo /opt/switch-backend.sh ${{ needs.detect-environment.outputs.active_env }}"
echo "✅ OK - Rolled back to ${{ needs.detect-environment.outputs.active_env }}"
- name: Verify rollback
run: |
sleep 10
curl -f http://192.168.7.50/api/health || exit 1
echo "✅ OK - Previous environment healthy after rollback"
- name: Fail pipeline
run: |
echo "❌ Deployment failed — rolled back to ${{ needs.detect-environment.outputs.active_env }}"
echo "❌ :latest was NOT updated"
exit 1
# ============================================================
# JOB 11 - Update AWS Disaster Recovery (if active) - suspended
# Checks if the AWS DR environment is running
# If active - pulls ':latest' image and redeploys
# If not active - skips silently
# AWS IPs are read dynamically from Terraform state in S3
# ============================================================
# ============================================================
# JOB 11 - Update AWS Disaster Recovery (suspended)
# Automatically updates AWS DR environment after every successful
# production deployment when AWS infrastructure is active.
#
# Suspended reason: AWS DR is activated manually for demo/DR purposes only.
# Cost optimization — NAT Gateway costs ~$33/month when running.
#
# To re-enable:
# 1. Uncomment this job
# 2. Ensure AWS infrastructure is provisioned (terraform apply)
# 3. Ensure GitHub secrets are set: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
#
# Flow when active:
# - Reads edge IP dynamically from Terraform state in S3
# - Checks if AWS DR is responding on /api/health
# - If active → pulls ':latest' image and redeploys via Ansible
# - If not active → skips silently, no action required
# ============================================================
# aws-update:
# name: Update AWS DR (if active)
# runs-on: self-hosted
# needs: production-health-check
# steps:
# - name: Checkout infrastructure repo
# uses: actions/checkout@v4
# with:
# repository: mariusiordan/DevOps-final-project
# token: ${{ secrets.GHCR_TOKEN }}
#
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v4
# with:
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# aws-region: eu-west-2
#
# - name: Install unzip
# run: |
# sudo apt-get install -y unzip
# echo "✅ OK - unzip installed"
#
# - name: Setup Terraform
# uses: hashicorp/setup-terraform@v3
# with:
# terraform_wrapper: false
#
# - name: Get AWS IPs from Terraform state
# id: tf
# working-directory: aws-silverbank/terraform
# run: |
# echo "Reading infrastructure IPs from Terraform state in S3..."
# terraform init -reconfigure
# echo "edge_ip=$(terraform output -raw edge_elastic_ip)" >> $GITHUB_OUTPUT
# echo "blue_ip=$(terraform output -raw blue_private_ip)" >> $GITHUB_OUTPUT
# echo "db_ip=$(terraform output -raw db_private_ip)" >> $GITHUB_OUTPUT
# echo "✅ OK - IPs retrieved from Terraform state"
#
# - name: Check if AWS DR is active
# id: aws_check
# run: |
# echo "Checking if AWS Disaster Recovery environment is active..."
# if curl -sf --max-time 10 http://${{ steps.tf.outputs.edge_ip }}/api/health > /dev/null 2>&1; then
# echo "active=true" >> $GITHUB_OUTPUT
# echo "✅ OK - AWS DR is active - update will proceed"
# else
# echo "active=false" >> $GITHUB_OUTPUT
# echo "INFO - AWS DR is not active - skipping update"
# fi
#
# - name: Install Ansible
# if: steps.aws_check.outputs.active == 'true'
# run: |
# pip install ansible --break-system-packages
# echo "$HOME/.local/bin" >> $GITHUB_PATH
# echo "✅ OK - Ansible installed"
#
# - name: Setup SSH key
# if: steps.aws_check.outputs.active == 'true'
# run: |
# mkdir -p ~/.ssh
# echo "${{ secrets.PROXMOX_SSH_KEY }}" > ~/.ssh/id_ed25519
# chmod 600 ~/.ssh/id_ed25519
# echo "StrictHostKeyChecking no" >> ~/.ssh/config
# echo "✅ OK - SSH configured"
#
# - name: Setup Ansible vault password
# if: steps.aws_check.outputs.active == 'true'
# run: |
# echo "${{ secrets.VAULT_PASSWORD }}" > ~/.vault-password
# chmod 600 ~/.vault-password
# echo "✅ OK - Vault password configured"
#
# - name: Generate AWS inventory
# if: steps.aws_check.outputs.active == 'true'
# run: |
# echo "Generating Ansible inventory from Terraform outputs..."
# cat > aws-silverbank/ansible/inventory-aws.ini << EOF
# [edge]
# edge-nginx ansible_host=${{ steps.tf.outputs.edge_ip }} ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/id_ed25519 ansible_ssh_common_args='-o StrictHostKeyChecking=no'
#
# [prod]
# prod-vm1-BLUE ansible_host=${{ steps.tf.outputs.blue_ip }}
#
# [prod:vars]
# ansible_user=ubuntu
# ansible_ssh_private_key_file=~/.ssh/id_ed25519
# ansible_ssh_common_args='-o StrictHostKeyChecking=no -o ProxyJump=ubuntu@${{ steps.tf.outputs.edge_ip }}'
#
# [db]
# db-postgresql ansible_host=${{ steps.tf.outputs.db_ip }}
#
# [db:vars]
# ansible_user=ubuntu
# ansible_ssh_private_key_file=~/.ssh/id_ed25519
# ansible_ssh_common_args='-o StrictHostKeyChecking=no -o ProxyJump=ubuntu@${{ steps.tf.outputs.edge_ip }}'
# EOF
# echo "✅ OK - Inventory generated"
#
# - name: Update app on AWS DR
# if: steps.aws_check.outputs.active == 'true'
# working-directory: aws-silverbank/ansible
# run: |
# echo "Deploying ':latest' image to AWS DR environment..."
# echo "This image was just promoted after a successful monitoring window"
# ansible-playbook playbooks/deploy-production.yml \
# -i inventory-aws.ini \
# -e "app_tag=latest" \
# --vault-password-file ~/.vault-password
#
# - name: Health check AWS DR
# if: steps.aws_check.outputs.active == 'true'
# run: |
# echo "Running health check on AWS DR environment..."
# sleep 30
# curl -f http://${{ steps.tf.outputs.edge_ip }}/api/health || exit 1
# echo "✅ OK - AWS DR updated and healthy"
# echo "✅ OK - Production and DR environments are now in sync"