Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ansible/inventory-password.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
all:
hosts:
vcl1:
ansible_host: 152.7.176.221
ansible_host: 152.7.176.240
ansible_user: sraval
ansible_connection: ssh
ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
Expand All @@ -21,7 +21,7 @@ all:
ansible_ssh_common_args: '-o StrictHostKeyChecking=no'

vcl3:
ansible_host: 152.7.178.104
ansible_host: 152.7.176.221
ansible_user: sraval
ansible_connection: ssh
ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
Expand Down
4 changes: 2 additions & 2 deletions ansible/inventory.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
all:
hosts:
vcl1:
ansible_host: 152.7.176.221
ansible_host: 152.7.176.240
ansible_user: sraval
ansible_ssh_private_key_file: ~/.ssh/id_ed25519

Expand All @@ -14,7 +14,7 @@ all:
ansible_ssh_private_key_file: ~/.ssh/id_ed25519

vcl3:
ansible_host: 152.7.178.104
ansible_host: 152.7.176.221
ansible_user: sraval
ansible_ssh_private_key_file: ~/.ssh/id_ed25519

Expand Down
30 changes: 17 additions & 13 deletions ansible/setup-replication.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,57 +20,61 @@
content: |
#!/bin/bash
# Sync database from VCL2 to VCL3
# This stores backups on VCL3 for failover use
# The monitor script will restore when failover is triggered
#
# Flow as written below: dump the database out of the coffee_db container,
# make sure the backup directory exists on VCL3, copy the dump there with scp,
# then prune old dumps.
#
# NOTE(review): this listing appears to interleave the removed and the added
# side of a diff (for example the two consecutive "if ... pg_dump" lines, the
# duplicated "Create backup directory" comments and the two "ls -t" pipelines).
# Confirm against the real file before relying on it line by line.

# The double-brace placeholders are template expressions; presumably they are
# filled in by Ansible before the script is written out - TODO confirm.
VCL3_HOST="{{ hostvars['vcl3']['ansible_host'] }}"
VCL3_USER="{{ ansible_user }}"
BACKUP_DIR="/tmp/db-backup"
# The timestamp is embedded in the dump file name (coffee_db_<timestamp>.sql).
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="coffee_db_${TIMESTAMP}.sql"
DB_NAME="coffee_dev"

# Print the message in $1 prefixed with the current date and time.
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}

log "Starting database replication to VCL3..."

# Create backup directory
# Create backup directory locally
mkdir -p $BACKUP_DIR

# Dump database from VCL2
# The ">" redirection is applied by this shell, outside the container, so the
# dump file lands in BACKUP_DIR on the machine running this script.
log "Creating database dump..."
if sudo docker exec coffee_db pg_dump -U postgres coffee_dev > "${BACKUP_DIR}/${BACKUP_FILE}"; then
if sudo docker exec coffee_db pg_dump -U postgres ${DB_NAME} > "${BACKUP_DIR}/${BACKUP_FILE}"; then
log "Database dump created: ${BACKUP_FILE}"
else
log "ERROR: Failed to create database dump"
exit 1
fi

# Ensure backup directory exists on VCL3
# The exit status of this ssh call is not checked; a failure here only shows
# up when the scp below fails.
log "Ensuring backup directory exists on VCL3..."
ssh -o StrictHostKeyChecking=no ${VCL3_USER}@${VCL3_HOST} "mkdir -p ${BACKUP_DIR}"

# Copy to VCL3
log "Copying database dump to VCL3..."
if scp -o StrictHostKeyChecking=no "${BACKUP_DIR}/${BACKUP_FILE}" ${VCL3_USER}@${VCL3_HOST}:${BACKUP_DIR}/; then
log "Database dump copied to VCL3"
log "Database dump copied to VCL3: ${BACKUP_DIR}/${BACKUP_FILE}"
else
log "ERROR: Failed to copy database dump to VCL3"
exit 1
fi

# Restore on VCL3
# The "<" sits inside the quoted remote command, so the dump is read from the
# copy on VCL3. A failure here only logs a warning and does not stop the script.
# NOTE(review): this restore targets "-d coffee_db" while the dump is taken
# from coffee_dev; the comments below say the restore is no longer done here,
# so this is presumably the removed side of the diff - confirm.
log "Restoring database on VCL3..."
if ssh -o StrictHostKeyChecking=no ${VCL3_USER}@${VCL3_HOST} \
"sudo docker exec -i coffee_db psql -U postgres -d coffee_db < ${BACKUP_DIR}/${BACKUP_FILE}"; then
log "Database restored successfully on VCL3"
else
log "WARNING: Database restore on VCL3 had issues (may be expected if DB doesn't exist yet)"
fi
# Note: We don't restore on VCL3 here because:
# 1. VCL3 is in standby mode (no running containers)
# 2. The monitor script will restore from backup during failover
log "Backup stored on VCL3 for failover use"

# Cleanup old backups (keep last 5)
# Cleanup old backups (keep last 5 on both servers)
# "ls -t" lists newest first and "tail -n +6" prints from the sixth entry on,
# so everything except the five newest dumps is handed to rm. "xargs -r" skips
# running rm when there is nothing to delete.
# NOTE(review): the result of "cd" is not checked; if it fails, the pipeline
# runs against whatever the current directory is.
log "Cleaning up old backups..."
cd $BACKUP_DIR
ls -t coffee_db_*.sql | tail -n +6 | xargs -r rm
ls -t coffee_db_*.sql 2>/dev/null | tail -n +6 | xargs -r rm
# Same pruning on VCL3; "|| true" discards a failure of the remote cleanup.
ssh -o StrictHostKeyChecking=no ${VCL3_USER}@${VCL3_HOST} \
"cd ${BACKUP_DIR} && ls -t coffee_db_*.sql 2>/dev/null | tail -n +6 | xargs -r rm" || true

log "Database replication completed successfully"
log "Backup available on VCL3: ${BACKUP_DIR}/${BACKUP_FILE}"

- name: Setup SSH key for passwordless replication
shell: |
Expand Down
149 changes: 139 additions & 10 deletions ansible/setup-vcl3-monitor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@
content: |
#!/bin/bash
# VCL2 watchdog: polls the primary and brings VCL3 up when it stops answering.
# On failover the database is restored from the replicated backup; on failback
# the VCL3 database is synced back to VCL2.

# --- Primary (VCL2) endpoint being watched ---
VCL2_HOST="{{ hostvars['vcl2']['ansible_host'] }}"
VCL2_USER="{{ ansible_user }}"
VCL2_PORT=3000

# --- Polling policy: interval in seconds, consecutive failures before failover ---
FAIL_THRESHOLD=3
CHECK_INTERVAL=10

# --- Local paths and database name ---
BACKUP_DIR=/tmp/db-backup
DB_NAME=coffee_dev
PROJECT_DIR="/home/{{ ansible_user }}/devops-project/coffee_project"

# --- Mutable monitor state ---
vcl3_active=false
fail_count=0
Expand All @@ -42,37 +46,162 @@
fi
}

#######################################
# Restore the newest replicated dump from BACKUP_DIR into the coffee_db
# container, replacing any existing DB_NAME database.
# Globals:   BACKUP_DIR (read), DB_NAME (read)
# Arguments: none
# Outputs:   progress and warnings through log
# Returns:   0 when the dump was restored without SQL errors;
#            1 when no dump exists, the database never became ready,
#            or the restore failed.
#######################################
restore_database() {
  local candidate attempt
  local latest_backup=""
  local db_ready=false

  log "Looking for database backup to restore..."

  # Pick the newest dump by modification time without parsing ls output.
  # An unmatched glob stays literal and is rejected by the -f test.
  for candidate in "${BACKUP_DIR}"/coffee_db_*.sql; do
    [[ -f "$candidate" ]] || continue
    if [[ -z "$latest_backup" || "$candidate" -nt "$latest_backup" ]]; then
      latest_backup=$candidate
    fi
  done

  if [[ -z "$latest_backup" ]]; then
    log "WARNING: No database backup found in ${BACKUP_DIR}"
    log "VCL3 will start with fresh database (migrations/seeds)"
    return 1
  fi
  log "Found backup: $latest_backup"

  # Wait for the database to accept connections (30 attempts, 2s apart).
  log "Waiting for database container to be ready..."
  for attempt in {1..30}; do
    if sudo docker exec coffee_db pg_isready -U postgres > /dev/null 2>&1; then
      db_ready=true
      log "Database is ready"
      break
    fi
    sleep 2
  done

  # Do not drop or recreate anything against a database that never came up.
  if [[ "$db_ready" != true ]]; then
    log "ERROR: Database did not become ready, skipping restore"
    return 1
  fi

  # Drop and recreate the database to ensure a clean state. These steps stay
  # best-effort, but failures are now logged instead of being discarded.
  log "Preparing database for restore..."
  if ! sudo docker exec coffee_db psql -U postgres \
    -c "DROP DATABASE IF EXISTS ${DB_NAME};"; then
    log "WARNING: Could not drop database ${DB_NAME}"
  fi
  if ! sudo docker exec coffee_db psql -U postgres \
    -c "CREATE DATABASE ${DB_NAME};"; then
    log "WARNING: Could not create database ${DB_NAME}"
  fi

  # Without ON_ERROR_STOP psql exits 0 even when statements read from stdin
  # fail, which would report a broken restore as a success. The single
  # transaction keeps a failed restore from leaving a half-loaded database.
  log "Restoring database from backup..."
  if sudo docker exec -i coffee_db psql -U postgres -d "${DB_NAME}" \
    -v ON_ERROR_STOP=1 --single-transaction < "$latest_backup"; then
    log "Database restored successfully from backup!"
    return 0
  fi

  log "WARNING: Database restore had issues, app will use migrations"
  return 1
}

# Failover entry point: bring the stack up on VCL3.
# Steps visible below: change into PROJECT_DIR, start the "db" compose
# service, pause 10 seconds, call restore_database (its return status is not
# checked), start the "app" compose service, then poll
# http://localhost:3000/coffees up to 12 times, 5 seconds apart.
# Returns 0 as soon as the poll succeeds, 1 if the app never answers.
#
# NOTE(review): this listing appears to interleave removed and added diff
# lines. The first log line, the "docker-compose up -d --build" call and the
# early "vcl3_active=true" / "VCL3 is now active" pair look like the old
# implementation; as shown they would mark VCL3 active before the health
# poll has run. Confirm against the real file.
activate_vcl3() {
log "Activating VCL3..."
log "========================================="
log "FAILOVER: Activating VCL3..."
log "========================================="
# The result of cd is not checked before the compose commands run.
cd $PROJECT_DIR
sudo docker-compose up -d --build
vcl3_active=true
log "VCL3 is now active"

# Start database container first
log "Starting database container..."
sudo docker compose up -d db
sleep 10

# Try to restore from replicated backup
restore_database

# Start the app container
log "Starting application container..."
sudo docker compose up -d app

# Wait for app to be ready
log "Waiting for app to be healthy..."
for i in {1..12}; do
# curl -f makes HTTP error responses count as failures; -s keeps it quiet.
if curl -sf http://localhost:3000/coffees > /dev/null 2>&1; then
vcl3_active=true
log "========================================="
log "VCL3 is now ACTIVE and serving traffic!"
log "========================================="
return 0
fi
log "Waiting for app... ($i/12)"
sleep 5
done

log "ERROR: App started but not responding on port 3000"
return 1
}

#######################################
# Failback sync: dump the VCL3 database, copy the dump to VCL2 and restore it
# there, replacing VCL2's DB_NAME database.
# Globals:   BACKUP_DIR, DB_NAME, VCL2_USER, VCL2_HOST (all read)
# Arguments: none
# Outputs:   progress and errors through log
# Returns:   0 when VCL2 was restored without SQL errors, 1 on any failure
#######################################
sync_database_to_vcl2() {
  local timestamp sync_file
  local -r remote="${VCL2_USER}@${VCL2_HOST}"

  log "Syncing VCL3 database back to VCL2..."

  timestamp=$(date +%Y%m%d_%H%M%S)
  sync_file="${BACKUP_DIR}/failback_${timestamp}.sql"

  # Dump current VCL3 database. The redirection creates the file even when
  # pg_dump fails, so remove the empty or partial dump in that case.
  log "Creating database dump from VCL3..."
  if sudo docker exec coffee_db pg_dump -U postgres "${DB_NAME}" > "$sync_file"; then
    log "Database dump created: $sync_file"
  else
    rm -f -- "$sync_file"
    log "ERROR: Failed to create database dump"
    return 1
  fi

  # BACKUP_DIR lives under /tmp and may be missing on VCL2; create it first
  # so the scp below has a directory to write into.
  if ! ssh -o StrictHostKeyChecking=no "$remote" "mkdir -p ${BACKUP_DIR}"; then
    log "ERROR: Failed to copy database to VCL2"
    return 1
  fi

  # Copy to VCL2
  log "Copying database to VCL2..."
  if scp -o StrictHostKeyChecking=no "$sync_file" "${remote}:${BACKUP_DIR}/"; then
    log "Database dump copied to VCL2"
  else
    log "ERROR: Failed to copy database to VCL2"
    return 1
  fi

  # Restore on VCL2. Without ON_ERROR_STOP psql exits 0 even when statements
  # read from stdin fail, which would report a broken sync as a success.
  # NOTE(review): DROP DATABASE presumably fails while VCL2's app holds
  # connections to the database - confirm how the app is handled here.
  log "Restoring database on VCL2..."
  if ssh -o StrictHostKeyChecking=no "$remote" \
    "sudo docker exec coffee_db psql -U postgres -c 'DROP DATABASE IF EXISTS ${DB_NAME};' && \
     sudo docker exec coffee_db psql -U postgres -c 'CREATE DATABASE ${DB_NAME};' && \
     sudo docker exec -i coffee_db psql -U postgres -d ${DB_NAME} -v ON_ERROR_STOP=1 --single-transaction < ${sync_file}"; then
    log "Database synced to VCL2 successfully!"
    return 0
  fi

  log "ERROR: Failed to restore database on VCL2"
  return 1
}

# Failback entry point: hand traffic back to VCL2 and return VCL3 to standby.
# Steps visible below: call sync_database_to_vcl2, change into PROJECT_DIR,
# stop the compose stack, and set vcl3_active=false.
# The return status of sync_database_to_vcl2 is not checked, so the containers
# are stopped even when the sync back to VCL2 failed.
#
# NOTE(review): this listing appears to interleave removed and added diff
# lines; the first log line and the "docker-compose down" call look like the
# old implementation next to their replacements. Confirm against the real file.
deactivate_vcl3() {
log "Deactivating VCL3..."
log "========================================="
log "FAILBACK: VCL2 is back, deactivating VCL3..."
log "========================================="

# First sync database back to VCL2
sync_database_to_vcl2

# Now stop VCL3 containers
# The result of cd is not checked before the compose commands run.
cd $PROJECT_DIR
sudo docker-compose down
sudo docker compose down
vcl3_active=false
log "VCL3 is now standby"

log "========================================="
log "VCL3 deactivated. VCL2 is PRIMARY again."
log "========================================="
}

# Ensure backup directory exists
mkdir -p $BACKUP_DIR

log "Starting VCL2 health monitor..."
log "Monitoring: http://${VCL2_HOST}:${VCL2_PORT}/coffees"
log "Check interval: ${CHECK_INTERVAL}s, Failure threshold: ${FAIL_THRESHOLD}"

while true; do
if check_vcl2; then
if [ $fail_count -gt 0 ]; then
log "VCL2 health check passed (recovered from $fail_count failures)"
fi
fail_count=0

if [ "$vcl3_active" = true ]; then
log "VCL2 is healthy again, deactivating VCL3"
log "VCL2 is healthy again, initiating failback..."
deactivate_vcl3
fi
else
fail_count=$((fail_count + 1))
log "VCL2 health check failed ($fail_count/$FAIL_THRESHOLD)"
log "VCL2 health check FAILED ($fail_count/$FAIL_THRESHOLD)"

if [ $fail_count -ge $FAIL_THRESHOLD ] && [ "$vcl3_active" = false ]; then
log "VCL2 is down! Triggering failover to VCL3"
log "VCL2 failure threshold reached! Triggering failover..."
activate_vcl3
fi
fi
Expand Down
Loading
Loading