From 4eb23044ad132b87d252b2793d64c5dec811a357 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 2 Sep 2025 16:34:24 +0300 Subject: [PATCH] add-prometheus-exporter --- Dockerfile | 1 + README.md | 7 +- install/assets/defaults/08-prometheus | 7 + install/assets/functions/03-monitoring | 48 ++++ install/assets/functions/08-prometheus | 328 +++++++++++++++++++++++++ install/assets/functions/10-db-backup | 92 ++++++- install/etc/cont-init.d/03-monitoring | 191 ++++++++++++++ install/etc/cont-init.d/10-db-backup | 10 +- 8 files changed, 680 insertions(+), 4 deletions(-) create mode 100644 install/assets/defaults/08-prometheus create mode 100644 install/assets/functions/03-monitoring create mode 100644 install/assets/functions/08-prometheus create mode 100644 install/etc/cont-init.d/03-monitoring mode change 100755 => 100644 install/etc/cont-init.d/10-db-backup diff --git a/Dockerfile b/Dockerfile index 4108b7e8..e11ccecb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -73,6 +73,7 @@ RUN source /assets/functions/00-container && \ xz \ zip \ zstd \ + netcat-openbsd \ && \ \ case "$(uname -m)" in \ diff --git a/README.md b/README.md index 796986f8..ae0151b4 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ Backs up CouchDB, InfluxDB, MySQL/MariaDB, Microsoft SQL, MongoDB, Postgres, Red - encryption support (passphrase and public key) - notify upon job failure to email, matrix, mattermost, rocketchat, custom script - zabbix metrics support +- prometheus metrics support - hooks to execute pre and post backup job for customization purposes - companion script to aid in restores @@ -167,7 +168,7 @@ The following directories are used for configuration and can be mapped for persi #### Base Images used -This image relies on an [Alpine Linux](https://hub.docker.com/r/tiredofit/alpine) base image that relies on an [init system](https://github.com/just-containers/s6-overlay) for added capabilities. Outgoing SMTP capabilities are handled via `msmtp`. Individual container performance monitoring is performed by [zabbix-agent](https://zabbix.org). Additional tools include: `bash`,`curl`,`less`,`logrotate`, `nano`. +This image relies on an [Alpine Linux](https://hub.docker.com/r/tiredofit/alpine) base image that relies on an [init system](https://github.com/just-containers/s6-overlay) for added capabilities. Outgoing SMTP capabilities are handled via `msmtp`. Individual container performance monitoring is performed by [zabbix-agent](https://zabbix.org) or [Prometheus](https://prometheus.io). Additional tools include: `bash`,`curl`,`less`,`logrotate`, `nano`. Be sure to view the following repositories to understand all the customizable options: @@ -187,6 +188,10 @@ Be sure to view the following repositories to understand all the customizable op | `MANUAL_RUN_FOREVER` | `TRUE` or `FALSE` if you wish to try to make the container exit after the backup | `TRUE` | | `DEBUG_MODE` | If set to `true`, print copious shell script messages to the container log. Otherwise only basic messages are printed. | `FALSE` | | `BACKUP_JOB_CONCURRENCY` | How many backup jobs to run concurrently | `1` | +| `CONTAINER_ENABLE_MONITORING` | Enable monitoring with Zabbix or Prometheus | `TRUE` | +| `CONTAINER_MONITORING_BACKEND` | Choose monitoring backend: `zabbix` or `prometheus` | `zabbix` | +| `PROMETHEUS_PORT` | Port for Prometheus metrics endpoint | `9090` | +| `DEBUG_PROMETHEUS` | Enable debug logging for Prometheus metrics | `FALSE` | #### Job Defaults If these are set and no other defaults or variables are set explicitly, they will be added to any of the backup jobs. diff --git a/install/assets/defaults/08-prometheus b/install/assets/defaults/08-prometheus new file mode 100644 index 00000000..9c47057b --- /dev/null +++ b/install/assets/defaults/08-prometheus @@ -0,0 +1,7 @@ +#!/command/with-contenv bash + +# Prometheus monitoring configuration +PROMETHEUS_PORT=${PROMETHEUS_PORT:-"9090"} +PROMETHEUS_METRICS_FILE=${PROMETHEUS_METRICS_FILE:-"/tmp/prometheus_metrics"} +PROMETHEUS_METRICS_LOCK=${PROMETHEUS_METRICS_LOCK:-"/tmp/prometheus_metrics.lock"} +DEBUG_PROMETHEUS=${DEBUG_PROMETHEUS:-"FALSE"} diff --git a/install/assets/functions/03-monitoring b/install/assets/functions/03-monitoring new file mode 100644 index 00000000..12bf9557 --- /dev/null +++ b/install/assets/functions/03-monitoring @@ -0,0 +1,48 @@ +#!/command/with-contenv bash + +ZABBIX_AGENT_LOG_FILE=${ZABBIX_AGENT_LOG_FILE:-"zabbix_agentd.log"} +ZABBIX_AGENT_LOG_PATH=${ZABBIX_AGENT_LOG_PATH:-"/var/log/zabbix/agent/"} +ZABBIX_ALLOW_ROOT=${ZABBIX_ALLOW_ROOT:-"1"} +ZABBIX_BUFFER_SEND=${ZABBIX_BUFFER_SEND:-"5"} +ZABBIX_BUFFER_SIZE=${ZABBIX_BUFFER_SIZE:-"100"} +ZABBIX_CERT_PATH=${ZABBIX_CERT_PATH:-"/etc/zabbix/certs/"} +ZABBIX_CONFIG_FILE=${ZABBIX_CONFIG_FILE:-"zabbix_agentd.conf"} +ZABBIX_CONFIG_PATH=${ZABBIX_CONFIG_PATH:-"/etc/zabbix/"} +ZABBIX_DEBUGLEVEL=${ZABBIX_DEBUGLEVEL:-"1"} +ZABBIX_ENABLE_AUTOREGISTER=${ZABBIX_ENABLE_AUTOREGISTER:-"TRUE"} +ZABBIX_ENABLE_AUTOREGISTER_DNS=${ZABBIX_ENABLE_AUTOREGISTER_DNS:-"TRUE"} +ZABBIX_HOSTNAME=${ZABBIX_HOSTNAME:-"${CONTAINER_NAME}"} +ZABBIX_LISTEN_IP=${ZABBIX_LISTEN_IP:-"0.0.0.0"} +ZABBIX_LISTEN_PORT=${ZABBIX_LISTEN_PORT:-"10050"} +ZABBIX_LOG_FILE_SIZE=${ZABBIX_LOG_FILE_SIZE:-"0"} +ZABBIX_MAXLINES_SECOND=${ZABBIX_MAXLINES_SECOND:-"20"} +ZABBIX_PID=${ZABBIX_PID:-"/var/lib/zabbix/run/zabbix-agent.pid"} +ZABBIX_REFRESH_ACTIVE_CHECKS=${ZABBIX_REFRESH_ACTIVE_CHECKS:-"120"} +ZABBIX_REMOTECOMMANDS_ALLOW=${ZABBIX_REMOTECOMMANDS_ALLOW:-"*"} +ZABBIX_REMOTECOMMANDS_LOG=${ZABBIX_REMOTECOMMANDS_LOG:-"1"} +ZABBIX_SERVER=${ZABBIX_SERVER:-"0.0.0.0/0"} +ZABBIX_SERVER_ACTIVE=${ZABBIX_SERVER_ACTIVE:-"zabbix-proxy"} +ZABBIX_SETUP_TYPE=${ZABBIX_SETUP_TYPE:-"AUTO"} +ZABBIX_SOCKET=${ZABBIX_SOCKET:-"/var/lib/zabbix/run/zabbix-agent.sock"} +ZABBIX_START_AGENTS=${ZABBIX_START_AGENTS:-"1"} +ZABBIX_STATUS_PORT=${ZABBIX_STATUS_PORT:-"8050"} +ZABBIX_USER=${ZABBIX_USER:-"zabbix"} +ZABBIX_USER_DOAS=${ZABBIX_USER_DOAS:-"TRUE"} +ZABBIX_USER_SUDO=${ZABBIX_USER_SUDO:-"TRUE"} +ZABBIX_AGENT_TIMEOUT=${ZABBIX_AGENT_TIMEOUT:-"3"} + + +os=$(cat /etc/os-release |grep ^ID= | cut -d = -f2) +case ${os} in + "alpine" ) + osver=$(cat /etc/os-release | grep VERSION_ID | cut -d = -f 2 | cut -d . -f 2 | cut -d _ -f 1) + if [ "${osver}" -ge 15 ] || [ "$osver" = "edge" ] ; then + ZABBIX_AGENT_TYPE=${ZABBIX_AGENT_TYPE:-"modern"} + else + ZABBIX_AGENT_TYPE=${ZABBIX_AGENT_TYPE:-"classic"} + fi + ;; + "debian" | "ubuntu" ) + ZABBIX_AGENT_TYPE=${ZABBIX_AGENT_TYPE:-"modern"} + ;; +esac \ No newline at end of file diff --git a/install/assets/functions/08-prometheus b/install/assets/functions/08-prometheus new file mode 100644 index 00000000..2c7302de --- /dev/null +++ b/install/assets/functions/08-prometheus @@ -0,0 +1,328 @@ +#!/command/with-contenv bash + +# Prometheus metrics configuration +PROMETHEUS_PORT=${PROMETHEUS_PORT:-"9090"} +PROMETHEUS_METRICS_FILE=${PROMETHEUS_METRICS_FILE:-"/tmp/prometheus_metrics"} +PROMETHEUS_METRICS_LOCK=${PROMETHEUS_METRICS_LOCK:-"/tmp/prometheus_metrics.lock"} + +# Initialize Prometheus metrics file +init_prometheus_metrics() { + if var_true "${DEBUG_PROMETHEUS}" ; then debug on; fi + + # Create metrics file with headers + cat > "${PROMETHEUS_METRICS_FILE}" << 'EOF' +# HELP dbbackup_backup_status Backup job status (0=success, 1=failed) +# TYPE dbbackup_backup_status gauge +# HELP dbbackup_backup_duration_seconds Backup job duration in seconds +# TYPE dbbackup_backup_duration_seconds gauge +# HELP dbbackup_backup_size_bytes Backup file size in bytes +# TYPE dbbackup_backup_size_bytes gauge +# HELP dbbackup_backup_timestamp Backup completion timestamp +# TYPE dbbackup_backup_timestamp gauge +# HELP dbbackup_compression_duration_seconds Compression duration in seconds +# TYPE dbbackup_compression_duration_seconds gauge +# HELP dbbackup_encryption_duration_seconds Encryption duration in seconds +# TYPE dbbackup_encryption_duration_seconds gauge +# HELP dbbackup_checksum_duration_seconds Checksum calculation duration in seconds +# TYPE dbbackup_checksum_duration_seconds gauge +# HELP dbbackup_upload_duration_seconds Upload duration in seconds +# TYPE dbbackup_upload_duration_seconds gauge +# HELP dbbackup_database_availability Database availability status (1=available, 0=unavailable) +# TYPE dbbackup_database_availability gauge +# HELP dbbackup_jobs_total Total number of backup jobs +# TYPE dbbackup_jobs_total counter +# HELP dbbackup_jobs_failed_total Total number of failed backup jobs +# TYPE dbbackup_jobs_failed_total counter +# HELP dbbackup_jobs_success_total Total number of successful backup jobs +# TYPE dbbackup_jobs_success_total counter +EOF + + if var_true "${DEBUG_PROMETHEUS}" ; then debug off; fi +} + +# Start Prometheus HTTP server +start_prometheus_server() { + if var_true "${DEBUG_PROMETHEUS}" ; then debug on; fi + + if var_true "${CONTAINER_ENABLE_MONITORING}" && [ "${CONTAINER_MONITORING_BACKEND,,}" = "prometheus" ]; then + print_notice "Starting Prometheus metrics server on port ${PROMETHEUS_PORT}" + + # Start HTTP server in background using Python if available, otherwise use netcat + ( + while true; do + if command -v python3 >/dev/null 2>&1; then + # Use Python HTTP server + python3 -c " +import http.server +import socketserver +import os + +class MetricsHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header('Content-Type', 'text/plain; version=0.0.4; charset=utf-8') + self.end_headers() + + metrics_file = '${PROMETHEUS_METRICS_FILE}' + if os.path.exists(metrics_file): + with open(metrics_file, 'r') as f: + self.wfile.write(f.read().encode()) + else: + self.wfile.write(b'# No metrics available') + +with socketserver.TCPServer(('', ${PROMETHEUS_PORT}), MetricsHandler) as httpd: + httpd.serve_forever() +" + else + # Fallback to netcat if Python is not available + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\n\r\n$(cat ${PROMETHEUS_METRICS_FILE} 2>/dev/null || echo '# No metrics available')" | \ + nc -l -p "${PROMETHEUS_PORT}" -w 1 + fi + done + ) & + + PROMETHEUS_SERVER_PID=$! + echo "${PROMETHEUS_SERVER_PID}" > /tmp/prometheus_server.pid + print_notice "Prometheus server started with PID ${PROMETHEUS_SERVER_PID}" + fi + + if var_true "${DEBUG_PROMETHEUS}" ; then debug off; fi +} + +# Stop Prometheus HTTP server +stop_prometheus_server() { + if [ -f /tmp/prometheus_server.pid ]; then + local pid=$(cat /tmp/prometheus_server.pid) + if kill -0 "${pid}" 2>/dev/null; then + kill "${pid}" + print_notice "Prometheus server stopped (PID: ${pid})" + fi + rm -f /tmp/prometheus_server.pid + fi +} + +# Add metric to Prometheus file (for counters - always increment) +add_prometheus_counter() { + local metric_name="$1" + local metric_value="$2" + local labels="$3" + + if var_true "${CONTAINER_ENABLE_MONITORING}" && [ "${CONTAINER_MONITORING_BACKEND,,}" = "prometheus" ]; then + # Use lock to prevent concurrent writes + ( + flock -x 200 + local temp_file="${PROMETHEUS_METRICS_FILE}.tmp" + + # Create full metric name with labels + local full_metric_name + if [ -n "${labels}" ]; then + full_metric_name="${metric_name}{${labels}}" + else + full_metric_name="${metric_name}" + fi + + # Get current value of the counter + local current_value=0 + if [ -f "${PROMETHEUS_METRICS_FILE}" ]; then + current_value=$(grep "^${full_metric_name} " "${PROMETHEUS_METRICS_FILE}" | tail -1 | awk '{print $2}' 2>/dev/null || echo "0") + # Remove existing counter metric + grep -v "^${full_metric_name} " "${PROMETHEUS_METRICS_FILE}" > "${temp_file}" 2>/dev/null || true + mv "${temp_file}" "${PROMETHEUS_METRICS_FILE}" + fi + + # Calculate new value (increment by metric_value) + local new_value=$((current_value + metric_value)) + + # Add updated counter metric + echo "${full_metric_name} ${new_value}" >> "${PROMETHEUS_METRICS_FILE}" + ) 200>"${PROMETHEUS_METRICS_LOCK}" + fi +} + +# Update metric in Prometheus file (for gauges - replace existing value) +update_prometheus_gauge() { + local metric_name="$1" + local metric_value="$2" + local labels="$3" + + if var_true "${CONTAINER_ENABLE_MONITORING}" && [ "${CONTAINER_MONITORING_BACKEND,,}" = "prometheus" ]; then + # Use lock to prevent concurrent writes + ( + flock -x 200 + local temp_file="${PROMETHEUS_METRICS_FILE}.tmp" + + # Create full metric name with labels + local full_metric_name + if [ -n "${labels}" ]; then + full_metric_name="${metric_name}{${labels}}" + else + full_metric_name="${metric_name}" + fi + + # Remove existing metric with same name and labels, then add new one + if [ -f "${PROMETHEUS_METRICS_FILE}" ]; then + grep -v "^${full_metric_name} " "${PROMETHEUS_METRICS_FILE}" > "${temp_file}" 2>/dev/null || true + mv "${temp_file}" "${PROMETHEUS_METRICS_FILE}" + fi + + # Add new metric + echo "${full_metric_name} ${metric_value}" >> "${PROMETHEUS_METRICS_FILE}" + ) 200>"${PROMETHEUS_METRICS_LOCK}" + fi +} + +# Update backup status metric (gauge - replace existing value) +update_backup_status_metric() { + local db_host="$1" + local db_name="$2" + local status="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_backup_status" "${status}" "${labels}" +} + +# Update backup duration metric (gauge - replace existing value) +update_backup_duration_metric() { + local db_host="$1" + local db_name="$2" + local duration="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_backup_duration_seconds" "${duration}" "${labels}" +} + +# Update backup size metric (gauge - replace existing value) +update_backup_size_metric() { + local db_host="$1" + local db_name="$2" + local size="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_backup_size_bytes" "${size}" "${labels}" +} + +# Update backup timestamp metric (gauge - replace existing value) +update_backup_timestamp_metric() { + local db_host="$1" + local db_name="$2" + local timestamp="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_backup_timestamp" "${timestamp}" "${labels}" +} + +# Update compression duration metric (gauge - replace existing value) +update_compression_duration_metric() { + local db_host="$1" + local db_name="$2" + local duration="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_compression_duration_seconds" "${duration}" "${labels}" +} + +# Update encryption duration metric (gauge - replace existing value) +update_encryption_duration_metric() { + local db_host="$1" + local db_name="$2" + local duration="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_encryption_duration_seconds" "${duration}" "${labels}" +} + +# Update checksum duration metric (gauge - replace existing value) +update_checksum_duration_metric() { + local db_host="$1" + local db_name="$2" + local duration="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_checksum_duration_seconds" "${duration}" "${labels}" +} + +# Update upload duration metric (gauge - replace existing value) +update_upload_duration_metric() { + local db_host="$1" + local db_name="$2" + local duration="$3" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + update_prometheus_gauge "dbbackup_upload_duration_seconds" "${duration}" "${labels}" +} + +# Update database availability metric (gauge - replace existing value) +update_database_availability_metric() { + local db_host="$1" + local db_name="$2" + local db_type="$3" + local availability="$4" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\",db_type=\"${db_type}\"" + + update_prometheus_gauge "dbbackup_database_availability" "${availability}" "${labels}" +} + +# Increment job counter (counter - always increment) +increment_job_counter() { + local db_host="$1" + local db_name="$2" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + add_prometheus_counter "dbbackup_jobs_total" "1" "${labels}" +} + +# Increment failed job counter (counter - always increment) +increment_failed_job_counter() { + local db_host="$1" + local db_name="$2" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + add_prometheus_counter "dbbackup_jobs_failed_total" "1" "${labels}" +} + +# Increment successful job counter (counter - always increment) +increment_successful_job_counter() { + local db_host="$1" + local db_name="$2" + local labels="db_host=\"${db_host}\",db_name=\"${db_name}\"" + + add_prometheus_counter "dbbackup_jobs_success_total" "1" "${labels}" +} + +# Clean old metrics (keep only headers and unique metrics) +cleanup_prometheus_metrics() { + if [ -f "${PROMETHEUS_METRICS_FILE}" ]; then + local line_count=$(wc -l < "${PROMETHEUS_METRICS_FILE}") + if [ "${line_count}" -gt 500 ]; then + # Keep headers and remove duplicates, keeping only the latest value for each metric + ( + flock -x 200 + local temp_file="${PROMETHEUS_METRICS_FILE}.tmp" + + # Keep headers (lines starting with #) + grep "^#" "${PROMETHEUS_METRICS_FILE}" > "${temp_file}" + + # For each unique metric name, keep only the last occurrence + # This ensures counters keep their accumulated values + grep -v "^#" "${PROMETHEUS_METRICS_FILE}" | \ + awk -F'[{} ]' '{ + # Extract metric name and labels + metric_name = $1 + if ($2 != "") { + metric_name = metric_name "{" $2 + } + # Store the latest value for each metric + # For counters, this will be the highest accumulated value + metrics[metric_name] = $0 + } END { + # Output all unique metrics + for (metric in metrics) { + print metrics[metric] + } + }' >> "${temp_file}" + + mv "${temp_file}" "${PROMETHEUS_METRICS_FILE}" + ) 200>"${PROMETHEUS_METRICS_LOCK}" + fi + fi +} diff --git a/install/assets/functions/10-db-backup b/install/assets/functions/10-db-backup index 63aec237..cddbb94d 100644 --- a/install/assets/functions/10-db-backup +++ b/install/assets/functions/10-db-backup @@ -947,6 +947,10 @@ backup_sqlite3() { check_availability() { if var_true "${DEBUG_CHECK_AVAILABILITY}" ; then debug on; fi + + # Initialize availability as unavailable (0) + local availability_status=0 + if var_false "${backup_job_skip_availability_check}" ; then case "${dbtype}" in "couch" ) @@ -957,7 +961,10 @@ check_availability() { fi while [ "${code_received}" != "200" ]; do code_received=$(run_as_user curl -XGET -sSL -o /dev/null -L -w ''%{http_code}'' ${_ca_couch_auth} ${backup_job_db_host}:${backup_job_db_port}) - if [ "${code_received}" = "200" ] ; then break ; fi + if [ "${code_received}" = "200" ] ; then + availability_status=1 + break + fi sleep 5 (( counter+=5 )) write_log warn "CouchDB Host '${backup_job_db_host}' is not accessible, retrying.. ($counter seconds so far)" @@ -972,12 +979,16 @@ check_availability() { (( counter+=5 )) write_log warn "InfluxDB Host '${backup_job_db_host#*//}' is not accessible, retrying.. ($counter seconds so far)" done + availability_status=1 ;; 2 ) code_received=0 while [ "${code_received}" != "200" ]; do code_received=$(run_as_user curl -XGET -sSL -o /dev/null -w ''%{http_code}'' ${backup_job_db_host}:${backup_job_db_port}/health) - if [ "${code_received}" = "200" ] ; then break ; fi + if [ "${code_received}" = "200" ] ; then + availability_status=1 + break + fi sleep 5 (( counter+=5 )) write_log warn "InfluxDB Host '${backup_job_db_host}' is not accessible, retrying.. ($counter seconds so far)" @@ -988,6 +999,7 @@ check_availability() { "mongo" ) if [ -n "${backup_job_mongo_custom_uri}" ] ; then write_log debug "Skipping Connectivity Check" + availability_status=1 else counter=0 while ! (run_as_user nc -z ${backup_job_db_host} ${backup_job_db_port}) ; do @@ -995,6 +1007,7 @@ check_availability() { (( counter+=5 )) write_log warn "Mongo Host '${backup_job_db_host}' is not accessible, retrying.. ($counter seconds so far)" done + availability_status=1 fi ;; "mysql" ) @@ -1005,6 +1018,7 @@ check_availability() { (( counter+=5 )) write_log warn "MySQL/MariaDB Server '${backup_job_db_host}' is not accessible, retrying.. (${counter} seconds so far)" done + availability_status=1 ;; "mssql" ) counter=0 @@ -1013,6 +1027,7 @@ check_availability() { (( counter+=5 )) write_log warn "MSSQL Host '${backup_job_db_host}' is not accessible, retrying.. ($counter seconds so far)" done + availability_status=1 ;; "pgsql" ) counter=0 @@ -1022,6 +1037,7 @@ check_availability() { (( counter+=5 )) write_log warn "Postgres Host '${backup_job_db_host}' is not accessible, retrying.. ($counter seconds so far)" done + availability_status=1 ;; "redis" ) counter=0 @@ -1030,6 +1046,7 @@ check_availability() { (( counter+=5 )) write_log warn "Redis Host '${backup_job_db_host}' is not accessible, retrying.. ($counter seconds so far)" done + availability_status=1 ;; "sqlite3" ) if [[ ! -e "${backup_job_db_host}" ]]; then @@ -1044,10 +1061,20 @@ check_availability() { write_log error "File '${backup_job_db_host}' is not readable." exit_code=2 exit $exit_code + else + availability_status=1 fi ;; esac fi + + # Send database availability metric to Prometheus if enabled + if var_true "${CONTAINER_ENABLE_MONITORING}" && [ "${CONTAINER_MONITORING_BACKEND,,}" = "prometheus" ]; then + source /assets/functions/08-prometheus + source /assets/defaults/08-prometheus + update_database_availability_metric "${backup_job_db_host}" "${1}" "${dbtype}" "${availability_status}" + fi + if var_true "${DEBUG_CHECK_AVAILABILITY}" ; then debug off; fi } @@ -1283,6 +1310,13 @@ EOF ctrl_c() { sed -i "/^${backup_instance_number}/d" /tmp/.container/db-backup-backups symlink_log + + # Stop Prometheus server if running + if var_true "${CONTAINER_ENABLE_MONITORING}" && [ "${CONTAINER_MONITORING_BACKEND,,}" = "prometheus" ]; then + source /assets/functions/08-prometheus + stop_prometheus_server + fi + print_warn "User aborted" exit } @@ -1779,6 +1813,60 @@ EOZP rm -rf "${zabbix_payload}" fi + # Send metrics to Prometheus if enabled + if var_true "${CONTAINER_ENABLE_MONITORING}" && [ "${CONTAINER_MONITORING_BACKEND,,}" = "prometheus" ]; then + source /assets/functions/08-prometheus + source /assets/defaults/08-prometheus + print_notice "Sending Backup Statistics to Prometheus" + + # Increment job counter + increment_job_counter "${backup_job_db_host}" "${1}" + + # Update status metric (0=success, 1=failed) + if [ "${exit_code}" = "0" ]; then + increment_successful_job_counter "${backup_job_db_host}" "${1}" + update_backup_status_metric "${backup_job_db_host}" "${1}" "0" + else + increment_failed_job_counter "${backup_job_db_host}" "${1}" + update_backup_status_metric "${backup_job_db_host}" "${1}" "1" + fi + + # Update duration metric + update_backup_duration_metric "${backup_job_db_host}" "${1}" "${dbbackup_total_time}" + + # Update size metric (convert to bytes if needed) + local size_in_bytes="${filesize}" + if [ -n "${size_in_bytes}" ] && [ "${size_in_bytes}" != "0" ]; then + update_backup_size_metric "${backup_job_db_host}" "${1}" "${size_in_bytes}" + fi + + # Update timestamp metric + update_backup_timestamp_metric "${backup_job_db_host}" "${1}" "${dbbackup_finish_time}" + + # Update compression duration if available + if [ -n "${compression_time}" ] && [ "${compression_time}" != "0" ]; then + update_compression_duration_metric "${backup_job_db_host}" "${1}" "${compression_time}" + fi + + # Update encryption duration if available + if [ -n "${encryption_time}" ] && [ "${encryption_time}" != "0" ]; then + update_encryption_duration_metric "${backup_job_db_host}" "${1}" "${encryption_time}" + fi + + # Update checksum duration if available + if [ -n "${checksum_time}" ] && [ "${checksum_time}" != "0" ]; then + update_checksum_duration_metric "${backup_job_db_host}" "${1}" "${checksum_time}" + fi + + # Update upload duration if available + if [ -n "${upload_time}" ] && [ "${upload_time}" != "0" ]; then + update_upload_duration_metric "${backup_job_db_host}" "${1}" "${upload_time}" + fi + + # Cleanup old metrics periodically + cleanup_prometheus_metrics + fi + ### Post Script Support if [ -n "${backup_job_post_script}" ] ; then if var_true "${backup_job_post_script_x_verify}" ; then diff --git a/install/etc/cont-init.d/03-monitoring b/install/etc/cont-init.d/03-monitoring new file mode 100644 index 00000000..11eda48f --- /dev/null +++ b/install/etc/cont-init.d/03-monitoring @@ -0,0 +1,191 @@ +#!/command/with-contenv bash + +source /assets/functions/00-container +output_off +prepare_service single +PROCESS_NAME="monitoring" + +## Legacy +if var_false "${ENABLE_ZABBIX}"; then CONTAINER_ENABLE_MONITORING=FALSE ; fi +if var_true "${ENABLE_ZABBIX}"; then + CONTAINER_ENABLE_MONITORING=TRUE + CONTAINER_MONITORING_BACKEND=zabbix +fi +## + +if var_true "${CONTAINER_ENABLE_MONITORING}" ; then + case "${CONTAINER_MONITORING_BACKEND,,}" in + "zabbix" ) + print_debug "Using Zabbix Backend" + os=$(cat /etc/os-release |grep ^ID= | cut -d = -f2) + if var_true "${DEBUG_MODE}" ; then + ZABBIX_DEBUGLEVEL=4 + fi + + if var_true "${ZABBIX_USER_SUDO}" ; then + grant_sudo zabbix + fi + + if var_true "${ZABBIX_USER_DOAS}" ; then + grant_doas zabbix + fi + + if [ -n "${ZABBIX_ENCRYPT_PSK_ID}" ] || [ -n "${ZABBIX_ENCRYPT_PSK_KEY}" ] || [ -n "${ZABBIX_ENCRYPT_PSK_FILE}" ] ; then + transform_file_var \ + ZABBIX_ENCRYPT_PSK_ID \ + ZABBIX_ENCRYPT_PSK_KEY + + print_debug "Zabbix Agent - Using PSK Encryption" + if [ ! -n "${ZABBIX_ENCRYPT_PSK_FILE}" ] && [ ! -n "${ZABBIX_ENCRYPT_PSK_KEY}" ] ; then + print_error "You've selected Zabbix Agent PSK Encryption but haven't supplied a file or a Key!" + exit 1 + fi + + if [ ! -n "${ZABBIX_ENCRYPT_PSK_FILE}" ] && [ -n "${ZABBIX_ENCRYPT_PSK_KEY}" ] ; then + print_debug "Zabbix Agent - Only have PSK via ENV Var (Automated creation of file)" + # libressl/openssl rand -hex 32 = 256bit + mkdir -p "${ZABBIX_CERT_PATH}" + ZABBIX_ENCRYPT_PSK_FILE="zabbix_agent.psk" + echo "${ZABBIX_ENCRYPT_PSK_KEY}" > "${ZABBIX_CERT_PATH}"/"${ZABBIX_ENCRYPT_PSK_FILE}" + fi + + chmod -f 0600 "${ZABBIX_CERT_PATH}"/"${ZABBIX_ENCRYPT_PSK_FILE}" + chown -f -R "${ZABBIX_USER}" "${ZABBIX_CERT_PATH}" + tls_psk=$(cat< "${ZABBIX_CONFIG_PATH}"/"${ZABBIX_CONFIG_FILE}" +# Custom Generated Zabbix Agent configuration for version 1. If you want to use your own config set 'ZABBIX_SETUP_TYPE=MANUAL' +# Generated on $(TZ=${TIMEZONE} date +'%Y-%m-%d %H:%M:%S %Z') + +AllowKey=system.run[${ZABBIX_REMOTECOMMANDS_ALLOW}] +AllowRoot=${ZABBIX_ALLOW_ROOT} +BufferSend=${ZABBIX_BUFFER_SEND} +BufferSize=${ZABBIX_BUFFER_SIZE} +DebugLevel=${ZABBIX_DEBUGLEVEL} +Hostname=${ZABBIX_HOSTNAME} +ListenIP=${ZABBIX_LISTEN_IP} +ListenPort=${ZABBIX_LISTEN_PORT} +LogFile=${ZABBIX_AGENT_LOG_PATH}/${ZABBIX_AGENT_LOG_FILE} +LogFileSize=${ZABBIX_LOG_FILE_SIZE} +LogRemoteCommands=${ZABBIX_REMOTECOMMANDS_LOG} +MaxLinesPerSecond=${ZABBIX_MAXLINES_SECOND} +Timeout=${ZABBIX_AGENT_TIMEOUT} +PidFile=${ZABBIX_PID} +RefreshActiveChecks=${ZABBIX_REFRESH_ACTIVE_CHECKS} +Server=${ZABBIX_SERVER} +ServerActive=${ZABBIX_SERVER_ACTIVE} +StartAgents=${ZABBIX_START_AGENTS} +User=${ZABBIX_USER} +${tls_psk} +Include=${ZABBIX_CONFIG_PATH}/${ZABBIX_CONFIG_FILE}.d/*.conf +EOF + else + print_debug "Skipping Configuring Zabbix classic Agent" + fi + ;; + "2" | "m" | "n" | "modern" | "new" ) + case ${os} in + "alpine" ) + osver=$(cat /etc/os-release | grep VERSION_ID | cut -d = -f 2 | cut -d . -f 2 | cut -d _ -f 1) + if [ "${osver}" -ge 11 ] || [ "$osver" = "edge" ] || [ "$osver" = "17*" ]; then + print_debug "We are using newer than Alpine 3.11 to be able to use the Zabbix Modern client.." + else + print_error "Sorry, Modern version not available for this images version (Alpine 3.11 + only)" + exit 1 + fi + ;; + esac + logship_version="modern" + if [ "${ZABBIX_SETUP_TYPE,,}" = "auto" ] ; then + print_debug "Configuring Zabbix modern Agent" + cat < "${ZABBIX_CONFIG_PATH}"/"${ZABBIX_CONFIG_FILE}" +# Custom Generated Zabbix Agent configuration for version 2 If you want to use your own config set 'ZABBIX_SETUP_TYPE=MANUAL' +# Generated on $(TZ=${TIMEZONE} date +'%Y-%m-%d %H:%M:%S %Z') + +AllowKey=system.run[${ZABBIX_REMOTECOMMANDS_ALLOW}] +BufferSend=${ZABBIX_BUFFER_SEND} +BufferSize=${ZABBIX_BUFFER_SIZE} +ControlSocket=${ZABBIX_SOCKET} +DebugLevel=${ZABBIX_DEBUGLEVEL} +Hostname=${ZABBIX_HOSTNAME} +ListenPort=${ZABBIX_LISTEN_PORT} +LogFile=${ZABBIX_AGENT_LOG_PATH}/${ZABBIX_AGENT_LOG_FILE} +LogFileSize=${ZABBIX_LOG_FILE_SIZE} +RefreshActiveChecks=${ZABBIX_REFRESH_ACTIVE_CHECKS} +Server=${ZABBIX_SERVER} +ServerActive=${ZABBIX_SERVER_ACTIVE} +StatusPort=${ZABBIX_STATUS_PORT} +Timeout=${ZABBIX_AGENT_TIMEOUT} +PidFile=${ZABBIX_PID} +${tls_psk} +Include=${ZABBIX_CONFIG_PATH}/${ZABBIX_CONFIG_FILE}.d/*.conf +EOF + else + print_debug "Skipping Configuring Zabbix modern Agent" + fi + ;; + esac + + if [ -n "${ZABBIX_REMOTECOMMANDS_DENY}" ]; then + echo "DenyKey=system.run[${ZABBIX_REMOTECOMMANDS_DENY}]" >> "${ZABBIX_CONFIG_PATH}"/"${ZABBIX_CONFIG_FILE}" + fi + + print_debug "Zabbix Agent - Generating Collection Configuration" + case ${os} in + "alpine" ) + updated_packages="UserParameter=packages.upgradable,doas apk update >/dev/null && apk version | sed '/Installed/d' | wc -l" + ;; + "debian" | "ubuntu" ) + updated_packages="UserParameter=packages.upgradable,doas apt-get update >/dev/null && doas aptitude search '~U' | wc -l" + ;; + esac + + mkdir -p "${ZABBIX_CONFIG_PATH}"/"${ZABBIX_CONFIG_FILE}".d + cat < "${ZABBIX_CONFIG_PATH}"/"${ZABBIX_CONFIG_FILE}".d/tiredofit-container_agent.conf +# Zabbix Container Agent Configuration - Automatically Generated based on Operating System +# Find Companion Zabbix Server Templates at https://github.com/tiredofit/docker-alpine or https://github.com/tiredofit/docker-debian +# Autoregister=toica + +## Report on Container OS +UserParameter=agent.os,grep "PRETTY_NAME" /etc/os-release | cut -d '"' -f2 + +## Report on packages that are available to be upgraded +${updated_packages} +EOF + + mkdir -p "${ZABBIX_AGENT_LOG_PATH}" + chown -R "${ZABBIX_USER}":root "${ZABBIX_AGENT_LOG_PATH}" + chown -R "${ZABBIX_USER}":root "${ZABBIX_CONFIG_PATH}" + chown -R "${ZABBIX_USER}":root /var/lib/zabbix + chmod 775 "${ZABBIX_CONFIG_PATH}" + create_logrotate zabbix-agent "${ZABBIX_AGENT_LOG_PATH}"/"${ZABBIX_AGENT_LOG_FILE}" zabbix-agent-${logship_version} "${ZABBIX_USER}" root + print_notice "Container configured for monitoring with '${CONTAINER_MONITORING_BACKEND} ${ZABBIX_AGENT_TYPE}'" + ;; + "prometheus" ) + print_debug "Using Prometheus Backend" + print_notice "Container configured for monitoring with '${CONTAINER_MONITORING_BACKEND}'" + ;; + * ) + print_error "Unknown Monitoring Backend" + exit 1 + ;; + esac +else + service_stop "$(basename "$0")" +fi + +liftoff +output_on \ No newline at end of file diff --git a/install/etc/cont-init.d/10-db-backup b/install/etc/cont-init.d/10-db-backup old mode 100755 new mode 100644 index 23c855bc..9f02f789 --- a/install/etc/cont-init.d/10-db-backup +++ b/install/etc/cont-init.d/10-db-backup @@ -11,4 +11,12 @@ db_backup_container_init create_schedulers backup create_zabbix dbbackup4 -liftoff +# Initialize Prometheus if enabled +if var_true "${CONTAINER_ENABLE_MONITORING}" && [ "${CONTAINER_MONITORING_BACKEND,,}" = "prometheus" ]; then + source /assets/functions/08-prometheus + source /assets/defaults/08-prometheus + init_prometheus_metrics + start_prometheus_server +fi + +liftoff \ No newline at end of file