"""
Configuration for the OpenShift metrics tooling.

Deployment-specific values are read from environment variables at import time
(the project convention is to define them in a ``.env`` file; this module
itself only consults ``os.environ``).

Most environment variables share the name of the Python constant they
populate. The exceptions are:

* ``S3_ENDPOINT_URL``, ``S3_ACCESS_KEY_ID`` and ``S3_SECRET_ACCESS_KEY`` are
  read from ``S3_OUTPUT_ENDPOINT_URL``, ``S3_OUTPUT_ACCESS_KEY_ID`` and
  ``S3_OUTPUT_SECRET_ACCESS_KEY``.
* ``HTTP_RETRY_CONFIG["total"]`` and ``HTTP_RETRY_CONFIG["backoff_factor"]``
  are read from ``HTTP_RETRY_TOTAL`` and ``HTTP_RETRY_BACKOFF_FACTOR``.

Constants in the "hardcoded" section below are not configurable through the
environment.
"""

import os
from datetime import datetime, timedelta


def _split_csv(raw):
    """Split a comma-separated string, trimming whitespace and dropping empty items."""
    return [item.strip() for item in raw.split(",") if item.strip()]


# =============================================================================
# HARDCODED CONSTANTS (rarely change, application-specific)
# =============================================================================

# Prometheus query strings, keyed by a short symbolic name.
PROMETHEUS_QUERIES = {
    "CPU_REQUEST": 'kube_pod_resource_request{resource="cpu", node!=""} unless on(pod, namespace) kube_pod_status_unschedulable',
    "MEMORY_REQUEST": 'kube_pod_resource_request{resource="memory", node!=""} unless on(pod, namespace) kube_pod_status_unschedulable',
    "GPU_REQUEST": 'kube_pod_resource_request{resource=~"nvidia.com.*", node!=""} unless on(pod, namespace) kube_pod_status_unschedulable',
    "KUBE_NODE_LABELS": 'kube_node_labels{label_nvidia_com_gpu_product!=""}',
    "KUBE_POD_LABELS": 'kube_pod_labels{label_nerc_mghpcc_org_class!=""}',
}

# Maps a Thanos querier URL to the short cluster name used for it.
CLUSTER_NAME_MAPPING = {
    "https://thanos-querier-openshift-monitoring.apps.shift.nerc.mghpcc.org": "ocp-prod",
    "https://thanos-querier-openshift-monitoring.apps.ocp-test.nerc.mghpcc.org": "ocp-test",
    "https://thanos-querier-openshift-monitoring.apps.edu.nerc.mghpcc.org": "academic",
}

# Placeholder values substituted for empty fields.
DEFAULT_VALUES = {
    "UNKNOWN_NODE": "Unknown Node",
    "UNKNOWN_MODEL": "Unknown Model",
    "EMPTY_STRING": "",
}

# =============================================================================
# BUSINESS LOGIC CONSTANTS
# =============================================================================
# Note: Business logic constants (GPU types, SU types, etc.) are now in constants.py
# This file only contains truly configurable values that change between deployments

# =============================================================================
# INFRASTRUCTURE CONFIGURATION
# =============================================================================

# OpenShift/Prometheus (both are None when the variable is unset)
OPENSHIFT_PROMETHEUS_URL = os.getenv("OPENSHIFT_PROMETHEUS_URL")
OPENSHIFT_TOKEN = os.getenv("OPENSHIFT_TOKEN")

# S3 configuration. Note that the endpoint and credentials are read from
# S3_OUTPUT_*-prefixed variables, unlike the bucket names.
S3_ENDPOINT_URL = os.getenv(
    "S3_OUTPUT_ENDPOINT_URL", "https://s3.us-east-005.backblazeb2.com"
)
S3_ACCESS_KEY_ID = os.getenv("S3_OUTPUT_ACCESS_KEY_ID")
S3_SECRET_ACCESS_KEY = os.getenv("S3_OUTPUT_SECRET_ACCESS_KEY")
S3_INVOICE_BUCKET = os.getenv("S3_INVOICE_BUCKET", "nerc-invoicing")
S3_METRICS_BUCKET = os.getenv("S3_METRICS_BUCKET", "openshift_metrics")

# =============================================================================
# PROCESSING CONFIGURATION
# =============================================================================

# Metrics processing. int() raises ValueError at import time if the
# environment supplies a non-integer value.
INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", "15"))
STEP_MINUTES = int(os.getenv("STEP_MINUTES", "15"))
GPU_MAPPING_FILE = os.getenv("GPU_MAPPING_FILE", "gpu_node_map.json")

# HTTP retry configuration.
# NOTE(review): the key names match the keyword arguments of
# urllib3.util.Retry; presumably this dict is passed to it - confirm against
# the caller.
HTTP_RETRY_CONFIG = {
    "total": int(os.getenv("HTTP_RETRY_TOTAL", "3")),
    # Parsed as float so fractional backoff factors such as "0.5" are
    # accepted instead of raising ValueError.
    "backoff_factor": float(os.getenv("HTTP_RETRY_BACKOFF_FACTOR", "1")),
    "status_forcelist": [429, 500, 502, 503, 504],
}

# =============================================================================
# REPORT CONFIGURATION (formerly CLI arguments)
# =============================================================================

# Report dates default to yesterday (local time, YYYY-MM-DD). The default is
# computed once so that the start and end defaults can never disagree when the
# module is imported across midnight.
_YESTERDAY = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
REPORT_START_DATE = os.getenv("REPORT_START_DATE", _YESTERDAY)
REPORT_END_DATE = os.getenv("REPORT_END_DATE", _YESTERDAY)

# Upload configuration: enabled only when the value is "true" (case-insensitive).
UPLOAD_TO_S3 = os.getenv("UPLOAD_TO_S3", "false").lower() == "true"

# File configuration (None when unset)
OUTPUT_FILE = os.getenv("OUTPUT_FILE")
INVOICE_FILE = os.getenv("INVOICE_FILE")
POD_REPORT_FILE = os.getenv("POD_REPORT_FILE")
CLASS_INVOICE_FILE = os.getenv("CLASS_INVOICE_FILE")

# Ignore hours configuration (comma-separated timestamp ranges).
# Kept as the raw string; it is not parsed in this module.
IGNORE_HOURS = os.getenv("IGNORE_HOURS", "")

# =============================================================================
# RATES AND BILLING CONFIGURATION
# =============================================================================

# Rate source configuration: enabled only when the value is "true"
# (case-insensitive).
USE_NERC_RATES = os.getenv("USE_NERC_RATES", "false").lower() == "true"

# Individual rates. These are the raw environment strings (None when unset);
# no Decimal conversion happens here.
# NOTE(review): presumably the consumer converts them to Decimal - confirm.
RATE_CPU_SU = os.getenv("RATE_CPU_SU")
RATE_GPU_V100_SU = os.getenv("RATE_GPU_V100_SU")
RATE_GPU_A100SXM4_SU = os.getenv("RATE_GPU_A100SXM4_SU")
RATE_GPU_A100_SU = os.getenv("RATE_GPU_A100_SU")
RATE_GPU_H100_SU = os.getenv("RATE_GPU_H100_SU")

# Legacy rates dictionary (for backward compatibility); intentionally empty.
# Note: This would need to import constants if used, but it's marked as legacy
RATES = {
    # "NVIDIA-A100-40GB": Decimal(os.getenv("GPU_A100_RATE")) if os.getenv("GPU_A100_RATE") else None,
}

# =============================================================================
# BUSINESS LOGIC CONFIGURATION
# =============================================================================

# Namespaces that support class-based reporting (comma-separated in the
# environment). Entries are trimmed and empty entries are discarded so that
# values such as "a, b" or a trailing comma do not produce bogus names.
NAMESPACES_WITH_CLASSES = _split_csv(
    os.getenv("NAMESPACES_WITH_CLASSES", "rhods-notebooks")
)

# Default filename patterns; the {placeholders} are left for the caller to fill.
DEFAULT_FILENAME_PATTERNS = {
    "INVOICE_FILE": "NERC OpenShift {report_month}.csv",
    "POD_REPORT_FILE": "Pod NERC OpenShift {report_month}.csv",
    "CLASS_INVOICE_FILE": "NERC OpenShift Classes {report_month}.csv",
    "OUTPUT_FILE_SINGLE": "metrics-{report_date}.json",
    "OUTPUT_FILE_RANGE": "metrics-{start_date}-to-{end_date}.json",
}