Skip to content

Commit 4480a0f

Browse files
committed
Consolidated config into constants, config with env files.
1 parent 440a773 commit 4480a0f

File tree

8 files changed

+220
-99
lines changed

8 files changed

+220
-99
lines changed

openshift_metrics/.env.example

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# =============================================================================
2+
# OPENSHIFT METRICS CONFIGURATION
3+
# =============================================================================
4+
# This file contains all configuration options for the openshift metrics system.
5+
# All variable names match the corresponding Python variables in config.py exactly.
6+
#
7+
# TO USE: Copy this file to .env in the openshift_metrics directory
8+
# Command: cp .env.example .env
9+
# Then modify the values as needed for your environment.
10+
11+
# =============================================================================
12+
# INFRASTRUCTURE CONFIGURATION
13+
# =============================================================================
14+
15+
# OpenShift/Prometheus Configuration
16+
OPENSHIFT_PROMETHEUS_URL=https://thanos-querier-openshift-monitoring.apps.shift.nerc.mghpcc.org
17+
OPENSHIFT_TOKEN=your_openshift_token_here
18+
19+
# S3 Configuration
20+
S3_OUTPUT_ENDPOINT_URL=https://s3.us-east-005.backblazeb2.com
21+
S3_OUTPUT_ACCESS_KEY_ID=your_s3_access_key_here
22+
S3_OUTPUT_SECRET_ACCESS_KEY=your_s3_secret_key_here
23+
S3_INVOICE_BUCKET=nerc-invoicing
24+
S3_METRICS_BUCKET=openshift_metrics
25+
26+
# =============================================================================
27+
# PROCESSING CONFIGURATION
28+
# =============================================================================
29+
30+
# Metrics processing intervals
31+
INTERVAL_MINUTES=15
32+
STEP_MINUTES=15
33+
GPU_MAPPING_FILE=gpu_node_map.json
34+
35+
# HTTP retry configuration
36+
HTTP_RETRY_TOTAL=3
37+
HTTP_RETRY_BACKOFF_FACTOR=1
38+
39+
# =============================================================================
40+
# REPORT CONFIGURATION (formerly CLI arguments)
41+
# =============================================================================
42+
43+
# Report dates (leave empty to use defaults - yesterday)
44+
REPORT_START_DATE=
45+
REPORT_END_DATE=
46+
47+
# Upload configuration
48+
UPLOAD_TO_S3=false
49+
50+
# File configuration (leave empty to use default naming patterns)
51+
OUTPUT_FILE=
52+
INVOICE_FILE=
53+
POD_REPORT_FILE=
54+
CLASS_INVOICE_FILE=
55+
56+
# Ignore hours configuration (comma-separated timestamp ranges)
57+
# Format: YYYY-MM-DDTHH:MM:SS,YYYY-MM-DDTHH:MM:SS
58+
IGNORE_HOURS=
59+
60+
# =============================================================================
61+
# RATES AND BILLING CONFIGURATION
62+
# =============================================================================
63+
64+
# Rate source configuration
65+
USE_NERC_RATES=false
66+
67+
# Individual rates (Decimal values)
68+
RATE_CPU_SU=0.013
69+
RATE_GPU_V100_SU=1.214
70+
RATE_GPU_A100SXM4_SU=2.078
71+
RATE_GPU_A100_SU=1.803
72+
RATE_GPU_H100_SU=6.04
73+
74+
# Legacy rate (for backward compatibility)
75+
GPU_A100_RATE=1.803
76+
77+
# =============================================================================
78+
# BUSINESS LOGIC CONFIGURATION
79+
# =============================================================================
80+
81+
# Namespaces that support class-based reporting (comma-separated)
82+
NAMESPACES_WITH_CLASSES=rhods-notebooks
83+
84+
# =============================================================================
85+
# GPU CONFIGURATION
86+
# =============================================================================
87+
88+
# GPU types
89+
GPU_A100=NVIDIA-A100-40GB
90+
GPU_A100_SXM4=NVIDIA-A100-SXM4-40GB
91+
GPU_V100=Tesla-V100-PCIE-32GB
92+
GPU_H100=NVIDIA-H100-80GB-HBM3
93+
94+
# GPU Resource - MIG Geometries
95+
MIG_1G_5GB=nvidia.com/mig-1g.5gb
96+
MIG_2G_10GB=nvidia.com/mig-2g.10gb
97+
MIG_3G_20GB=nvidia.com/mig-3g.20gb
98+
WHOLE_GPU=nvidia.com/gpu
99+
100+
# VM GPU Resources
101+
VM_GPU_H100=nvidia.com/H100_SXM5_80GB
102+
VM_GPU_A100_SXM4=nvidia.com/A100_SXM4_40GB
103+
VM_GPU_V100=nvidia.com/GV100GL_Tesla_V100

openshift_metrics/invoice.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,34 +5,35 @@
55
from decimal import Decimal, ROUND_HALF_UP
66
import datetime
77

8-
# GPU types
9-
GPU_A100 = "NVIDIA-A100-40GB"
10-
GPU_A100_SXM4 = "NVIDIA-A100-SXM4-40GB"
11-
GPU_V100 = "Tesla-V100-PCIE-32GB"
12-
GPU_H100 = "NVIDIA-H100-80GB-HBM3"
13-
GPU_UNKNOWN_TYPE = "GPU_UNKNOWN_TYPE"
8+
from openshift_metrics import config, constants
9+
10+
# Import constants from centralized constants module
11+
GPU_A100 = constants.GPU_A100
12+
GPU_A100_SXM4 = constants.GPU_A100_SXM4
13+
GPU_V100 = constants.GPU_V100
14+
GPU_H100 = constants.GPU_H100
15+
GPU_UNKNOWN_TYPE = constants.GPU_UNKNOWN_TYPE
1416

1517
# GPU Resource - MIG Geometries
16-
# A100 Strategies
17-
MIG_1G_5GB = "nvidia.com/mig-1g.5gb"
18-
MIG_2G_10GB = "nvidia.com/mig-2g.10gb"
19-
MIG_3G_20GB = "nvidia.com/mig-3g.20gb"
20-
WHOLE_GPU = "nvidia.com/gpu"
18+
MIG_1G_5GB = constants.MIG_1G_5GB
19+
MIG_2G_10GB = constants.MIG_2G_10GB
20+
MIG_3G_20GB = constants.MIG_3G_20GB
21+
WHOLE_GPU = constants.WHOLE_GPU
2122

2223
# VM GPU Resources
23-
VM_GPU_H100 = "nvidia.com/H100_SXM5_80GB"
24-
VM_GPU_A100_SXM4 = "nvidia.com/A100_SXM4_40GB"
25-
VM_GPU_V100 = "nvidia.com/GV100GL_Tesla_V100"
24+
VM_GPU_H100 = constants.VM_GPU_H100
25+
VM_GPU_A100_SXM4 = constants.VM_GPU_A100_SXM4
26+
VM_GPU_V100 = constants.VM_GPU_V100
2627

2728
# SU Types
28-
SU_CPU = "OpenShift CPU"
29-
SU_A100_GPU = "OpenShift GPUA100"
30-
SU_A100_SXM4_GPU = "OpenShift GPUA100SXM4"
31-
SU_V100_GPU = "OpenShift GPUV100"
32-
SU_H100_GPU = "OpenShift GPUH100"
33-
SU_UNKNOWN_GPU = "OpenShift Unknown GPU"
34-
SU_UNKNOWN_MIG_GPU = "OpenShift Unknown MIG GPU"
35-
SU_UNKNOWN = "Openshift Unknown"
29+
SU_CPU = constants.SU_CPU
30+
SU_A100_GPU = constants.SU_A100_GPU
31+
SU_A100_SXM4_GPU = constants.SU_A100_SXM4_GPU
32+
SU_V100_GPU = constants.SU_V100_GPU
33+
SU_H100_GPU = constants.SU_H100_GPU
34+
SU_UNKNOWN_GPU = constants.SU_UNKNOWN_GPU
35+
SU_UNKNOWN_MIG_GPU = constants.SU_UNKNOWN_MIG_GPU
36+
SU_UNKNOWN = constants.SU_UNKNOWN
3637

3738
ServiceUnit = namedtuple("ServiceUnit", ["su_type", "su_count", "determinig_resource"])
3839

openshift_metrics/merge.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from decimal import Decimal
1212
import nerc_rates
1313

14-
from openshift_metrics import utils, invoice
14+
from openshift_metrics import utils, invoice, config, constants
1515
from openshift_metrics.metrics_processor import MetricsProcessor
1616

1717
logging.basicConfig(level=logging.INFO)
@@ -56,13 +56,13 @@ def get_su_definitions(report_month) -> dict:
5656
)
5757
)
5858
# Some internal SUs that I like to map to when there's insufficient data
59-
su_definitions[invoice.SU_UNKNOWN_GPU] = {"GPUs": 1, "vCPUs": 8, "RAM": 64 * 1024}
60-
su_definitions[invoice.SU_UNKNOWN_MIG_GPU] = {
59+
su_definitions[constants.SU_UNKNOWN_GPU] = {"GPUs": 1, "vCPUs": 8, "RAM": 64 * 1024}
60+
su_definitions[constants.SU_UNKNOWN_MIG_GPU] = {
6161
"GPUs": 1,
6262
"vCPUs": 8,
6363
"RAM": 64 * 1024,
6464
}
65-
su_definitions[invoice.SU_UNKNOWN] = {"GPUs": 0, "vCPUs": 1, "RAM": 1024}
65+
su_definitions[constants.SU_UNKNOWN] = {"GPUs": 0, "vCPUs": 1, "RAM": 1024}
6666
return su_definitions
6767

6868

@@ -73,16 +73,23 @@ def main():
7373
parser.add_argument(
7474
"--invoice-file",
7575
help="Name of the invoice file. Defaults to NERC OpenShift <report_month>.csv",
76+
default=config.INVOICE_FILE,
7677
)
7778
parser.add_argument(
7879
"--pod-report-file",
7980
help="Name of the pod report file. Defaults to Pod NERC OpenShift <report_month>.csv",
81+
default=config.POD_REPORT_FILE,
8082
)
8183
parser.add_argument(
8284
"--class-invoice-file",
8385
help="Name of the class report file. Defaults to NERC OpenShift Class <report_month>.csv",
86+
default=config.CLASS_INVOICE_FILE,
87+
)
88+
parser.add_argument(
89+
"--upload-to-s3",
90+
action="store_true",
91+
default=config.UPLOAD_TO_S3,
8492
)
85-
parser.add_argument("--upload-to-s3", action="store_true")
8693
parser.add_argument(
8794
"--ignore-hours",
8895
type=parse_timestamp_range,
@@ -93,12 +100,13 @@ def main():
93100
"--use-nerc-rates",
94101
action="store_true",
95102
help="Use rates from the nerc-rates repo",
103+
default=config.USE_NERC_RATES,
96104
)
97-
parser.add_argument("--rate-cpu-su", type=Decimal)
98-
parser.add_argument("--rate-gpu-v100-su", type=Decimal)
99-
parser.add_argument("--rate-gpu-a100sxm4-su", type=Decimal)
100-
parser.add_argument("--rate-gpu-a100-su", type=Decimal)
101-
parser.add_argument("--rate-gpu-h100-su", type=Decimal)
105+
parser.add_argument("--rate-cpu-su", type=Decimal, default=config.RATE_CPU_SU)
106+
parser.add_argument("--rate-gpu-v100-su", type=Decimal, default=config.RATE_GPU_V100_SU)
107+
parser.add_argument("--rate-gpu-a100sxm4-su", type=Decimal, default=config.RATE_GPU_A100SXM4_SU)
108+
parser.add_argument("--rate-gpu-a100-su", type=Decimal, default=config.RATE_GPU_A100_SU)
109+
parser.add_argument("--rate-gpu-h100-su", type=Decimal, default=config.RATE_GPU_H100_SU)
102110

103111
args = parser.parse_args()
104112
files = args.files
@@ -208,7 +216,7 @@ def main():
208216
rates=rates,
209217
su_definitions=su_definitions,
210218
cluster_name=cluster_name,
211-
namespaces_with_classes=["rhods-notebooks"],
219+
namespaces_with_classes=config.NAMESPACES_WITH_CLASSES,
212220
ignore_hours=ignore_hours,
213221
)
214222
utils.write_metrics_by_pod(
@@ -219,7 +227,7 @@ def main():
219227
)
220228

221229
if args.upload_to_s3:
222-
bucket_name = os.environ.get("S3_INVOICE_BUCKET", "nerc-invoicing")
230+
bucket_name = config.S3_INVOICE_BUCKET
223231
primary_location = (
224232
f"Invoices/{report_month}/"
225233
f"Service Invoices/{cluster_name} {report_month}.csv"

openshift_metrics/metrics_processor.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@
33
from collections import namedtuple
44
import logging
55

6+
from openshift_metrics import config
7+
68
logging.basicConfig(level=logging.INFO)
79
logger = logging.getLogger(__name__)
810

9-
GPU_UNKNOWN_TYPE = "GPU_UNKNOWN_TYPE"
11+
# Import constants module
12+
from openshift_metrics import constants
13+
1014
GPUInfo = namedtuple("GPUInfo", ["gpu_type", "gpu_resource", "node_model"])
1115

1216

@@ -15,13 +19,13 @@ class MetricsProcessor:
1519

1620
def __init__(
1721
self,
18-
interval_minutes: int = 15,
22+
interval_minutes: int = None,
1923
merged_data: dict = None,
20-
gpu_mapping_file: str = "gpu_node_map.json",
24+
gpu_mapping_file: str = None,
2125
):
22-
self.interval_minutes = interval_minutes
26+
self.interval_minutes = interval_minutes or config.INTERVAL_MINUTES
2327
self.merged_data = merged_data if merged_data is not None else {}
24-
self.gpu_mapping = self._load_gpu_mapping(gpu_mapping_file)
28+
self.gpu_mapping = self._load_gpu_mapping(gpu_mapping_file or config.GPU_MAPPING_FILE)
2529

2630
def merge_metrics(self, metric_name, metric_list):
2731
"""Merge metrics (cpu, memory, gpu) by pod"""
@@ -75,16 +79,16 @@ def _extract_gpu_info(self, metric_name: str, metric: Dict) -> GPUInfo:
7579

7680
if metric_name == "gpu_request":
7781
gpu_type = metric["metric"].get(
78-
"label_nvidia_com_gpu_product", GPU_UNKNOWN_TYPE
82+
"label_nvidia_com_gpu_product", constants.GPU_UNKNOWN_TYPE
7983
)
8084
gpu_resource = metric["metric"].get("resource")
8185
node_model = metric["metric"].get("label_nvidia_com_gpu_machine")
8286

8387
# Sometimes GPU labels from the nodes can be missing, in that case
8488
# we get the gpu_type from the gpu-node file
85-
if gpu_type == GPU_UNKNOWN_TYPE:
89+
if gpu_type == constants.GPU_UNKNOWN_TYPE:
8690
node_name = metric["metric"].get("node")
87-
gpu_type = self.gpu_mapping.get(node_name, GPU_UNKNOWN_TYPE)
91+
gpu_type = self.gpu_mapping.get(node_name, constants.GPU_UNKNOWN_TYPE)
8892

8993
return GPUInfo(gpu_type, gpu_resource, node_model)
9094

openshift_metrics/openshift_prometheus_metrics.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,21 @@
2020
import json
2121
import logging
2222

23-
from openshift_metrics import utils
23+
from openshift_metrics import utils, config
2424
from openshift_metrics.prometheus_client import PrometheusClient
2525
from openshift_metrics.metrics_processor import MetricsProcessor
2626

2727
logging.basicConfig(level=logging.INFO)
2828
logger = logging.getLogger(__name__)
2929

30-
CPU_REQUEST = 'kube_pod_resource_request{resource="cpu", node!=""} unless on(pod, namespace) kube_pod_status_unschedulable'
31-
MEMORY_REQUEST = 'kube_pod_resource_request{resource="memory", node!=""} unless on(pod, namespace) kube_pod_status_unschedulable'
32-
GPU_REQUEST = 'kube_pod_resource_request{resource=~"nvidia.com.*", node!=""} unless on(pod, namespace) kube_pod_status_unschedulable'
33-
KUBE_NODE_LABELS = 'kube_node_labels{label_nvidia_com_gpu_product!=""}'
34-
KUBE_POD_LABELS = 'kube_pod_labels{label_nerc_mghpcc_org_class!=""}'
30+
# Use centralized configuration for Prometheus queries and cluster mappings
31+
CPU_REQUEST = config.PROMETHEUS_QUERIES["CPU_REQUEST"]
32+
MEMORY_REQUEST = config.PROMETHEUS_QUERIES["MEMORY_REQUEST"]
33+
GPU_REQUEST = config.PROMETHEUS_QUERIES["GPU_REQUEST"]
34+
KUBE_NODE_LABELS = config.PROMETHEUS_QUERIES["KUBE_NODE_LABELS"]
35+
KUBE_POD_LABELS = config.PROMETHEUS_QUERIES["KUBE_POD_LABELS"]
3536

36-
URL_CLUSTER_NAME_MAPPING = {
37-
"https://thanos-querier-openshift-monitoring.apps.shift.nerc.mghpcc.org": "ocp-prod",
38-
"https://thanos-querier-openshift-monitoring.apps.ocp-test.nerc.mghpcc.org": "ocp-test",
39-
"https://thanos-querier-openshift-monitoring.apps.edu.nerc.mghpcc.org": "academic",
40-
}
37+
URL_CLUSTER_NAME_MAPPING = config.CLUSTER_NAME_MAPPING
4138

4239

4340
def main():
@@ -47,20 +44,27 @@ def main():
4744
parser.add_argument(
4845
"--openshift-url",
4946
help="OpenShift Prometheus URL",
50-
default=os.getenv("OPENSHIFT_PROMETHEUS_URL"),
47+
default=config.OPENSHIFT_PROMETHEUS_URL,
5148
)
5249
parser.add_argument(
5350
"--report-start-date",
5451
help="report date (ex: 2022-03-14)",
55-
default=(datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
52+
default=config.REPORT_START_DATE,
5653
)
5754
parser.add_argument(
5855
"--report-end-date",
5956
help="report date (ex: 2022-03-14)",
60-
default=(datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
57+
default=config.REPORT_END_DATE,
58+
)
59+
parser.add_argument(
60+
"--upload-to-s3",
61+
action="store_true",
62+
default=config.UPLOAD_TO_S3,
63+
)
64+
parser.add_argument(
65+
"--output-file",
66+
default=config.OUTPUT_FILE,
6167
)
62-
parser.add_argument("--upload-to-s3", action="store_true")
63-
parser.add_argument("--output-file")
6468

6569
args = parser.parse_args()
6670
if not args.openshift_url:
@@ -88,7 +92,7 @@ def main():
8892
f"Generating report starting {report_start_date} and ending {report_end_date} in {output_file}"
8993
)
9094

91-
token = os.environ.get("OPENSHIFT_TOKEN")
95+
token = config.OPENSHIFT_TOKEN
9296
prom_client = PrometheusClient(openshift_url, token)
9397

9498
metrics_dict = {}
@@ -151,7 +155,7 @@ def main():
151155
json.dump(metrics_dict, file)
152156

153157
if args.upload_to_s3:
154-
bucket_name = os.environ.get("S3_METRICS_BUCKET", "openshift_metrics")
158+
bucket_name = config.S3_METRICS_BUCKET
155159
utils.upload_to_s3(output_file, bucket_name, s3_location)
156160

157161

0 commit comments

Comments
 (0)