-
Notifications
You must be signed in to change notification settings - Fork 6
Refactor merge.py and add tests for it #165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
fbff4fd
73997cf
35028fa
29ef881
6d219b4
717ddb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,8 +6,9 @@ | |
| import logging | ||
| import argparse | ||
| from datetime import datetime, UTC | ||
| from dataclasses import dataclass | ||
| import json | ||
| from typing import Tuple | ||
| from typing import Tuple, List | ||
| from decimal import Decimal | ||
| from nerc_rates import rates, outages | ||
|
|
||
|
|
@@ -19,7 +20,15 @@ | |
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def compare_dates(date_str1, date_str2): | ||
| @dataclass | ||
| class MetricsMetadata: | ||
| cluster_name: str | ||
| report_start_date: str | None | ||
| report_end_date: str | None | ||
| interval_minutes: int | ||
|
|
||
|
|
||
| def compare_dates(date_str1, date_str2) -> bool: | ||
| """Returns true if date1 is earlier than date2""" | ||
| date1 = datetime.strptime(date_str1, "%Y-%m-%d") | ||
| date2 = datetime.strptime(date_str2, "%Y-%m-%d") | ||
|
|
@@ -67,21 +76,96 @@ def get_su_definitions(report_month) -> dict: | |
| return su_definitions | ||
|
|
||
|
|
||
| def load_and_merge_metrics(interval_minutes, files: List[str]) -> MetricsProcessor: | ||
| """Load and merge metrics | ||
|
|
||
| Loads metrics from provided json files and then returns a processor | ||
| that has all the merged data. | ||
| """ | ||
| processor = MetricsProcessor(interval_minutes) | ||
| for file in files: | ||
| with open(file, "r") as jsonfile: | ||
| metrics_from_file = json.load(jsonfile) | ||
| cpu_request_metrics = metrics_from_file["cpu_metrics"] | ||
| memory_request_metrics = metrics_from_file["memory_metrics"] | ||
| gpu_request_metrics = metrics_from_file.get("gpu_metrics", None) | ||
| processor.merge_metrics("cpu_request", cpu_request_metrics) | ||
| processor.merge_metrics("memory_request", memory_request_metrics) | ||
| if gpu_request_metrics is not None: | ||
| processor.merge_metrics("gpu_request", gpu_request_metrics) | ||
| return processor | ||
|
|
||
|
|
||
| def load_metadata(files: List[str]) -> MetricsMetadata: | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I could load data and metadata in a single loop instead of loading files twice. And for that reason I don't like what I've done. I am going to refactor it again later. |
||
| """ | ||
| Load only the metadata from the metrics files. | ||
| """ | ||
| cluster_name = None | ||
| report_start_date = None | ||
| report_end_date = None | ||
| interval_minutes = None | ||
|
|
||
| for file in files: | ||
| with open(file, "r") as jsonfile: | ||
| metrics_from_file = json.load(jsonfile) | ||
| if cluster_name is None: | ||
| cluster_name = metrics_from_file.get("cluster_name") | ||
|
Comment on lines
+111
to
+112
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any concern that
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we checked that cluster_name could be different in different files, so this behavior is unchanged. But it doesn't hurt to add that additional check. I'll add that in a different PR. |
||
|
|
||
| if interval_minutes is None: | ||
| interval_minutes = metrics_from_file.get("interval_minutes") | ||
| else: | ||
| interval_minutes_from_file = metrics_from_file["interval_minutes"] | ||
| if interval_minutes != interval_minutes_from_file: | ||
| sys.exit( | ||
| f"Cannot process files with different intervals {interval_minutes} != {interval_minutes_from_file}" | ||
| ) | ||
|
|
||
| if report_start_date is None: | ||
| report_start_date = metrics_from_file["start_date"] | ||
| elif compare_dates(metrics_from_file["start_date"], report_start_date): | ||
| report_start_date = metrics_from_file["start_date"] | ||
|
|
||
| if report_end_date is None: | ||
| report_end_date = metrics_from_file["end_date"] | ||
| elif compare_dates(report_end_date, metrics_from_file["end_date"]): | ||
| report_end_date = metrics_from_file["end_date"] | ||
|
|
||
| if cluster_name is None: | ||
| cluster_name = "Unknown Cluster" | ||
|
|
||
| if interval_minutes is None: | ||
| logger.info( | ||
| f"No prometheus query interval minutes found in the given set of files. Using the provided interval: {PROM_QUERY_INTERVAL_MINUTES} minute(s)" | ||
| ) | ||
| interval_minutes = PROM_QUERY_INTERVAL_MINUTES | ||
| else: | ||
| logger.info( | ||
| f"Prometheus Query interval set to {interval_minutes} minute(s) from file" | ||
| ) | ||
|
|
||
| return MetricsMetadata( | ||
| cluster_name=cluster_name, | ||
| report_start_date=report_start_date, | ||
| report_end_date=report_end_date, | ||
| interval_minutes=interval_minutes, | ||
| ) | ||
|
|
||
|
|
||
| def main(): | ||
| """Reads the metrics from files and generates the reports""" | ||
| parser = argparse.ArgumentParser() | ||
| parser.add_argument("files", nargs="+") | ||
| parser.add_argument( | ||
| "--invoice-file", | ||
| help="Name of the invoice file. Defaults to NERC OpenShift <report_month>.csv", | ||
| help="Name of the invoice file. Defaults to <cluster-name> <report_month>.csv", | ||
| ) | ||
| parser.add_argument( | ||
| "--pod-report-file", | ||
| help="Name of the pod report file. Defaults to Pod NERC OpenShift <report_month>.csv", | ||
| help="Name of the pod report file. Defaults to Pod-<cluster-name> <report_month>.csv", | ||
| ) | ||
| parser.add_argument( | ||
| "--class-invoice-file", | ||
| help="Name of the class report file. Defaults to NERC OpenShift Class <report_month>.csv", | ||
| help="Name of the class report file. Defaults to Classes-<cluster-name> <report_month>.csv", | ||
| ) | ||
| parser.add_argument("--upload-to-s3", action="store_true") | ||
| parser.add_argument( | ||
|
|
@@ -104,60 +188,14 @@ def main(): | |
| args = parser.parse_args() | ||
| files = args.files | ||
|
|
||
| report_start_date = None | ||
| report_end_date = None | ||
| cluster_name = None | ||
| interval_minutes = None | ||
|
|
||
| for file in files: | ||
| with open(file, "r") as jsonfile: | ||
| metrics_from_file = json.load(jsonfile) | ||
| if interval_minutes is None: | ||
| interval_minutes = metrics_from_file.get("interval_minutes") | ||
| else: | ||
| interval_minutes_from_file = metrics_from_file["interval_minutes"] | ||
| if interval_minutes != interval_minutes_from_file: | ||
| sys.exit( | ||
| f"Cannot process files with different intervals {interval_minutes} != {interval_minutes_from_file}" | ||
| ) | ||
|
|
||
| if interval_minutes is None: | ||
| logger.info( | ||
| f"No prometheus query interval minutes found in the given set of files. Using the provided interval: {PROM_QUERY_INTERVAL_MINUTES} minute(s)" | ||
| ) | ||
| interval_minutes = PROM_QUERY_INTERVAL_MINUTES | ||
| else: | ||
| logger.info( | ||
| f"Prometheus Query interval set to {interval_minutes} minute(s) from file" | ||
| ) | ||
|
|
||
| processor = MetricsProcessor(interval_minutes) | ||
|
|
||
| for file in files: | ||
| with open(file, "r") as jsonfile: | ||
| metrics_from_file = json.load(jsonfile) | ||
| if cluster_name is None: | ||
| cluster_name = metrics_from_file.get("cluster_name") | ||
| cpu_request_metrics = metrics_from_file["cpu_metrics"] | ||
| memory_request_metrics = metrics_from_file["memory_metrics"] | ||
| gpu_request_metrics = metrics_from_file.get("gpu_metrics", None) | ||
| processor.merge_metrics("cpu_request", cpu_request_metrics) | ||
| processor.merge_metrics("memory_request", memory_request_metrics) | ||
| if gpu_request_metrics is not None: | ||
| processor.merge_metrics("gpu_request", gpu_request_metrics) | ||
|
|
||
| if report_start_date is None: | ||
| report_start_date = metrics_from_file["start_date"] | ||
| elif compare_dates(metrics_from_file["start_date"], report_start_date): | ||
| report_start_date = metrics_from_file["start_date"] | ||
| metrics_metadata = load_metadata(files) | ||
|
|
||
| if report_end_date is None: | ||
| report_end_date = metrics_from_file["end_date"] | ||
| elif compare_dates(report_end_date, metrics_from_file["end_date"]): | ||
| report_end_date = metrics_from_file["end_date"] | ||
| cluster_name = metrics_metadata.cluster_name | ||
| report_start_date = metrics_metadata.report_start_date | ||
| report_end_date = metrics_metadata.report_end_date | ||
| interval_minutes = metrics_metadata.interval_minutes | ||
|
|
||
| if cluster_name is None: | ||
| cluster_name = "Unknown Cluster" | ||
| processor = load_and_merge_metrics(interval_minutes, files) | ||
|
|
||
| logger.info( | ||
| f"Generating report from {report_start_date} to {report_end_date} for {cluster_name}" | ||
|
|
@@ -171,13 +209,13 @@ def main(): | |
| logger.info("Using nerc rates for rates and outages") | ||
| rates_data = rates.load_from_url() | ||
| invoice_rates = invoice.Rates( | ||
| cpu=rates_data.get_value_at("CPU SU Rate", report_month, Decimal), | ||
| gpu_a100=rates_data.get_value_at("GPUA100 SU Rate", report_month, Decimal), | ||
| gpu_a100sxm4=rates_data.get_value_at( | ||
| cpu=rates_data.get_value_at("CPU SU Rate", report_month, Decimal), # type: ignore | ||
| gpu_a100=rates_data.get_value_at("GPUA100 SU Rate", report_month, Decimal), # type: ignore | ||
| gpu_a100sxm4=rates_data.get_value_at( # type: ignore | ||
|
||
| "GPUA100SXM4 SU Rate", report_month, Decimal | ||
| ), | ||
| gpu_v100=rates_data.get_value_at("GPUV100 SU Rate", report_month, Decimal), | ||
| gpu_h100=rates_data.get_value_at("GPUH100 SU Rate", report_month, Decimal), | ||
| gpu_v100=rates_data.get_value_at("GPUV100 SU Rate", report_month, Decimal), # type: ignore | ||
| gpu_h100=rates_data.get_value_at("GPUH100 SU Rate", report_month, Decimal), # type: ignore | ||
| ) | ||
| outage_data = outages.load_from_url() | ||
| ignore_hours = outage_data.get_outages_during( | ||
|
|
@@ -197,20 +235,11 @@ def main(): | |
| for start_time, end_time in ignore_hours: | ||
| logger.info(f"{start_time} to {end_time} will be excluded from the invoice") | ||
|
|
||
| if args.invoice_file: | ||
| invoice_file = args.invoice_file | ||
| else: | ||
| invoice_file = f"NERC OpenShift {report_month}.csv" | ||
|
|
||
| if args.class_invoice_file: | ||
| class_invoice_file = args.class_invoice_file | ||
| else: | ||
| class_invoice_file = f"NERC OpenShift Classes {report_month}.csv" | ||
|
|
||
| if args.pod_report_file: | ||
| pod_report_file = args.pod_report_file | ||
| else: | ||
| pod_report_file = f"Pod NERC OpenShift {report_month}.csv" | ||
| invoice_file = args.invoice_file or f"{cluster_name} {report_month}.csv" | ||
| class_invoice_file = ( | ||
| args.class_invoice_file or f"Classes-{cluster_name} {report_month}.csv" | ||
| ) | ||
| pod_report_file = args.pod_report_file or f"Pod-{cluster_name} {report_month}.csv" | ||
|
|
||
| report_start_date = datetime.strptime(report_start_date, "%Y-%m-%d") | ||
| report_end_date = datetime.strptime(report_end_date, "%Y-%m-%d") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,143 @@ | ||
| import pytest | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def mock_metrics_file1(): | ||
| cpu_metrics = [ | ||
| { | ||
| "metric": { | ||
| "pod": "pod1", | ||
| "namespace": "namespace1", | ||
| "resource": "cpu", | ||
| }, | ||
| "values": [ | ||
| [0, 10], | ||
| [60, 15], | ||
| [120, 20], | ||
| ], | ||
| }, | ||
| { | ||
| "metric": { | ||
| "pod": "pod2", | ||
| "namespace": "namespace1", | ||
| "resource": "cpu", | ||
| }, | ||
| "values": [ | ||
| [0, 30], | ||
| [60, 35], | ||
| [120, 40], | ||
| ], | ||
| }, | ||
| ] | ||
| memory_metrics = [ | ||
| { | ||
| "metric": { | ||
| "pod": "pod1", | ||
| "namespace": "namespace1", | ||
| "resource": "memory", | ||
| }, | ||
| "values": [ | ||
| [0, 10], | ||
| [60, 15], | ||
| [120, 20], | ||
| ], | ||
| }, | ||
| { | ||
| "metric": { | ||
| "pod": "pod2", | ||
| "namespace": "namespace1", | ||
| "resource": "cpu", | ||
| }, | ||
| "values": [ | ||
| [0, 30], | ||
| [60, 35], | ||
| [120, 40], | ||
| ], | ||
| }, | ||
| ] | ||
| return { | ||
| "cluster_name": "ocp-prod", | ||
| "start_date": "2025-09-20", | ||
| "end_date": "2025-09-20", | ||
| "interval_minutes": "15", | ||
| "cpu_metrics": cpu_metrics, | ||
| "memory_metrics": memory_metrics, | ||
| } | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def mock_metrics_file2(): | ||
| cpu_metrics = [ | ||
| { | ||
| "metric": { | ||
| "pod": "pod1", | ||
| "namespace": "namespace1", | ||
| "resource": "cpu", | ||
| }, | ||
| "values": [ | ||
| [180, 10], | ||
| [240, 15], | ||
| [300, 20], | ||
| ], | ||
| }, | ||
| { | ||
| "metric": { | ||
| "pod": "pod2", | ||
| "namespace": "namespace1", | ||
| "resource": "cpu", | ||
| }, | ||
| "values": [ | ||
| [180, 30], | ||
| [240, 35], | ||
| [300, 40], | ||
| ], | ||
| }, | ||
| ] | ||
| memory_metrics = [ | ||
| { | ||
| "metric": { | ||
| "pod": "pod1", | ||
| "namespace": "namespace1", | ||
| "resource": "memory", | ||
| }, | ||
| "values": [ | ||
| [180, 10], | ||
| [240, 15], | ||
| [300, 20], | ||
| ], | ||
| }, | ||
| { | ||
| "metric": { | ||
| "pod": "pod2", | ||
| "namespace": "namespace1", | ||
| "resource": "cpu", | ||
| }, | ||
| "values": [ | ||
| [180, 30], | ||
| [240, 35], | ||
| [300, 40], | ||
| ], | ||
| }, | ||
| ] | ||
| return { | ||
| "cluster_name": "ocp-prod", | ||
| "start_date": "2025-09-21", | ||
| "end_date": "2025-09-21", | ||
| "cpu_metrics": cpu_metrics, | ||
| "memory_metrics": memory_metrics, | ||
| "interval_minutes": "15", | ||
| } | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def mock_metrics_file3(): | ||
| cpu_metrics = [] | ||
| memory_metrics = [] | ||
| return { | ||
| "cluster_name": "ocp-prod", | ||
| "start_date": "2025-09-21", | ||
| "end_date": "2025-09-21", | ||
| "interval_minutes": "3", # file1 and file2 have 15 minutes | ||
| "cpu_metrics": cpu_metrics, | ||
| "memory_metrics": memory_metrics, | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor suggestion, but I think this is more concise:
If
`cpu_metrics` and `memory_metrics` are always present, is it fine to make the loop even simpler?

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good suggestion
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
uh, while this is a good suggestion, due to the old clumsy naming of things it'll break stuff. See, the files put things in cpu_metrics but the processors call it cpu_request so our neat little loop won't work. I am going to leave this as is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah sorry, the strings looked similar and I thought they were the same