baler-collaboration · sanam2405 · Nov 6, 2023 · Nov 6, 2023 · Nov 11, 2023 · Nov 11, 2023
diff --git a/baler/baler.py b/baler/baler.py
@@ -65,6 +65,7 @@ def main():
     elif mode == "train":
         check_enabled_profilers(
             perform_training,
+            output_path,
             pytorch_profile,
             energy_profile,
             output_path,
@@ -92,13 +93,14 @@ def main():
 
 
 def check_enabled_profilers(
-    f, pytorchProfile=False, energyProfile=False, *args, **kwargs
+    f, output_path="/", pytorchProfile=False, energyProfile=False, *args, **kwargs
 ):
     """
     Conditionally apply profiling based on the given boolean flags.
 
     Args:
         f (callable): The function to be potentially profiled.
+        output_path (str): The path where the profiling logs and reports are to be saved.
         pytorchProfile (bool): Whether to apply PyTorch profiling.
         energyProfile (bool): Whether to apply energy profiling.
 
@@ -108,10 +110,10 @@ def check_enabled_profilers(
     if pytorchProfile and not energyProfile:
         return pytorch_profile(f, *args, **kwargs)
     elif energyProfile and not pytorchProfile:
-        return energy_profiling(f, "baler_training", 1, *args, **kwargs)
+        return energy_profiling(f, output_path, "baler_training", 1, *args, **kwargs)
     elif pytorchProfile and energyProfile:
         return pytorch_profile(
-            energy_profiling, f, "baler_training", 1, *args, **kwargs
+            energy_profiling, f, output_path, "baler_training", 1, *args, **kwargs
         )
     else:
         return f(*args, **kwargs)

diff --git a/baler/modules/profile_plotting.py b/baler/modules/profile_plotting.py
@@ -0,0 +1,252 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.dates import date2num
+import seaborn as sns
+import os
+
+
+def plot(profiling_path, func):
+    """
+    Visualize the codecarbon `emissions.csv` log as time-series and
+    histogram plots. The log is read from `profiling_path` and four
+    PNG figures, named after `func`, are written to the same
+    directory. Numeric columns are scaled by 1e6 before plotting.
+
+    Args:
+        profiling_path (str): The path where the .CSV logs are generated
+        func (callable): The profiled function; its `__name__` labels the plots.
+
+    Returns:
+        None. The plots are stored in the `profiling_path` location
+    """
+
+    # Load CSV data into a DataFrame
+    emission_csv_path = os.path.join(profiling_path, "emissions.csv")
+    data = pd.read_csv(emission_csv_path)
+
+    # Plotted values are multiplied by this factor; axis labels carry "x 1e-6"
+    scaling_factor = 10**6
+
+    # Columns whose values are scaled before plotting
+    columns_to_scale = [
+        "duration",
+        "emissions",
+        "emissions_rate",
+        "cpu_power",
+        "gpu_power",
+        "ram_power",
+        "cpu_energy",
+        "gpu_energy",
+        "ram_energy",
+        "energy_consumed",
+    ]
+
+    # Scale up the values in the selected columns
+    data[columns_to_scale] *= scaling_factor
+
+    # Convert the 'timestamp' column to datetime (unparseable values become NaT)
+    data["timestamp"] = pd.to_datetime(data["timestamp"], errors="coerce")
+
+    # Plot 1: Time series graph for 'timestamp' vs 'duration'
+    plt.figure(figsize=(10, 6))
+    plt.plot(data["timestamp"], data["duration"], marker="o")
+    plt.xlabel("Timestamp")
+    plt.ylabel("Duration (in seconds) x 1e-6")
+    plt.title(f"Time Series {func.__name__} : Duration vs Timestamp")
+    plt.xticks(rotation=45)
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig(os.path.join(profiling_path, f"{func.__name__}_plot1.png"))
+    # Close the figure once it is saved; nothing is shown interactively.
+    plt.close()
+
+    # Plot 2 and 3: Time series graph for 'timestamp' vs 'emissions' and 'emissions_rate'
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
+
+    ax1.plot(data["timestamp"], data["emissions"], marker="o")
+    ax1.set_ylabel("Emissions in CO₂eq (in kg) x 1e-6")
+    ax1.set_title(f"Time Series {func.__name__} : Emissions vs Timestamp")
+    ax1.grid(True)
+
+    ax2.plot(data["timestamp"], data["emissions_rate"], marker="o")
+    ax2.set_xlabel("Timestamp")
+    ax2.set_ylabel("Emissions Rate in CO₂eq (in kg/second) x 1e-6")
+    ax2.set_title(
+        f"Time Series {func.__name__} : Emissions Rate vs Timestamp"
+    )
+    ax2.grid(True)
+
+    plt.xticks(rotation=45)
+    plt.tight_layout()
+    plt.savefig(os.path.join(profiling_path, f"{func.__name__}_plot2.png"))
+    # Close the figure once it is saved; nothing is shown interactively.
+    plt.close()
+
+    # Plot 4, 5, 6, and 7: Time series graph for 'timestamp' vs 'ram_power', 'cpu_energy', 'ram_energy', and 'energy_consumed'
+    fig, axs = plt.subplots(2, 2, figsize=(12, 10), sharex=True)
+
+    axs[0, 0].plot(data["timestamp"], data["ram_power"], marker="o")
+    axs[0, 0].set_ylabel("RAM Power (in watt) x 1e-6")
+    axs[0, 0].set_title(
+        f"Time Series {func.__name__} : RAM Power vs Timestamp"
+    )
+    axs[0, 0].grid(True)
+
+    axs[0, 1].plot(data["timestamp"], data["cpu_energy"], marker="o")
+    axs[0, 1].set_ylabel("CPU Energy (in kWh) x 1e-6")
+    axs[0, 1].set_title(
+        f"Time Series {func.__name__} : CPU Energy vs Timestamp"
+    )
+    axs[0, 1].grid(True)
+
+    axs[1, 0].plot(data["timestamp"], data["ram_energy"], marker="o")
+    axs[1, 0].set_xlabel("Timestamp")
+    axs[1, 0].set_ylabel("RAM Energy (in kWh) x 1e-6")
+    axs[1, 0].set_title(
+        f"Time Series {func.__name__} : RAM Energy vs Timestamp"
+    )
+    axs[1, 0].grid(True)
+
+    axs[1, 1].plot(data["timestamp"], data["energy_consumed"], marker="o")
+    axs[1, 1].set_xlabel("Timestamp")
+    axs[1, 1].set_ylabel("Energy Consumed (in kWh) x 1e-6")
+    axs[1, 1].set_title(
+        f"Time Series {func.__name__} : Energy Consumed vs Timestamp"
+    )
+    axs[1, 1].grid(True)
+
+    plt.xticks(rotation=45)
+    plt.tight_layout()
+    plt.savefig(os.path.join(profiling_path, f"{func.__name__}_plot3.png"))
+    # Close the figure once it is saved; nothing is shown interactively.
+    plt.close()
+
+    # Summary statistics per metric; the mean and quartiles become marker lines
+    emissions_central = data["emissions"].describe()
+    emissions_rate_central = data["emissions_rate"].describe()
+    ram_power_central = data["ram_power"].describe()
+    cpu_energy_central = data["cpu_energy"].describe()
+    ram_energy_central = data["ram_energy"].describe()
+    energy_consumed_central = data["energy_consumed"].describe()
+
+    # Plot histograms for each metric with central tendencies
+    plt.figure(figsize=(15, 10))
+
+    # Emissions
+    plt.subplot(2, 3, 1)
+    sns.histplot(data["emissions"], bins=20, kde=True, color="skyblue")
+    plt.axvline(emissions_central["mean"], color="red", linestyle="--", label="Mean")
+    plt.axvline(
+        emissions_central["50%"], color="orange", linestyle="--", label="Median (50%)"
+    )
+    plt.axvline(
+        emissions_central["25%"], color="green", linestyle="--", label="Q1 (25%)"
+    )
+    plt.axvline(
+        emissions_central["75%"], color="blue", linestyle="--", label="Q3 (75%)"
+    )
+    plt.xlabel("Emissions")
+    plt.title(f"Histogram {func.__name__} : Emissions")
+    plt.legend()
+
+    # Emissions Rate
+    plt.subplot(2, 3, 2)
+    sns.histplot(data["emissions_rate"], bins=20, kde=True, color="salmon")
+    plt.axvline(
+        emissions_rate_central["mean"], color="red", linestyle="--", label="Mean"
+    )
+    plt.axvline(
+        emissions_rate_central["50%"],
+        color="orange",
+        linestyle="--",
+        label="Median (50%)",
+    )
+    plt.axvline(
+        emissions_rate_central["25%"], color="green", linestyle="--", label="Q1 (25%)"
+    )
+    plt.axvline(
+        emissions_rate_central["75%"], color="blue", linestyle="--", label="Q3 (75%)"
+    )
+    plt.xlabel("Emissions Rate")
+    plt.title(f"Histogram {func.__name__} : Emissions Rate")
+    plt.legend()
+
+    # RAM Power
+    plt.subplot(2, 3, 3)
+    sns.histplot(data["ram_power"], bins=20, kde=True, color="lightgreen")
+    plt.axvline(ram_power_central["mean"], color="red", linestyle="--", label="Mean")
+    plt.axvline(
+        ram_power_central["50%"], color="orange", linestyle="--", label="Median (50%)"
+    )
+    plt.axvline(
+        ram_power_central["25%"], color="green", linestyle="--", label="Q1 (25%)"
+    )
+    plt.axvline(
+        ram_power_central["75%"], color="blue", linestyle="--", label="Q3 (75%)"
+    )
+    plt.xlabel("RAM Power")
+    plt.title(f"Histogram {func.__name__} : RAM Power")
+    plt.legend()
+
+    # CPU Energy
+    plt.subplot(2, 3, 4)
+    sns.histplot(data["cpu_energy"], bins=20, kde=True, color="lightcoral")
+    plt.axvline(cpu_energy_central["mean"], color="red", linestyle="--", label="Mean")
+    plt.axvline(
+        cpu_energy_central["50%"], color="orange", linestyle="--", label="Median (50%)"
+    )
+    plt.axvline(
+        cpu_energy_central["25%"], color="green", linestyle="--", label="Q1 (25%)"
+    )
+    plt.axvline(
+        cpu_energy_central["75%"], color="blue", linestyle="--", label="Q3 (75%)"
+    )
+    plt.xlabel("CPU Energy")
+    plt.title(f"Histogram {func.__name__} : CPU Energy")
+    plt.legend()
+
+    # RAM Energy
+    plt.subplot(2, 3, 5)
+    sns.histplot(data["ram_energy"], bins=20, kde=True, color="lightblue")
+    plt.axvline(ram_energy_central["mean"], color="red", linestyle="--", label="Mean")
+    plt.axvline(
+        ram_energy_central["50%"], color="orange", linestyle="--", label="Median (50%)"
+    )
+    plt.axvline(
+        ram_energy_central["25%"], color="green", linestyle="--", label="Q1 (25%)"
+    )
+    plt.axvline(
+        ram_energy_central["75%"], color="blue", linestyle="--", label="Q3 (75%)"
+    )
+    plt.xlabel("RAM Energy")
+    plt.title(f"Histogram {func.__name__} : RAM Energy")
+    plt.legend()
+
+    # Energy Consumed
+    plt.subplot(2, 3, 6)
+    sns.histplot(data["energy_consumed"], bins=20, kde=True, color="lightyellow")
+    plt.axvline(
+        energy_consumed_central["mean"], color="red", linestyle="--", label="Mean"
+    )
+    plt.axvline(
+        energy_consumed_central["50%"],
+        color="orange",
+        linestyle="--",
+        label="Median (50%)",
+    )
+    plt.axvline(
+        energy_consumed_central["25%"], color="green", linestyle="--", label="Q1 (25%)"
+    )
+    plt.axvline(
+        energy_consumed_central["75%"], color="blue", linestyle="--", label="Q3 (75%)"
+    )
+    plt.xlabel("Energy Consumed")
+    plt.title(f"Histogram {func.__name__} : Energy Consumed")
+    plt.legend()
+
+    plt.tight_layout()
+    plt.savefig(os.path.join(profiling_path, f"{func.__name__}_plot4.png"))
+    # Close the figure once it is saved; nothing is shown interactively.
+    plt.close()
+
+    print(f"Your codecarbon profiling plots are available at {profiling_path}")
diff --git a/baler/modules/profiling.py b/baler/modules/profiling.py
@@ -1,10 +1,12 @@
 import io
+import os
 import pstats
 import cProfile
 from pstats import SortKey
 import torch
 from torch.profiler import profile, record_function, ProfilerActivity
 import codecarbon
+from ..modules import profile_plotting
 
 
 def pytorch_profile(f, *args, **kwargs):
@@ -54,37 +56,70 @@ def pytorch_profile(f, *args, **kwargs):
     return result
 
 
-def energy_profiling(f, project_name, measure_power_secs, *args, **kwargs):
+def energy_profiling(f, output_path, project_name, measure_power_secs, *args, **kwargs):
     """
     Energy Profiling measures the amount of electricity that
     was consumed by the given function f and the amount of CO2 emission.
     It utilizes the codecarbon package for tracking this information.
 
     Args:
         f (callable): The function to be profiled.
+        output_path (str): The path where the profiling logs and reports are to be saved.
         project_name (str): The name of the project.
         measure_power_secs (int): The number of seconds to measure power.
 
     Returns:
         result: The result of the function `f` execution.
+        The value is returned as the first element of a tuple; the second element is the return value of `profile_plotting.plot(profiling_path, f)`, which is called to generate plots from the codecarbon log files.
     """
 
+    profiling_path = os.path.join(output_path, "profiling")
     tracker = codecarbon.EmissionsTracker(
-        project_name=project_name, measure_power_secs=measure_power_secs
+        project_name=project_name,
+        measure_power_secs=measure_power_secs,
+        save_to_file=True,
+        output_dir=profiling_path,
+        co2_signal_api_token="script-overwrite",
+        experiment_id="235b1da5-aaaa-aaaa-aaaa-893681599d2c",
+        log_level="DEBUG",
+        tracking_mode="process",
     )
     tracker.start_task(f"{f.__name__}")
 
     # Execute the function and get its result
     result = f(*args, **kwargs)
 
-    emissions = tracker.stop_task()
-    print("CO2 emission [kg]: ", emissions.emissions)
-    print("CO2 emission rate [kg/h]: ", 3600 * emissions.emissions_rate)
-    print("CPU energy consumed [kWh]: ", emissions.cpu_energy)
-    print("GPU energy consumed [kWh]: ", emissions.gpu_energy)
-    print("RAM energy consumed [kWh]: ", emissions.ram_energy)
+    tracker.stop_task()
+    emissions = tracker.stop()
 
-    return result
+    print(
+        "----------------------------------Energy Profile-----------------------------------------------"
+    )
+    print(
+        "-----------------------------------------------------------------------------------------------"
+    )
+    print(f"Emissions : {1000 * emissions} g CO₂")
+    for task_name, task in tracker._tasks.items():
+        print(
+            f"Emissions : {1000 * task.emissions_data.emissions} g CO₂ for task {task_name} \nEmission Rate : {3600*task.emissions_data.emissions_rate} Kg/h"
+        )
+        print(
+            "-----------------------------------------------------------------------------------------------"
+        )
+        print("Energy Consumption")
+        print(
+            f"CPU : {1000 * task.emissions_data.cpu_energy} Wh \nGPU : {1000 * task.emissions_data.gpu_energy} Wh \nRAM : {1000 * task.emissions_data.ram_energy} Wh"
+        )
+        print(
+            "-----------------------------------------------------------------------------------------------"
+        )
+        print("Power Consumption")
+        print(
+            f"CPU : { task.emissions_data.cpu_power} W \nGPU : { task.emissions_data.gpu_power} W \nRAM : { task.emissions_data.ram_power} W"
+            + f"\nduring {task.emissions_data.duration} seconds."
+        )
+
+    return result, profile_plotting.plot(profiling_path, f)
 
 
 def c_profile(func, *args, **kwargs):