
Commit 7e6079b

[BENCHMARK WORKFLOW] (#53)
* new workflow to generate summary of the predictions
* changed filename
* added benchmarker helper script
* return self in operator.py
* added unscaled arg to return predictions without scaling
* added nanogpt model
* fixed record_breakdown
* format
* changed var names
* changed order of csv fields
* modified process_results

---------

Co-authored-by: John Calderon <[email protected]>
1 parent cc2f54d commit 7e6079b

10 files changed: +884 additions, -79 deletions
Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
name: whl-build-all

on:
  workflow_dispatch:

env:
  DEVICE_PAIRS: L4,A100;L4,T4;T4,A100;L4,V100;V100,A100;T4,V100

jobs:
  create-summary:
    needs: [experiments-t4, experiments-l4, experiments-a100, experiments-v100]
    runs-on: ubuntu-latest
    steps:
      - name: Fetch repository
        uses: actions/checkout@v4

      - name: create directory to gather all csv files
        run: |
          mkdir all_results

      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          path: all_results
          merge-multiple: true

      - name: check files
        run: |
          ls -R all_results

      - name: generate end-to-end and per operation results
        run: |
          sudo apt install python3-pip -y
          pip3 install pandas
          python3 ./experiments/process_results.py --in-dir all_results --out-e2e out_e2e --out-ops out_ops

      - name: upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: artifacts-combined
          path: |
            ./experiments/out_e2e/
            ./experiments/out_ops/

  experiments-t4:
    runs-on: [self-hosted, dev, t4]
    steps:
      - name: Fetch repository
        uses: actions/checkout@v4

      - name: run experiments
        run: |
          ./experiments/benchmarker_helper_script.sh python3.10
        env:
          LOCAL_DEVICE: T4

      - name: upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: artifacts-t4
          path: ./experiments/results/

  experiments-v100:
    runs-on: [self-hosted, dev, v100]
    steps:
      - name: Fetch repository
        uses: actions/checkout@v4

      - name: run experiments
        run: |
          ./experiments/benchmarker_helper_script.sh python3.10
        env:
          LOCAL_DEVICE: V100

      - name: upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: artifacts-v100
          path: ./experiments/results/

  experiments-l4:
    runs-on: [self-hosted, dev, l4]
    steps:
      - name: Fetch repository
        uses: actions/checkout@v4

      - name: run experiments
        run: |
          ./experiments/benchmarker_helper_script.sh python3.10
        env:
          LOCAL_DEVICE: L4

      - name: upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: artifacts-l4
          path: ./experiments/results/

  experiments-a100:
    runs-on: [self-hosted, dev, a100]
    steps:
      - name: Fetch repository
        uses: actions/checkout@v4

      - name: run experiments
        run: |
          ./experiments/benchmarker_helper_script.sh python3.10
        env:
          LOCAL_DEVICE: A100

      - name: upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: artifacts-a100
          path: ./experiments/results/
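
The create-summary job fans in the artifacts-* uploads from the four GPU runners and hands them to experiments/process_results.py, which is modified elsewhere in this commit but not shown in this diff. As a rough, hypothetical sketch of the kind of CSV aggregation such a step performs (the helper name combine_results and the column layout are assumptions, not the script's real interface):

from pathlib import Path

import pandas as pd


def combine_results(in_dir: str, out_file: str) -> None:
    # Concatenate every per-device CSV gathered under `in_dir` into one
    # summary CSV. Assumes all devices write the same column layout.
    frames = [pd.read_csv(path) for path in Path(in_dir).rglob("*.csv")]
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv(out_file, index=False)


if __name__ == "__main__":
    combine_results("all_results", "summary.csv")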

analyzer/habitat/analysis/arguments.py

Lines changed: 18 additions & 2 deletions
@@ -4,10 +4,12 @@
 class Arguments:
     """
     Stores representations of an operation's arguments.
+    debug_args is used for benchmarking and reporting.
     """
-    def __init__(self, args, kwargs):
+    def __init__(self, args, kwargs, debug_args):
         self.args = args
         self.kwargs = kwargs
+        self.debug_args = debug_args
         self.special = {}

     @classmethod
@@ -17,7 +19,8 @@ def from_raw_arguments(cls, args, kwargs):
             arg_name: _process_argument(arg_value)
             for arg_name, arg_value in kwargs.items()
         }
-        return cls(processed_args, processed_kwargs)
+        debug_args = tuple(map(_debug_process_argument, args))
+        return cls(processed_args, processed_kwargs, debug_args)


 def _process_argument(argument):
@@ -34,3 +37,16 @@ def _process_argument(argument):
         return argument.size()
     else:
         return argument
+
+def _debug_process_argument(argument):
+    """Similar to _process_argument, but used for reporting and debugging purposes."""
+    if isinstance(argument, tuple):
+        return tuple(map(_process_argument, argument))
+
+    if isinstance(argument, list):
+        return list(map(_process_argument, argument))
+
+    if isinstance(argument, torch.Tensor):
+        return argument.size(), argument.dtype
+    else:
+        return argument
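
For a tensor argument, _process_argument keeps only its size, while the new _debug_process_argument also records the dtype; the result is stored in Arguments.debug_args for benchmarking reports. A minimal usage sketch, assuming PyTorch is installed and the module is importable as habitat.analysis.arguments (the import path is an assumption):

import torch

from habitat.analysis.arguments import Arguments  # import path assumed

x = torch.zeros(8, 3, 224, 224, dtype=torch.float16)
args = Arguments.from_raw_arguments((x, 1), {})

print(args.args[0])        # expected: torch.Size([8, 3, 224, 224])
print(args.debug_args[0])  # expected: (torch.Size([8, 3, 224, 224]), torch.float16)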

analyzer/habitat/analysis/mlp/mlp.py

Lines changed: 0 additions & 4 deletions
@@ -148,10 +148,6 @@ def __init__(self, model_name, layers, layer_size, model_path=None):
         self.mu = None
         self.sigma = None

-        # create directory to save model
-        curr_dir = os.getcwd()
-        pathlib.Path(f"{curr_dir}/saved_models/{model_name}").mkdir(exist_ok=True)
-
         if model_path is not None:
             self.load_state(model_path)

analyzer/habitat/analysis/operation.py

Lines changed: 5 additions & 5 deletions
@@ -1,4 +1,4 @@
-
+import warnings

 class Operation:
     """
@@ -79,10 +79,10 @@ def backward(self):
     def device(self):
         return self._device

-    def to_device(self, dest_device, predictor):
+    def to_device(self, dest_device, predictor, unscaled=False):
         if dest_device.name == self._device.name:
-            return self
-        return predictor.predict_operation(self, dest_device)
+            warnings.warn("Predicting to the same device")
+        return predictor.predict_operation(self, dest_device, unscaled)


 class PredictedOperation(Operation):
@@ -121,4 +121,4 @@ def device(self):
     def to_device(self, dest_device, predictor):
         raise RuntimeError(
             'Cannot make a prediction using a predicted operation.',
-        )
+        )

analyzer/habitat/analysis/predictor.py

Lines changed: 29 additions & 14 deletions
@@ -89,7 +89,7 @@ def __init__(
         )


-    def predict_operation(self, operation, dest_device):
+    def predict_operation(self, operation, dest_device, unscaled=False):
         if operation.name not in SPECIAL_OPERATIONS:
             return PredictedOperation(
                 operation,
@@ -100,15 +100,15 @@ def predict_operation(self, operation, dest_device):
         )

         if operation.name == 'conv2d':
-            return self._special_scale(operation, dest_device, self._conv2d_scale)
+            return self._special_scale(operation, dest_device, self._conv2d_scale, unscaled)
         elif operation.name == 'lstm':
-            return self._special_scale(operation, dest_device, self._lstm_scale)
+            return self._special_scale(operation, dest_device, self._lstm_scale, unscaled)
         elif operation.name in ['linear','__matmul__']:
-            return self._special_scale(operation, dest_device, self._linear_scale)
+            return self._special_scale(operation, dest_device, self._linear_scale, unscaled)
         elif operation.name == 'bmm':
-            return self._special_scale(operation, dest_device, self._bmm_scale)
+            return self._special_scale(operation, dest_device, self._bmm_scale, unscaled)
         elif operation.name == 'conv_transpose2d':
-            return self._special_scale(operation, dest_device, self._conv_transpose2d_scale)
+            return self._special_scale(operation, dest_device, self._conv_transpose2d_scale, unscaled)

         logger.warn('Unhandled special operation: %s', operation.name)
         return PredictedOperation(
@@ -139,8 +139,8 @@ def _wave_scale(self, run_time, dest_device):
             device=dest_device,
         )

-    def _special_scale(self, operation, dest_device, scaler):
-        predicted_ms = scaler(operation, dest_device)
+    def _special_scale(self, operation, dest_device, scaler, unscaled=False):
+        predicted_ms = scaler(operation, dest_device, unscaled)

         if predicted_ms < 0:
             logger.warn(
@@ -154,10 +154,10 @@ def _special_scale(self, operation, dest_device, scaler):
             operation,
             RunTimePurePrediction(predicted_ms, dest_device),
             None,
-            dest_device,
+            dest_device
         )

-    def _conv2d_scale(self, operation, dest_device):
+    def _conv2d_scale(self, operation, dest_device, unscaled=False):
         # 1. Merge arguments (give them all names)
         merged = name_all_arguments(
             CONV2D_PARAMS,
@@ -189,9 +189,12 @@ def _conv2d_scale(self, operation, dest_device):
         pred_dest = self.conv2d_pred.predict(arguments, dest_device.name)
         pred_orig = self.conv2d_pred.predict(arguments, operation.device.name)

+        if unscaled:
+            return pred_dest
+
         return operation.run_time_ms * pred_dest / pred_orig

-    def _conv_transpose2d_scale(self, operation, dest_device):
+    def _conv_transpose2d_scale(self, operation, dest_device, unscaled=False):
         # 1. Merge arguments (give them all names)
         merged = name_all_arguments(
             CONVTRANSPOSE2D_PARAMS,
@@ -223,9 +226,12 @@ def _conv_transpose2d_scale(self, operation, dest_device):
         pred_dest = self.conv_transpose2d_pred.predict(arguments, dest_device.name)
         pred_orig = self.conv_transpose2d_pred.predict(arguments, operation.device.name)

+        if unscaled:
+            return pred_dest
+
         return operation.run_time_ms * pred_dest / pred_orig

-    def _linear_scale(self, operation, dest_device):
+    def _linear_scale(self, operation, dest_device, unscaled=False):
         merged = name_all_arguments(
             LINEAR_PARAMS,
             operation.arguments.args,
@@ -259,9 +265,12 @@ def _linear_scale(self, operation, dest_device):
         pred_dest = self.linear_pred.predict(arguments, dest_device.name)
         pred_orig = self.linear_pred.predict(arguments, operation.device.name)

+        if unscaled:
+            return pred_dest
+
         return operation.run_time_ms * pred_dest / pred_orig

-    def _bmm_scale(self, operation, dest_device):
+    def _bmm_scale(self, operation, dest_device, unscaled=False):
         merged = name_all_arguments(
             BMM_PARAMS,
             operation.arguments.args,
@@ -279,9 +288,12 @@ def _bmm_scale(self, operation, dest_device):
         pred_dest = self.bmm_pred.predict(arguments, dest_device.name)
         pred_orig = self.bmm_pred.predict(arguments, operation.device.name)

+        if unscaled:
+            return pred_dest
+
         return operation.run_time_ms * pred_dest / pred_orig

-    def _lstm_scale(self, operation, dest_device):
+    def _lstm_scale(self, operation, dest_device, unscaled=False):
         # This is hacky, but unfortunately the only way to differentiate these
         # overloaded LSTM calls.
         has_batch_sizes = isinstance(operation.arguments.args[4], bool)
@@ -324,4 +336,7 @@ def _lstm_scale(self, operation, dest_device):
         pred_dest = self.lstm_pred.predict(arguments, dest_device.name)
         pred_orig = self.lstm_pred.predict(arguments, operation.device.name)

+        if unscaled:
+            return pred_dest
+
         return operation.run_time_ms * pred_dest / pred_orig
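
All of the *_scale helpers follow the same pattern: the per-operation MLP predicts a run time for both the destination device and the device the operation was actually measured on, and the measured run time is scaled by that ratio; with the new unscaled=True flag the raw destination-device prediction is returned instead. A minimal sketch of that arithmetic, with made-up numbers (illustrative only, not the library's API):

def scale_run_time(measured_ms, pred_orig_ms, pred_dest_ms, unscaled=False):
    # Mirrors the logic used by the *_scale helpers above.
    if unscaled:
        # Raw MLP prediction for the destination device, no calibration
        # against the measured run time.
        return pred_dest_ms
    # Calibrate the MLP's cross-device ratio with the measured run time.
    return measured_ms * pred_dest_ms / pred_orig_ms


# Hypothetical numbers: an op measured at 2.0 ms on the source GPU, where the
# MLP predicts 1.6 ms on the source and 0.8 ms on the destination.
print(scale_run_time(2.0, 1.6, 0.8))                 # 1.0  (scaled prediction)
print(scale_run_time(2.0, 1.6, 0.8, unscaled=True))  # 0.8  (raw MLP output)
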
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
#!/bin/bash

PYTHON_VERSION=$1
VENV_NAME=venv_${PYTHON_VERSION}

python3 -m virtualenv ${VENV_NAME} -p ${PYTHON_VERSION}
ln -s /usr/bin/${PYTHON_VERSION}-config ${VENV_NAME}/bin/python3-config

source ${VENV_NAME}/bin/activate

rm -r cpp/build analyzer/habitat/*.so
git submodule update --init --recursive
git lfs pull

pushd analyzer
./install-dev.sh
popd

pushd experiments

device_pairs_list=()
IFS=';' read -ra input_devices <<< "${DEVICE_PAIRS}"

for i in "${input_devices[@]}"; do
    IFS=',' read -ra pair <<< "${i}"
    orig=${pair[0]}
    dest=${pair[1]}
    device_pairs_list+=("${orig},${dest}" "${dest},${orig}")
done

for j in "${device_pairs_list[@]}"; do
    IFS=',' read -ra pair <<< "${j}"
    orig=${pair[0]}
    dest=${pair[1]}
    if [ "${orig}" == "${LOCAL_DEVICE}" ]; then
        python model_eval_per_device.py ${orig} ${dest}
    fi
done

popd

pushd analyzer/habitat/data
find -iname "model.pth" | xargs sha256sum

popd

deactivate
rm -r ${VENV_NAME}
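
Each self-hosted runner exports LOCAL_DEVICE for its own GPU, so the script expands DEVICE_PAIRS in both directions and then evaluates only the pairs that originate on that GPU. The same selection logic restated in Python for clarity (illustrative, not part of the commit):

DEVICE_PAIRS = "L4,A100;L4,T4;T4,A100;L4,V100;V100,A100;T4,V100"
LOCAL_DEVICE = "T4"

# Expand every pair in both directions, then keep only the pairs whose
# origin matches the GPU this runner actually has.
expanded = []
for entry in DEVICE_PAIRS.split(";"):
    orig, dest = entry.split(",")
    expanded += [(orig, dest), (dest, orig)]

to_run = [(o, d) for o, d in expanded if o == LOCAL_DEVICE]
print(to_run)  # [('T4', 'L4'), ('T4', 'A100'), ('T4', 'V100')]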
