From 11e13d95b3739055035b43cf697ad1a1cb6e2818 Mon Sep 17 00:00:00 2001
From: Carlos <carlos@carlossegarra.com>
Date: Thu, 23 May 2024 10:53:34 +0200
Subject: [PATCH] locality: make first-class citizen plot (#45)

* locality: make first-class citizen plot

* scheduler: back to very-network

* plots: overall improvements

* plot: commit plotting scripts after submission
---
 requirements.txt                              |   2 +-
 tasks/elastic/plot.py                         |   8 +-
 tasks/lammps/run.py                           |  10 +-
 tasks/makespan/locality.md                    |  24 +-
 tasks/makespan/plot.py                        | 294 ++++--
 tasks/makespan/run.py                         |  32 +-
 tasks/makespan/scheduler.py                   |  57 +-
 tasks/makespan/trace.py                       |   6 +-
 .../traces/trace_mpi-locality_100_8.csv       | 101 ++
 .../traces/trace_mpi-locality_10_8.csv        |  11 +
 .../traces/trace_mpi-locality_150_8.csv       | 151 +++
 .../traces/trace_mpi-locality_200_8.csv       | 201 ++++
 .../traces/trace_mpi-locality_25_8.csv        |  26 +
 .../traces/trace_mpi-locality_50_8.csv        |  51 +
 .../traces/trace_mpi-locality_75_8.csv        |  76 ++
 tasks/migration/oracle.py                     |   4 +-
 tasks/migration/run.py                        |   8 +-
 tasks/motivation/ideal.py                     |   4 +-
 tasks/motivation/plot.py                      |  12 +-
 tasks/util/elastic.py                         |   6 +-
 tasks/util/lammps.py                          |  36 +-
 tasks/util/locality.py                        | 743 ++++++++++++++
 tasks/util/makespan.py                        | 967 +-----------------
 tasks/util/planner.py                         |  46 +-
 tasks/util/plot.py                            |  31 +-
 tasks/util/spot.py                            |   8 +-
 26 files changed, 1801 insertions(+), 1114 deletions(-)
 create mode 100644 tasks/makespan/traces/trace_mpi-locality_100_8.csv
 create mode 100644 tasks/makespan/traces/trace_mpi-locality_10_8.csv
 create mode 100644 tasks/makespan/traces/trace_mpi-locality_150_8.csv
 create mode 100644 tasks/makespan/traces/trace_mpi-locality_200_8.csv
 create mode 100644 tasks/makespan/traces/trace_mpi-locality_25_8.csv
 create mode 100644 tasks/makespan/traces/trace_mpi-locality_50_8.csv
 create mode 100644 tasks/makespan/traces/trace_mpi-locality_75_8.csv
 create mode 100644 tasks/util/locality.py

diff --git a/requirements.txt b/requirements.txt
index bdcfc14..b7b1a35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 ansible==7.0.0
 black==22.3.0
-faasmctl>=0.43.0
+faasmctl>=0.44.0
 flake8==3.9.2
 hoststats==0.1.1
 invoke>=2.1.0
diff --git a/tasks/elastic/plot.py b/tasks/elastic/plot.py
index 8aae71e..5bfaf38 100644
--- a/tasks/elastic/plot.py
+++ b/tasks/elastic/plot.py
@@ -5,7 +5,11 @@
 from os.path import join
 from pandas import read_csv
 from tasks.util.elastic import ELASTIC_PLOTS_DIR, ELASTIC_RESULTS_DIR
-from tasks.util.plot import SINGLE_COL_FIGSIZE, save_plot
+from tasks.util.plot import (
+    SINGLE_COL_FIGSIZE,
+    get_color_for_baseline,
+    save_plot,
+)
 
 
 def _read_results():
@@ -37,7 +41,6 @@ def plot(ctx):
     Plot the slowdown of OpenMP's ParRes kernels
     """
     results = _read_results()
-    print(results)
     makedirs(ELASTIC_PLOTS_DIR, exist_ok=True)
     fig, ax = subplots(figsize=SINGLE_COL_FIGSIZE)
 
@@ -56,6 +59,7 @@ def plot(ctx):
     ax.bar(
         xs,
         ys,
+        color=get_color_for_baseline("omp-elastic", "granny"),
         edgecolor="black",
     )
 
diff --git a/tasks/lammps/run.py b/tasks/lammps/run.py
index 69036bb..c78f838 100644
--- a/tasks/lammps/run.py
+++ b/tasks/lammps/run.py
@@ -15,7 +15,7 @@
     LAMMPS_RESULTS_DIR,
     LAMMPS_SIM_WORKLOAD,
     LAMMPS_SIM_WORKLOAD_CONFIGS,
-    get_faasm_benchmark,
+    get_lammps_data_file,
     get_lammps_migration_params,
 )
 from tasks.util.openmpi import (
@@ -50,7 +50,6 @@ def wasm(ctx, w, repeats=1):
     """
     num_vms = len(get_faasm_worker_ips())
     assert num_vms == 2, "Expected 2 VMs got: {}!".format(num_vms)
-    data_file = basename(get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0])
 
     for workload in w:
         if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
@@ -60,6 +59,9 @@ def wasm(ctx, w, repeats=1):
                 )
             )
         workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
+        data_file = basename(
+            get_lammps_data_file(workload_config["data-file"])["data"][0]
+        )
 
         csv_name = "lammps_granny_{}.csv".format(workload)
         _init_csv_file(csv_name)
@@ -100,7 +102,6 @@ def native(ctx, w, repeats=1):
     """
     num_cpus_per_vm = 8
     num_vms = 2
-    data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]
 
     for workload in w:
         if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
@@ -110,6 +111,9 @@ def native(ctx, w, repeats=1):
                 )
             )
         workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
+        data_file = get_lammps_data_file(workload_config["data-file"])["data"][
+            0
+        ]
 
         pod_names, pod_ips = get_native_mpi_pods("lammps")
         assert (
diff --git a/tasks/makespan/locality.md b/tasks/makespan/locality.md
index a70c2e7..5c93b0e 100644
--- a/tasks/makespan/locality.md
+++ b/tasks/makespan/locality.md
@@ -25,25 +25,28 @@ inv cluster.provision --vm Standard_D8_v5 --nodes ${NUM_VMS} + 1
 inv cluster.credentials
 ```
 
-## Native (OpenMPI)
+## Native (Native Batch Schedulers with Granny's MPI implementation)
 
-First, deploy the native `k8s` cluster:
+For this experiment, our native baselines also run with Granny's MPI
+implementation. This is to make the comparison of improvement of performance
+through better locality fair:
 
 ```bash
-inv makespan.native.deploy --num-vms ${NUM_VMS}
+faasmctl deploy.k8s --workers=${NUM_VMS}
+inv makespan.wasm.upload
 ```
 
-Now, you can run the different OpenMPI baselines:
+Now, you can run the different baselines:
 
 ```bash
-inv makespan.run.native-batch --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
-inv makespan.run.native-slurm --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
+inv makespan.run.native-batch --workload mpi-locality --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
+inv makespan.run.native-slurm --workload mpi-locality --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
 ```
 
 Once you are done, you may remove the native OpenMPI cluster:
 
 ```bash
-inv makespan.native.delete
+faasmctl delete
 ```
 
 ## Granny
@@ -63,11 +66,8 @@ inv makespan.wasm.upload
 Third, run the experiment:
 
 ```bash
-# Granny with migration disabled as another baseline
-inv makespan.run.granny --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
-
-# Granny with migration enabled (aka Granny)
-inv makespan.run.granny --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} --migrate
+# Granny with migration enabled
+inv makespan.run.granny --workload mpi-locality --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} --migrate
 ```
 
 During an experiment, you may monitor the state of the cluster (in a separete
diff --git a/tasks/makespan/plot.py b/tasks/makespan/plot.py
index f38df94..bc57e03 100644
--- a/tasks/makespan/plot.py
+++ b/tasks/makespan/plot.py
@@ -12,12 +12,14 @@
 
 # TODO: consider moving some of the migration to a different file (e.g.
 # tasks.util.locality)
-from tasks.util.makespan import (
-    MAKESPAN_PLOTS_DIR,
-    do_makespan_plot,
-    read_makespan_results,
+from tasks.util.makespan import MAKESPAN_PLOTS_DIR
+from tasks.util.locality import (
+    plot_locality_results,
+    read_locality_results,
 )
 from tasks.util.plot import (
+    DOUBLE_COL_FIGSIZE_HALF,
+    DOUBLE_COL_FIGSIZE_THIRD,
     get_color_for_baseline,
     get_label_for_baseline,
     save_plot,
@@ -28,47 +30,101 @@
 )
 
 
+# TODO: delete me if miracle happens
 @task
 def migration(ctx):
     """
     Macrobenchmark plot showing the benefits of migrating MPI applications to
-    improve locality of execution
+    improve locality of execution. We show:
+    - LHS: both number of cross-VM links and number of idle cpu cores per exec
+    - RHS: timeseries of one of the points in the plot
     """
-    # num_vms = [16, 24, 32, 48, 64]
-    # num_tasks = [50, 75, 100, 150, 200]
-    num_vms = [16]
-    exec_cdf_num_vms = 16
-    num_tasks = [50]
+    num_vms = [8, 16, 24, 32]
+    num_tasks = [50, 100, 150, 200]
     num_cpus_per_vm = 8
 
-    # Read results from files
+    # RHS: zoom in one of the bars
+    timeseries_num_vms = num_vms[-1]
+
     results = {}
     for (n_vms, n_tasks) in zip(num_vms, num_tasks):
-        results[n_vms] = read_makespan_results(n_vms, n_tasks, num_cpus_per_vm)
+        results[n_vms] = read_locality_results(
+            n_vms, n_tasks, num_cpus_per_vm, migrate=True
+        )
 
-    fig, (ax1, ax2) = subplots(nrows=1, ncols=2)  # , figsize=(6, 3))
-    fig.subplots_adjust(wspace=0.35)
+    # ----------
+    # Plot 1: aggregate idle vCPUs
+    # ----------
+
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "percentage_vcpus",
+        results,
+        ax,
+        num_vms=num_vms,
+        num_tasks=num_tasks,
+        migrate=True,
+    )
+
+    # Manually craft the legend
+    baselines = ["slurm", "batch", "granny", "granny-migrate"]
+    legend_entries = [
+        Patch(
+            color=get_color_for_baseline("mpi-migrate", baseline),
+            label=get_label_for_baseline("mpi-migrate", baseline),
+        )
+        for baseline in baselines
+    ]
+    fig.legend(
+        handles=legend_entries,
+        loc="upper center",
+        ncols=2,
+        bbox_to_anchor=(0.535, 0.3),
+    )
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_vcpus")
 
     # ----------
-    # Plot 1: CDF of Job-Completion-Time
+    # Plot 1: aggregate xVM links
     # ----------
 
-    # do_plot("exec_vs_tiq", results, ax1, num_vms, num_tasks)
-    do_makespan_plot("exec_cdf", results, ax1, exec_cdf_num_vms, num_tasks)
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "percentage_xvm",
+        results,
+        ax,
+        num_vms=num_vms,
+        num_tasks=num_tasks,
+        migrate=True,
+    )
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_xvm")
 
     # ----------
-    # Plot 2: Job Churn
+    # Plot 3: timeseries of vCPUs
     # ----------
 
-    # WARNING: the "makespan" plot is the only one that reads num_vms as
-    # an array
-    do_makespan_plot("makespan", results, ax2, num_vms, num_tasks)
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "ts_vcpus", results, ax, num_vms=timeseries_num_vms, migrate=True
+    )
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_ts_vcpus")
 
     # ----------
-    # Save figure
+    # Plot 4: timeseries of xVM links
     # ----------
 
-    save_plot(fig, MAKESPAN_PLOTS_DIR, "mpi_migration")
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "ts_xvm_links", results, ax, num_vms=timeseries_num_vms, migrate=True
+    )
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_ts_xvm")
 
 
 @task
@@ -79,59 +135,113 @@ def locality(ctx):
     - LHS: both number of cross-VM links and number of idle cpu cores per exec
     - RHS: timeseries of one of the points in the plot
     """
+    # num_vms = [8, 16, 24, 32]
+    # num_tasks = [50, 100, 150, 200]
+    # num_vms = [4, 8]
+    # num_tasks = [10, 50]
+    # num_vms = [8]
+    # num_tasks = [50]
     num_vms = [8, 16, 24, 32]
-    num_tasks = [50, 100, 150, 200]
+    num_tasks = [25, 50, 75, 100]
     num_cpus_per_vm = 8
 
     # RHS: zoom in one of the bars
-    timeseries_num_vms = 32
-    timeseries_num_tasks = 200
+    timeseries_num_vms = num_vms[-1]
+    timeseries_num_tasks = num_tasks[-1]
 
-    # WARN: this assumes that we never repeat num_vms with different numbers of
-    # num_tasks (fair at this point)
     results = {}
     for (n_vms, n_tasks) in zip(num_vms, num_tasks):
-        results[n_vms] = read_makespan_results(n_vms, n_tasks, num_cpus_per_vm)
+        results[n_vms] = read_locality_results(n_vms, n_tasks, num_cpus_per_vm)
+
+    # ----------
+    # Plot 1: makespan bar plot
+    # ----------
+
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "makespan", results, ax, num_vms=num_vms, num_tasks=num_tasks
+    )
 
-    fig, (ax1, ax2, ax3, ax4) = subplots(nrows=1, ncols=4, figsize=(12, 3))
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_makespan")
 
     # ----------
-    # Plot 1: boxplot of idle vCPUs and num xVM links for various cluster sizes
+    # Plot 2: Aggregate vCPUs metric
     # ----------
 
-    do_makespan_plot("percentage_vcpus", results, ax1, num_vms, num_tasks)
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
 
-    do_makespan_plot("percentage_xvm", results, ax2, num_vms, num_tasks)
+    plot_locality_results(
+        "percentage_vcpus", results, ax, num_vms=num_vms, num_tasks=num_tasks
+    )
 
     # ----------
-    # Plot 2: (two) timeseries of one of the cluster sizes
+    # Plot 3: Aggregate xVM metric
     # ----------
 
-    do_makespan_plot(
-        "ts_vcpus", results, ax3, timeseries_num_vms, timeseries_num_tasks
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "percentage_xvm", results, ax, num_vms=num_vms, num_tasks=num_tasks
     )
 
-    do_makespan_plot(
-        "ts_xvm_links", results, ax4, timeseries_num_vms, timeseries_num_tasks
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_xvm")
+
+    # ----------
+    # Plot 4: execution time CDF
+    # ----------
+
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "cdf_jct",
+        results,
+        ax,
+        cdf_num_vms=timeseries_num_vms,
+        cdf_num_tasks=timeseries_num_tasks,
     )
 
     # Manually craft the legend
-    baselines = ["slurm", "batch", "granny", "granny-migrate"]
+    baselines = ["granny-batch", "granny", "granny-migrate"]
     legend_entries = [
         Patch(
-            color=get_color_for_baseline("mpi-migrate", baseline),
-            label=get_label_for_baseline("mpi-migrate", baseline),
+            color=get_color_for_baseline("mpi-locality", baseline),
+            label=get_label_for_baseline("mpi-locality", baseline),
         )
         for baseline in baselines
     ]
     fig.legend(
         handles=legend_entries,
-        loc="upper center",
-        ncols=len(baselines),
-        bbox_to_anchor=(0.52, 1.07),
+        loc="lower center",
+        ncols=2,
+        bbox_to_anchor=(0.65, 0.17),
     )
 
-    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality")
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_vcpus")
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_cdf_jct")
+
+    # ----------
+    # Plot 5: time-series of idle vCPUs
+    # ----------
+
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results("ts_vcpus", results, ax, num_vms=timeseries_num_vms)
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_ts_vcpus")
+
+    # ----------
+    # Plot 5: time-series of cross-VM links
+    # ----------
+
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
+    plot_locality_results(
+        "ts_xvm_links", results, ax, num_vms=timeseries_num_vms
+    )
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_ts_xvm")
 
 
 @task
@@ -206,7 +316,7 @@ def spot(ctx):
     for (n_vms, n_tasks) in zip(num_vms, num_tasks):
         results[n_vms] = read_spot_results(n_vms, n_tasks, num_cpus_per_vm)
 
-    fig, (ax1, ax2) = subplots(nrows=1, ncols=2, figsize=(6, 3))
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_HALF)
 
     # ----------
     # Plot 1: makespan slowdown (spot / no spot)
@@ -215,25 +325,13 @@ def spot(ctx):
     plot_spot_results(
         "makespan",
         results,
-        ax1,
-        num_vms=num_vms,
-        num_tasks=num_tasks,
-    )
-
-    # ----------
-    # Plot 2: stacked cost bar plot (spot) + real cost (no spot)
-    # ----------
-
-    plot_spot_results(
-        "cost",
-        results,
-        ax2,
+        ax,
         num_vms=num_vms,
         num_tasks=num_tasks,
     )
 
     # Manually craft the legend
-    baselines = ["slurm", "batch", "granny-elastic"]
+    baselines = ["slurm", "batch", "granny"]
     legend_entries = [
         Patch(
             color=get_color_for_baseline("mpi-spot", baseline),
@@ -248,7 +346,23 @@ def spot(ctx):
         bbox_to_anchor=(0.52, 1.07),
     )
 
-    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_spot")
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_spot_makespan")
+
+    # ----------
+    # Plot 2: stacked cost bar plot (spot) + real cost (no spot)
+    # ----------
+
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_HALF)
+
+    plot_spot_results(
+        "cost",
+        results,
+        ax,
+        num_vms=num_vms,
+        num_tasks=num_tasks,
+    )
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_spot_cost")
 
 
 @task
@@ -278,71 +392,83 @@ def elastic(ctx):
     for (n_vms, n_tasks) in zip(num_vms, num_tasks):
         results[n_vms] = read_elastic_results(n_vms, n_tasks, num_cpus_per_vm)
 
-    fig, (ax1, ax2, ax3, ax4) = subplots(nrows=1, ncols=4, figsize=(12, 3))
-
     # ----------
     # Plot 1: makespan
     # ----------
 
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
     plot_elastic_results(
         "makespan",
         results,
-        ax1,
+        ax,
         num_vms=num_vms,
         num_tasks=num_tasks,
     )
 
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_makespan")
+
     # ----------
     # Plot 2: percentage of idle vCPUs
     # ----------
 
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
     plot_elastic_results(
         "percentage_vcpus",
         results,
-        ax2,
+        ax,
         num_vms=num_vms,
         num_tasks=num_tasks,
         num_cpus_per_vm=num_cpus_per_vm,
     )
 
+    # Manually craft the legend
+    baselines = ["slurm", "batch", "granny", "granny-elastic"]
+    legend_entries = [
+        Patch(
+            color=get_color_for_baseline("omp-elastic", baseline),
+            label=get_label_for_baseline("omp-elastic", baseline),
+        )
+        for baseline in baselines
+    ]
+    fig.legend(
+        handles=legend_entries,
+        loc="lower center",
+        ncols=2,
+        bbox_to_anchor=(0.56, 0.2),
+    )
+
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_vcpus")
+
     # ----------
     # Plot 3: CDF of the JCT (for one run)
     # ----------
 
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
     plot_elastic_results(
         "cdf_jct",
         results,
-        ax3,
+        ax,
         cdf_num_vms=cdf_num_vms,
         cdf_num_tasks=cdf_num_tasks,
     )
 
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_cdf_jct")
+
     # ----------
     # Plot 4: timeseries of % of idle CPU cores
     # ----------
 
+    fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD)
+
     plot_elastic_results(
         "ts_vcpus",
         results,
-        ax4,
+        ax,
         timeseries_num_vms=timeseries_num_vms,
         timeseries_num_tasks=timeseries_num_tasks,
     )
 
-    # Manually craft the legend
-    baselines = ["slurm", "batch", "granny", "granny-elastic"]
-    legend_entries = [
-        Patch(
-            color=get_color_for_baseline("omp-elastic", baseline),
-            label=get_label_for_baseline("omp-elastic", baseline),
-        )
-        for baseline in baselines
-    ]
-    fig.legend(
-        handles=legend_entries,
-        loc="upper center",
-        ncols=len(baselines),
-        bbox_to_anchor=(0.52, 1.07),
-    )
-
-    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic")
+    save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_ts_vcpus")
diff --git a/tasks/makespan/run.py b/tasks/makespan/run.py
index d4d91fb..79bd49c 100644
--- a/tasks/makespan/run.py
+++ b/tasks/makespan/run.py
@@ -31,9 +31,8 @@
 getLogger("urllib3").setLevel(log_level_WARNING)
 
 
-def _get_workload_from_cmdline(workload):
-    # TODO: rename mpi-migrate to something like mpi-locality
-    all_workloads = ["mpi-evict", "mpi-migrate", "mpi-spot", "omp-elastic"]
+def _validate_workload(workload):
+    all_workloads = ["mpi-evict", "mpi-locality", "mpi-spot", "omp-elastic"]
 
     if workload not in all_workloads:
         raise RuntimeError(
@@ -48,7 +47,7 @@ def _get_workload_from_cmdline(workload):
 @task()
 def granny(
     ctx,
-    workload="mpi-migrate",
+    workload,
     num_vms=32,
     num_cpus_per_vm=8,
     num_tasks=100,
@@ -68,7 +67,7 @@ def granny(
     baseline = "granny"
     if migrate:
         assert (
-            workload == "mpi-migrate"
+            workload == "mpi-locality"
         ), "--migrate flag should only be used with mpi-migrate workload!"
         baseline = "granny-migrate"
     if fault:
@@ -81,8 +80,12 @@ def granny(
             workload == "omp-elastic"
         ), "--fault flag should only be used with omp-elastic workload!"
         baseline = "granny-elastic"
+    if workload == "mpi-locality":
+        assert (
+            migrate
+        ), "mpi-locality for granny can only be run with --migrate!"
 
-    workload = _get_workload_from_cmdline(workload)
+    workload = _validate_workload(workload)
     trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm)
     _do_run(baseline, num_vms, trace, num_users)
 
@@ -90,7 +93,7 @@ def granny(
 @task()
 def native_slurm(
     ctx,
-    workload="mpi-migrate",
+    workload,
     num_vms=32,
     num_cpus_per_vm=8,
     num_tasks=100,
@@ -105,7 +108,11 @@ def native_slurm(
     if fault:
         baseline = "slurm-ft"
 
-    workload = _get_workload_from_cmdline(workload)
+    # For MPI locality, native-slurm is equivalent to granny-no-migrate
+    if workload == "mpi-locality":
+        baseline = "granny"
+
+    workload = _validate_workload(workload)
     trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm)
     _do_run(
         baseline,
@@ -118,7 +125,7 @@ def native_slurm(
 @task()
 def native_batch(
     ctx,
-    workload="mpi-migrate",
+    workload,
     num_vms=32,
     num_cpus_per_vm=8,
     num_tasks=100,
@@ -133,7 +140,12 @@ def native_batch(
     if fault:
         baseline = "batch-ft"
 
-    workload = _get_workload_from_cmdline(workload)
+    # For MPI locality, native-batch is equivalent to granny allocating
+    # resources to jobs at VM granularity
+    if workload == "mpi-locality":
+        baseline = "granny-batch"
+
+    workload = _validate_workload(workload)
     trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm)
     _do_run(
         baseline,
diff --git a/tasks/makespan/scheduler.py b/tasks/makespan/scheduler.py
index 299bc58..16824a3 100644
--- a/tasks/makespan/scheduler.py
+++ b/tasks/makespan/scheduler.py
@@ -46,14 +46,15 @@
     LAMMPS_MIGRATION_NET_DOCKER_BINARY,
     LAMMPS_MIGRATION_NET_DOCKER_DIR,
     LAMMPS_SIM_NUM_ITERATIONS,
-    LAMMPS_SIM_WORKLOAD,
-    get_faasm_benchmark,
+    get_lammps_data_file,
     get_lammps_migration_params,
+    get_lammps_workload,
 )
 from tasks.util.makespan import (
     ALLOWED_BASELINES,
     EXEC_TASK_INFO_FILE_PREFIX,
     GRANNY_BASELINES,
+    GRANNY_BATCH_BASELINES,
     GRANNY_ELASTIC_BASELINES,
     GRANNY_FT_BASELINES,
     GRANNY_MIGRATE_BASELINES,
@@ -271,7 +272,18 @@ def thread_print(msg):
 
         # Choose the right data file if running a LAMMPS simulation
         if work_item.task.app in MPI_WORKLOADS:
-            data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]
+            if work_item.task.app == "mpi-locality":
+                lammps_workload = "very-network"
+            else:
+                lammps_workload = "compute"
+
+            workload_config = get_lammps_workload(lammps_workload)
+            assert (
+                "data_file" in workload_config
+            ), "Workload config has no data file!"
+            data_file = get_lammps_data_file(workload_config["data_file"])[
+                "data"
+            ][0]
 
         # Record the start timestamp
         start_ts = 0
@@ -293,7 +305,12 @@ def thread_print(msg):
 
                 mpirun_cmd = [
                     "mpirun",
-                    get_lammps_migration_params(native=True),
+                    get_lammps_migration_params(
+                        num_loops=workload_config["num_iterations"],
+                        num_net_loops=workload_config["num_net_loops"],
+                        chunk_size=workload_config["chunk_size"],
+                        native=True,
+                    ),
                     "-np {}".format(world_size),
                     # To improve OpenMPI performance, we tell it exactly where
                     # to run each rank. According to the MPI manual, to specify
@@ -373,7 +390,10 @@ def thread_print(msg):
                         else LAMMPS_SIM_NUM_ITERATIONS
                     )
                     msg["input_data"] = get_lammps_migration_params(
-                        check_every=check_every
+                        check_every=check_every,
+                        num_loops=workload_config["num_iterations"],
+                        num_net_loops=workload_config["num_net_loops"],
+                        chunk_size=workload_config["chunk_size"],
                     )
             elif work_item.task.app in OPENMP_WORKLOADS:
                 if work_item.task.size > num_cpus_per_vm:
@@ -916,6 +936,33 @@ def have_enough_slots_for_task(self, task: TaskObject):
                     >= task.size
                 )
 
+            if (
+                self.state.workload == "mpi-locality"
+                and self.state.baseline in GRANNY_MIGRATE_BASELINES
+            ):
+                return (
+                    get_num_available_slots_from_in_flight_apps(
+                        self.state.num_vms,
+                        self.state.num_cpus_per_vm,
+                        next_task_size=task.size,
+                    )
+                    >= task.size
+                )
+
+            if (
+                self.state.workload == "mpi-locality"
+                and self.state.baseline in GRANNY_BATCH_BASELINES
+            ):
+                return (
+                    get_num_available_slots_from_in_flight_apps(
+                        self.state.num_vms,
+                        self.state.num_cpus_per_vm,
+                        next_task_size=task.size,
+                        batch=True,
+                    )
+                    >= task.size
+                )
+
             if self.state.workload in OPENMP_WORKLOADS:
                 return (
                     get_num_available_slots_from_in_flight_apps(
diff --git a/tasks/makespan/trace.py b/tasks/makespan/trace.py
index 39701c5..7a40db2 100644
--- a/tasks/makespan/trace.py
+++ b/tasks/makespan/trace.py
@@ -9,7 +9,7 @@
 
 
 @task()
-def generate(ctx, workload, num_tasks, num_cores_per_vm, lmbd="0.1"):
+def generate(ctx, workload, num_tasks, num_cores_per_vm=8, lmbd="0.1"):
     """
     A trace is a set of tasks where each task is identified by:
     - An arrival time sampled from a Poisson distribution with parameter lambda
@@ -45,8 +45,8 @@ def generate(ctx, workload, num_tasks, num_cores_per_vm, lmbd="0.1"):
     inter_arrival_times.insert(0, 0)
 
     # Work out the possible different workloads
-    if workload == "mpi-migrate":
-        possible_workloads = ["mpi-migrate"]
+    if workload == "mpi-locality":
+        possible_workloads = ["mpi-locality"]
     elif workload == "mpi-evict":
         possible_workloads = ["mpi-migrate"]
     elif workload == "mpi-spot":
diff --git a/tasks/makespan/traces/trace_mpi-locality_100_8.csv b/tasks/makespan/traces/trace_mpi-locality_100_8.csv
new file mode 100644
index 0000000..4220e9d
--- /dev/null
+++ b/tasks/makespan/traces/trace_mpi-locality_100_8.csv
@@ -0,0 +1,101 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,mpi-locality,6,0
+1,mpi-locality,15,11
+2,mpi-locality,5,19
+3,mpi-locality,10,1
+4,mpi-locality,5,8
+5,mpi-locality,11,7
+6,mpi-locality,2,5
+7,mpi-locality,9,8
+8,mpi-locality,14,26
+9,mpi-locality,14,25
+10,mpi-locality,2,25
+11,mpi-locality,4,57
+12,mpi-locality,12,1
+13,mpi-locality,5,2
+14,mpi-locality,2,7
+15,mpi-locality,13,11
+16,mpi-locality,13,28
+17,mpi-locality,3,1
+18,mpi-locality,7,3
+19,mpi-locality,5,23
+20,mpi-locality,14,7
+21,mpi-locality,2,0
+22,mpi-locality,15,6
+23,mpi-locality,5,5
+24,mpi-locality,15,5
+25,mpi-locality,8,7
+26,mpi-locality,7,0
+27,mpi-locality,5,6
+28,mpi-locality,12,16
+29,mpi-locality,4,15
+30,mpi-locality,4,1
+31,mpi-locality,14,48
+32,mpi-locality,9,0
+33,mpi-locality,7,6
+34,mpi-locality,15,18
+35,mpi-locality,11,12
+36,mpi-locality,8,7
+37,mpi-locality,9,16
+38,mpi-locality,12,16
+39,mpi-locality,14,2
+40,mpi-locality,12,21
+41,mpi-locality,6,1
+42,mpi-locality,11,10
+43,mpi-locality,4,14
+44,mpi-locality,2,29
+45,mpi-locality,4,0
+46,mpi-locality,11,27
+47,mpi-locality,4,16
+48,mpi-locality,11,0
+49,mpi-locality,4,3
+50,mpi-locality,10,1
+51,mpi-locality,11,9
+52,mpi-locality,13,8
+53,mpi-locality,14,19
+54,mpi-locality,14,4
+55,mpi-locality,12,3
+56,mpi-locality,6,2
+57,mpi-locality,6,24
+58,mpi-locality,14,13
+59,mpi-locality,5,0
+60,mpi-locality,3,3
+61,mpi-locality,14,2
+62,mpi-locality,12,0
+63,mpi-locality,9,9
+64,mpi-locality,11,2
+65,mpi-locality,7,0
+66,mpi-locality,9,15
+67,mpi-locality,14,10
+68,mpi-locality,13,5
+69,mpi-locality,5,2
+70,mpi-locality,8,28
+71,mpi-locality,10,16
+72,mpi-locality,6,3
+73,mpi-locality,7,2
+74,mpi-locality,7,2
+75,mpi-locality,13,15
+76,mpi-locality,15,9
+77,mpi-locality,3,12
+78,mpi-locality,10,7
+79,mpi-locality,13,12
+80,mpi-locality,13,13
+81,mpi-locality,7,6
+82,mpi-locality,3,20
+83,mpi-locality,12,6
+84,mpi-locality,4,18
+85,mpi-locality,7,3
+86,mpi-locality,2,4
+87,mpi-locality,9,4
+88,mpi-locality,13,8
+89,mpi-locality,14,36
+90,mpi-locality,10,2
+91,mpi-locality,7,0
+92,mpi-locality,10,34
+93,mpi-locality,2,0
+94,mpi-locality,11,10
+95,mpi-locality,10,6
+96,mpi-locality,14,8
+97,mpi-locality,2,13
+98,mpi-locality,5,4
+99,mpi-locality,8,5
diff --git a/tasks/makespan/traces/trace_mpi-locality_10_8.csv b/tasks/makespan/traces/trace_mpi-locality_10_8.csv
new file mode 100644
index 0000000..c4622cd
--- /dev/null
+++ b/tasks/makespan/traces/trace_mpi-locality_10_8.csv
@@ -0,0 +1,11 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,mpi-locality,12,0
+1,mpi-locality,4,11
+2,mpi-locality,12,0
+3,mpi-locality,14,8
+4,mpi-locality,12,8
+5,mpi-locality,8,2
+6,mpi-locality,11,8
+7,mpi-locality,10,0
+8,mpi-locality,13,0
+9,mpi-locality,9,0
diff --git a/tasks/makespan/traces/trace_mpi-locality_150_8.csv b/tasks/makespan/traces/trace_mpi-locality_150_8.csv
new file mode 100644
index 0000000..82077b7
--- /dev/null
+++ b/tasks/makespan/traces/trace_mpi-locality_150_8.csv
@@ -0,0 +1,151 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,mpi-locality,2,0
+1,mpi-locality,15,0
+2,mpi-locality,3,38
+3,mpi-locality,8,31
+4,mpi-locality,5,1
+5,mpi-locality,2,6
+6,mpi-locality,5,11
+7,mpi-locality,5,4
+8,mpi-locality,6,35
+9,mpi-locality,2,5
+10,mpi-locality,6,15
+11,mpi-locality,10,15
+12,mpi-locality,6,8
+13,mpi-locality,11,2
+14,mpi-locality,8,4
+15,mpi-locality,14,0
+16,mpi-locality,12,2
+17,mpi-locality,11,13
+18,mpi-locality,3,3
+19,mpi-locality,13,29
+20,mpi-locality,11,11
+21,mpi-locality,3,0
+22,mpi-locality,12,14
+23,mpi-locality,14,16
+24,mpi-locality,10,3
+25,mpi-locality,12,18
+26,mpi-locality,4,46
+27,mpi-locality,12,41
+28,mpi-locality,6,29
+29,mpi-locality,4,7
+30,mpi-locality,15,7
+31,mpi-locality,15,4
+32,mpi-locality,2,7
+33,mpi-locality,7,12
+34,mpi-locality,13,3
+35,mpi-locality,12,8
+36,mpi-locality,7,0
+37,mpi-locality,8,18
+38,mpi-locality,6,7
+39,mpi-locality,11,0
+40,mpi-locality,6,17
+41,mpi-locality,15,3
+42,mpi-locality,2,17
+43,mpi-locality,12,24
+44,mpi-locality,9,9
+45,mpi-locality,6,24
+46,mpi-locality,9,0
+47,mpi-locality,8,21
+48,mpi-locality,3,1
+49,mpi-locality,8,7
+50,mpi-locality,15,4
+51,mpi-locality,2,0
+52,mpi-locality,8,8
+53,mpi-locality,11,6
+54,mpi-locality,15,6
+55,mpi-locality,3,3
+56,mpi-locality,10,0
+57,mpi-locality,7,4
+58,mpi-locality,8,16
+59,mpi-locality,11,1
+60,mpi-locality,7,7
+61,mpi-locality,5,1
+62,mpi-locality,15,0
+63,mpi-locality,15,6
+64,mpi-locality,5,9
+65,mpi-locality,9,5
+66,mpi-locality,6,1
+67,mpi-locality,13,5
+68,mpi-locality,4,4
+69,mpi-locality,10,2
+70,mpi-locality,6,4
+71,mpi-locality,9,0
+72,mpi-locality,5,18
+73,mpi-locality,15,32
+74,mpi-locality,7,1
+75,mpi-locality,15,2
+76,mpi-locality,4,21
+77,mpi-locality,15,0
+78,mpi-locality,5,9
+79,mpi-locality,12,3
+80,mpi-locality,12,5
+81,mpi-locality,3,24
+82,mpi-locality,14,2
+83,mpi-locality,13,7
+84,mpi-locality,9,13
+85,mpi-locality,13,35
+86,mpi-locality,5,0
+87,mpi-locality,15,0
+88,mpi-locality,12,23
+89,mpi-locality,4,3
+90,mpi-locality,3,34
+91,mpi-locality,10,0
+92,mpi-locality,12,8
+93,mpi-locality,14,34
+94,mpi-locality,9,0
+95,mpi-locality,2,1
+96,mpi-locality,14,1
+97,mpi-locality,15,0
+98,mpi-locality,11,6
+99,mpi-locality,3,4
+100,mpi-locality,15,1
+101,mpi-locality,4,14
+102,mpi-locality,12,4
+103,mpi-locality,2,0
+104,mpi-locality,4,10
+105,mpi-locality,15,9
+106,mpi-locality,10,14
+107,mpi-locality,5,7
+108,mpi-locality,4,9
+109,mpi-locality,6,17
+110,mpi-locality,8,13
+111,mpi-locality,8,15
+112,mpi-locality,11,24
+113,mpi-locality,15,5
+114,mpi-locality,15,1
+115,mpi-locality,12,11
+116,mpi-locality,15,9
+117,mpi-locality,7,12
+118,mpi-locality,4,1
+119,mpi-locality,14,6
+120,mpi-locality,4,3
+121,mpi-locality,9,2
+122,mpi-locality,6,9
+123,mpi-locality,9,3
+124,mpi-locality,8,45
+125,mpi-locality,5,0
+126,mpi-locality,6,20
+127,mpi-locality,3,8
+128,mpi-locality,9,4
+129,mpi-locality,11,0
+130,mpi-locality,14,11
+131,mpi-locality,3,2
+132,mpi-locality,4,26
+133,mpi-locality,13,6
+134,mpi-locality,6,43
+135,mpi-locality,7,2
+136,mpi-locality,7,1
+137,mpi-locality,10,2
+138,mpi-locality,13,14
+139,mpi-locality,9,2
+140,mpi-locality,3,10
+141,mpi-locality,9,16
+142,mpi-locality,2,3
+143,mpi-locality,6,13
+144,mpi-locality,3,15
+145,mpi-locality,14,17
+146,mpi-locality,2,16
+147,mpi-locality,8,4
+148,mpi-locality,7,6
+149,mpi-locality,5,12
diff --git a/tasks/makespan/traces/trace_mpi-locality_200_8.csv b/tasks/makespan/traces/trace_mpi-locality_200_8.csv
new file mode 100644
index 0000000..27e6952
--- /dev/null
+++ b/tasks/makespan/traces/trace_mpi-locality_200_8.csv
@@ -0,0 +1,201 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,mpi-locality,13,0
+1,mpi-locality,2,14
+2,mpi-locality,2,4
+3,mpi-locality,12,4
+4,mpi-locality,15,4
+5,mpi-locality,13,4
+6,mpi-locality,12,11
+7,mpi-locality,9,49
+8,mpi-locality,3,3
+9,mpi-locality,14,1
+10,mpi-locality,8,4
+11,mpi-locality,11,5
+12,mpi-locality,12,15
+13,mpi-locality,12,5
+14,mpi-locality,13,11
+15,mpi-locality,10,1
+16,mpi-locality,2,5
+17,mpi-locality,8,1
+18,mpi-locality,8,5
+19,mpi-locality,2,2
+20,mpi-locality,3,37
+21,mpi-locality,10,8
+22,mpi-locality,10,9
+23,mpi-locality,15,4
+24,mpi-locality,11,5
+25,mpi-locality,11,6
+26,mpi-locality,9,32
+27,mpi-locality,10,16
+28,mpi-locality,5,2
+29,mpi-locality,5,10
+30,mpi-locality,15,5
+31,mpi-locality,8,7
+32,mpi-locality,3,2
+33,mpi-locality,4,1
+34,mpi-locality,13,3
+35,mpi-locality,13,1
+36,mpi-locality,4,5
+37,mpi-locality,15,7
+38,mpi-locality,3,4
+39,mpi-locality,13,19
+40,mpi-locality,13,1
+41,mpi-locality,7,2
+42,mpi-locality,2,3
+43,mpi-locality,9,3
+44,mpi-locality,10,7
+45,mpi-locality,6,77
+46,mpi-locality,6,0
+47,mpi-locality,12,9
+48,mpi-locality,8,24
+49,mpi-locality,11,28
+50,mpi-locality,14,2
+51,mpi-locality,8,9
+52,mpi-locality,7,0
+53,mpi-locality,11,5
+54,mpi-locality,9,12
+55,mpi-locality,6,5
+56,mpi-locality,2,44
+57,mpi-locality,9,15
+58,mpi-locality,8,7
+59,mpi-locality,4,18
+60,mpi-locality,14,9
+61,mpi-locality,3,21
+62,mpi-locality,11,0
+63,mpi-locality,6,13
+64,mpi-locality,9,2
+65,mpi-locality,3,1
+66,mpi-locality,12,26
+67,mpi-locality,2,1
+68,mpi-locality,2,7
+69,mpi-locality,12,2
+70,mpi-locality,15,27
+71,mpi-locality,13,2
+72,mpi-locality,15,5
+73,mpi-locality,2,4
+74,mpi-locality,8,5
+75,mpi-locality,5,4
+76,mpi-locality,14,1
+77,mpi-locality,9,66
+78,mpi-locality,11,3
+79,mpi-locality,10,2
+80,mpi-locality,12,3
+81,mpi-locality,15,7
+82,mpi-locality,3,7
+83,mpi-locality,12,4
+84,mpi-locality,9,11
+85,mpi-locality,4,19
+86,mpi-locality,9,13
+87,mpi-locality,5,9
+88,mpi-locality,8,5
+89,mpi-locality,11,5
+90,mpi-locality,5,11
+91,mpi-locality,3,0
+92,mpi-locality,12,21
+93,mpi-locality,6,5
+94,mpi-locality,10,8
+95,mpi-locality,3,5
+96,mpi-locality,15,72
+97,mpi-locality,7,15
+98,mpi-locality,8,9
+99,mpi-locality,10,9
+100,mpi-locality,10,9
+101,mpi-locality,11,3
+102,mpi-locality,13,3
+103,mpi-locality,11,1
+104,mpi-locality,4,2
+105,mpi-locality,11,20
+106,mpi-locality,6,9
+107,mpi-locality,10,7
+108,mpi-locality,6,15
+109,mpi-locality,6,5
+110,mpi-locality,13,1
+111,mpi-locality,15,2
+112,mpi-locality,3,4
+113,mpi-locality,11,5
+114,mpi-locality,8,1
+115,mpi-locality,8,6
+116,mpi-locality,11,4
+117,mpi-locality,3,10
+118,mpi-locality,6,32
+119,mpi-locality,15,9
+120,mpi-locality,14,8
+121,mpi-locality,5,2
+122,mpi-locality,8,16
+123,mpi-locality,6,2
+124,mpi-locality,10,19
+125,mpi-locality,11,5
+126,mpi-locality,6,17
+127,mpi-locality,12,5
+128,mpi-locality,3,4
+129,mpi-locality,2,20
+130,mpi-locality,7,11
+131,mpi-locality,11,4
+132,mpi-locality,10,8
+133,mpi-locality,8,3
+134,mpi-locality,10,32
+135,mpi-locality,13,21
+136,mpi-locality,7,13
+137,mpi-locality,11,10
+138,mpi-locality,4,16
+139,mpi-locality,6,5
+140,mpi-locality,11,0
+141,mpi-locality,10,5
+142,mpi-locality,5,8
+143,mpi-locality,2,4
+144,mpi-locality,9,3
+145,mpi-locality,6,6
+146,mpi-locality,8,13
+147,mpi-locality,11,30
+148,mpi-locality,6,2
+149,mpi-locality,2,0
+150,mpi-locality,7,46
+151,mpi-locality,9,43
+152,mpi-locality,3,45
+153,mpi-locality,13,0
+154,mpi-locality,12,35
+155,mpi-locality,15,16
+156,mpi-locality,14,1
+157,mpi-locality,14,12
+158,mpi-locality,14,11
+159,mpi-locality,12,13
+160,mpi-locality,11,4
+161,mpi-locality,8,6
+162,mpi-locality,4,7
+163,mpi-locality,13,28
+164,mpi-locality,9,1
+165,mpi-locality,2,6
+166,mpi-locality,8,1
+167,mpi-locality,6,4
+168,mpi-locality,2,0
+169,mpi-locality,4,7
+170,mpi-locality,2,7
+171,mpi-locality,5,7
+172,mpi-locality,14,8
+173,mpi-locality,6,2
+174,mpi-locality,8,11
+175,mpi-locality,11,3
+176,mpi-locality,14,6
+177,mpi-locality,4,0
+178,mpi-locality,15,0
+179,mpi-locality,14,6
+180,mpi-locality,8,26
+181,mpi-locality,4,7
+182,mpi-locality,6,17
+183,mpi-locality,15,4
+184,mpi-locality,4,7
+185,mpi-locality,8,30
+186,mpi-locality,9,7
+187,mpi-locality,15,11
+188,mpi-locality,3,4
+189,mpi-locality,7,27
+190,mpi-locality,14,14
+191,mpi-locality,14,4
+192,mpi-locality,11,17
+193,mpi-locality,3,4
+194,mpi-locality,4,6
+195,mpi-locality,8,4
+196,mpi-locality,13,10
+197,mpi-locality,4,1
+198,mpi-locality,5,3
+199,mpi-locality,4,12
diff --git a/tasks/makespan/traces/trace_mpi-locality_25_8.csv b/tasks/makespan/traces/trace_mpi-locality_25_8.csv
new file mode 100644
index 0000000..3f6324f
--- /dev/null
+++ b/tasks/makespan/traces/trace_mpi-locality_25_8.csv
@@ -0,0 +1,26 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,mpi-locality,13,0
+1,mpi-locality,3,2
+2,mpi-locality,12,10
+3,mpi-locality,2,41
+4,mpi-locality,12,2
+5,mpi-locality,15,18
+6,mpi-locality,14,1
+7,mpi-locality,15,21
+8,mpi-locality,6,1
+9,mpi-locality,12,5
+10,mpi-locality,12,34
+11,mpi-locality,9,17
+12,mpi-locality,5,12
+13,mpi-locality,3,5
+14,mpi-locality,9,4
+15,mpi-locality,15,4
+16,mpi-locality,4,3
+17,mpi-locality,10,0
+18,mpi-locality,2,11
+19,mpi-locality,13,18
+20,mpi-locality,11,5
+21,mpi-locality,2,11
+22,mpi-locality,12,21
+23,mpi-locality,7,15
+24,mpi-locality,7,6
diff --git a/tasks/makespan/traces/trace_mpi-locality_50_8.csv b/tasks/makespan/traces/trace_mpi-locality_50_8.csv
new file mode 100644
index 0000000..3057621
--- /dev/null
+++ b/tasks/makespan/traces/trace_mpi-locality_50_8.csv
@@ -0,0 +1,51 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,mpi-locality,12,0
+1,mpi-locality,8,0
+2,mpi-locality,13,16
+3,mpi-locality,7,14
+4,mpi-locality,10,5
+5,mpi-locality,13,7
+6,mpi-locality,2,28
+7,mpi-locality,5,5
+8,mpi-locality,15,3
+9,mpi-locality,14,0
+10,mpi-locality,15,26
+11,mpi-locality,8,32
+12,mpi-locality,2,30
+13,mpi-locality,6,0
+14,mpi-locality,13,1
+15,mpi-locality,8,13
+16,mpi-locality,10,17
+17,mpi-locality,6,2
+18,mpi-locality,6,11
+19,mpi-locality,9,0
+20,mpi-locality,8,0
+21,mpi-locality,9,1
+22,mpi-locality,7,9
+23,mpi-locality,3,20
+24,mpi-locality,13,0
+25,mpi-locality,14,1
+26,mpi-locality,11,13
+27,mpi-locality,5,2
+28,mpi-locality,5,19
+29,mpi-locality,4,41
+30,mpi-locality,5,2
+31,mpi-locality,11,53
+32,mpi-locality,8,0
+33,mpi-locality,13,18
+34,mpi-locality,2,5
+35,mpi-locality,2,4
+36,mpi-locality,7,23
+37,mpi-locality,9,10
+38,mpi-locality,7,8
+39,mpi-locality,13,4
+40,mpi-locality,14,2
+41,mpi-locality,15,4
+42,mpi-locality,14,20
+43,mpi-locality,13,0
+44,mpi-locality,5,2
+45,mpi-locality,5,27
+46,mpi-locality,8,7
+47,mpi-locality,10,8
+48,mpi-locality,13,0
+49,mpi-locality,11,24
diff --git a/tasks/makespan/traces/trace_mpi-locality_75_8.csv b/tasks/makespan/traces/trace_mpi-locality_75_8.csv
new file mode 100644
index 0000000..e621baf
--- /dev/null
+++ b/tasks/makespan/traces/trace_mpi-locality_75_8.csv
@@ -0,0 +1,76 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,mpi-locality,2,0
+1,mpi-locality,10,19
+2,mpi-locality,15,5
+3,mpi-locality,11,0
+4,mpi-locality,11,8
+5,mpi-locality,10,3
+6,mpi-locality,4,27
+7,mpi-locality,5,1
+8,mpi-locality,15,8
+9,mpi-locality,13,10
+10,mpi-locality,2,2
+11,mpi-locality,11,5
+12,mpi-locality,14,29
+13,mpi-locality,11,8
+14,mpi-locality,12,8
+15,mpi-locality,4,14
+16,mpi-locality,4,4
+17,mpi-locality,12,20
+18,mpi-locality,2,9
+19,mpi-locality,12,3
+20,mpi-locality,8,14
+21,mpi-locality,3,1
+22,mpi-locality,13,32
+23,mpi-locality,2,16
+24,mpi-locality,5,10
+25,mpi-locality,3,13
+26,mpi-locality,5,6
+27,mpi-locality,11,5
+28,mpi-locality,3,32
+29,mpi-locality,8,7
+30,mpi-locality,8,5
+31,mpi-locality,6,12
+32,mpi-locality,15,0
+33,mpi-locality,4,17
+34,mpi-locality,6,4
+35,mpi-locality,7,2
+36,mpi-locality,7,13
+37,mpi-locality,7,13
+38,mpi-locality,13,0
+39,mpi-locality,9,0
+40,mpi-locality,9,2
+41,mpi-locality,12,6
+42,mpi-locality,11,4
+43,mpi-locality,7,0
+44,mpi-locality,4,1
+45,mpi-locality,11,3
+46,mpi-locality,3,9
+47,mpi-locality,4,4
+48,mpi-locality,9,24
+49,mpi-locality,2,8
+50,mpi-locality,4,8
+51,mpi-locality,2,3
+52,mpi-locality,14,5
+53,mpi-locality,2,1
+54,mpi-locality,9,13
+55,mpi-locality,7,1
+56,mpi-locality,6,20
+57,mpi-locality,5,26
+58,mpi-locality,5,0
+59,mpi-locality,3,15
+60,mpi-locality,12,1
+61,mpi-locality,3,3
+62,mpi-locality,10,40
+63,mpi-locality,13,15
+64,mpi-locality,7,5
+65,mpi-locality,15,2
+66,mpi-locality,3,1
+67,mpi-locality,15,2
+68,mpi-locality,4,5
+69,mpi-locality,11,0
+70,mpi-locality,13,5
+71,mpi-locality,5,4
+72,mpi-locality,11,4
+73,mpi-locality,5,7
+74,mpi-locality,15,0
diff --git a/tasks/migration/oracle.py b/tasks/migration/oracle.py
index 3e4163d..9557cc5 100644
--- a/tasks/migration/oracle.py
+++ b/tasks/migration/oracle.py
@@ -21,7 +21,7 @@
     LAMMPS_FAASM_MIGRATION_NET_FUNC,
     LAMMPS_SIM_WORKLOAD,
     LAMMPS_SIM_WORKLOAD_CONFIGS,
-    get_faasm_benchmark,
+    get_lammps_data_file,
     get_lammps_migration_params,
 )
 from time import sleep
@@ -126,7 +126,7 @@ def do_write_csv_line(csv_name, part, xvm_links, actual_time):
 
             host_list = generate_host_list(part)
             file_name = basename(
-                get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]
+                get_lammps_data_file(LAMMPS_SIM_WORKLOAD)["data"][0]
             )
             user = LAMMPS_FAASM_USER
             func = LAMMPS_FAASM_MIGRATION_NET_FUNC
diff --git a/tasks/migration/run.py b/tasks/migration/run.py
index 986abf5..94cec3e 100644
--- a/tasks/migration/run.py
+++ b/tasks/migration/run.py
@@ -14,7 +14,7 @@
     LAMMPS_FAASM_MIGRATION_NET_FUNC,
     LAMMPS_SIM_WORKLOAD,
     LAMMPS_SIM_WORKLOAD_CONFIGS,
-    get_faasm_benchmark,
+    get_lammps_data_file,
     get_lammps_migration_params,
 )
 from time import sleep
@@ -48,9 +48,6 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):
     """
     num_vms = len(get_faasm_worker_ips())
     assert num_vms == 2, "Expected 2 VMs got: {}!".format(num_vms)
-    # data_file = basename(get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0])
-    # TODO: is this a good idea? FIXME FIXME DELETE ME
-    data_file = basename(get_faasm_benchmark("compute")["data"][0])
 
     if check_in is None:
         check_array = [0, 2, 4, 6, 8, 10]
@@ -65,6 +62,9 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):
                 )
             )
         workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
+        data_file = basename(
+            get_lammps_data_file(workload_config["data_file"])["data"][0]
+        )
 
         csv_name = "migration_{}.csv".format(workload)
         _init_csv_file(csv_name)
diff --git a/tasks/motivation/ideal.py b/tasks/motivation/ideal.py
index 80706a9..3a2b322 100644
--- a/tasks/motivation/ideal.py
+++ b/tasks/motivation/ideal.py
@@ -10,7 +10,7 @@
 from tasks.util.lammps import (
     LAMMPS_DOCKER_BINARY,
     LAMMPS_DOCKER_DIR,
-    get_faasm_benchmark,
+    get_lammps_data_file,
 )
 from tasks.util.openmpi import get_native_mpi_pods, run_kubectl_cmd
 from time import time
@@ -84,7 +84,7 @@ def do_single_run(vm_names, vm_ips, size, partition):
     assert len(allocated_pod_ips) == size
 
     # Prepare LAMMPS command line
-    data_file = get_faasm_benchmark("compute-xl")["data"][0]
+    data_file = get_lammps_data_file("compute-xl")["data"][0]
     lammps_cmdline = "-in {}/{}.faasm.native".format(
         LAMMPS_DOCKER_DIR, data_file
     )
diff --git a/tasks/motivation/plot.py b/tasks/motivation/plot.py
index 0505ba5..5820402 100644
--- a/tasks/motivation/plot.py
+++ b/tasks/motivation/plot.py
@@ -4,7 +4,7 @@
 from os import makedirs
 from os.path import join
 from tasks.util.env import PLOTS_ROOT
-from tasks.util.makespan import do_makespan_plot, read_makespan_results
+from tasks.util.locality import plot_locality_results, read_locality_results
 from tasks.util.plot import (
     get_color_for_baseline,
     get_label_for_baseline,
@@ -26,7 +26,7 @@ def locality(ctx):
     num_cpus_per_vm = 8
 
     results = {}
-    results[num_vms] = read_makespan_results(
+    results[num_vms] = read_locality_results(
         num_vms, num_tasks, num_cpus_per_vm
     )
     makedirs(MOTIVATION_PLOTS_DIR, exist_ok=True)
@@ -36,7 +36,9 @@ def locality(ctx):
     # ----------
 
     fig, ax1 = subplots(figsize=(6, 2))
-    do_makespan_plot("ts_vcpus", results, ax1, num_vms, num_tasks)
+    plot_locality_results(
+        "ts_vcpus", results, ax1, num_vms=num_vms, num_tasks=num_tasks
+    )
 
     # Manually craft the legend
     baselines = ["slurm", "batch", "granny-migrate"]
@@ -61,7 +63,9 @@ def locality(ctx):
     # ----------
 
     fig, ax2 = subplots(figsize=(6, 2))
-    do_makespan_plot("ts_xvm_links", results, ax2, num_vms, num_tasks)
+    plot_locality_results(
+        "ts_xvm_links", results, ax2, num_vms=num_vms, num_tasks=num_tasks
+    )
     save_plot(fig, MOTIVATION_PLOTS_DIR, "motivation_xvm_links")
 
 
diff --git a/tasks/util/elastic.py b/tasks/util/elastic.py
index 207e368..f33cfd3 100644
--- a/tasks/util/elastic.py
+++ b/tasks/util/elastic.py
@@ -228,7 +228,7 @@ def _do_plot_makespan(results, ax, **kwargs):
     ax.set_ylim(bottom=0)
     ax.set_ylabel("Makespan [s]")
 
-    ax.set_xticks(xticks, labels=xticklabels, fontsize=6)
+    ax.set_xticks(xticks, labels=xticklabels)
 
 
 def _do_plot_cdf_jct(results, ax, **kwargs):
@@ -327,8 +327,8 @@ def _do_plot_percentage_vcpus(results, ax, **kwargs):
 
     ax.set_ylim(bottom=0)
     ax.set_xlim(left=-0.25)
-    ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]", fontsize=8)
-    ax.set_xticks(xs, labels=xticklabels, fontsize=6)
+    ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]")
+    ax.set_xticks(xs, labels=xticklabels)
 
 
 def _do_plot_ts_vcpus(results, ax, **kwargs):
diff --git a/tasks/util/lammps.py b/tasks/util/lammps.py
index a6b29f0..9d916bd 100644
--- a/tasks/util/lammps.py
+++ b/tasks/util/lammps.py
@@ -49,13 +49,29 @@
 
 LAMMPS_SIM_WORKLOAD_CONFIGS = {
     "compute": {
+        "data_file": "compute",
+        "num_iterations": 10,
         "num_net_loops": 0,
         "chunk_size": 0,
     },
     "network": {
+        "data_file": "compute",
+        "num_iterations": 10,
         "num_net_loops": LAMMPS_SIM_NUM_NET_LOOPS,
         "chunk_size": LAMMPS_SIM_CHUNK_SIZE,
     },
+    "very-network": {
+        "data_file": "compute",
+        "num_iterations": 10,
+        "num_net_loops": 1e5,
+        "chunk_size": 10,
+    },
+    "og-network": {
+        "data_file": "network",
+        "num_iterations": 10,
+        "num_net_loops": 0,
+        "chunk_size": 0,
+    },
 }
 
 # Different supported LAMMPS benchmarks
@@ -94,15 +110,21 @@
 }
 
 
-def get_faasm_benchmark(bench):
-    if bench not in BENCHMARKS:
-        print("Unrecognized benchmark: {}".format(bench))
+def get_lammps_data_file(workload):
+    return BENCHMARKS[workload]
+
+
+def get_lammps_workload(workload):
+    if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
+        print("Unrecognized workload: {}".format(workload))
         print(
-            "The supported LAMMPS benchmarks are: {}".format(BENCHMARKS.keys())
+            "The supported LAMMPS workloads are: {}".format(
+                LAMMPS_SIM_WORKLOAD_CONFIGS.keys()
+            )
         )
-        raise RuntimeError("Unrecognized LAMMPS benchmark")
+        raise RuntimeError("Unrecognized LAMMPS workload")
 
-    return BENCHMARKS[bench]
+    return LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
 
 
 def get_lammps_migration_params(
@@ -138,7 +160,7 @@ def lammps_data_upload(ctx, bench):
     file_details = []
 
     for b in bench:
-        _bench = get_faasm_benchmark(b)
+        _bench = get_lammps_data_file(b)
 
         # Upload all data corresponding to the benchmark
         for data in _bench["data"]:
diff --git a/tasks/util/locality.py b/tasks/util/locality.py
new file mode 100644
index 0000000..b30d599
--- /dev/null
+++ b/tasks/util/locality.py
@@ -0,0 +1,743 @@
+from glob import glob
+from numpy import linspace
+from os.path import join
+from pandas import read_csv
+from scipy.interpolate import CubicSpline
+from tasks.util.makespan import (
+    GRANNY_BASELINES,
+    MAKESPAN_RESULTS_DIR,
+    NATIVE_BASELINES,
+)
+from tasks.util.math import cum_sum
+from tasks.util.planner import get_xvm_links_from_part
+from tasks.util.plot import (
+    fix_hist_step_vertical_line_at_end,
+    get_color_for_baseline,
+    get_label_for_baseline,
+)
+from tasks.util.trace import load_task_trace_from_file
+
+# ----------------------------
+# Plotting utilities
+# ----------------------------
+
+
+def read_locality_results(num_vms, num_tasks, num_cpus_per_vm, migrate=False):
+    workload = "mpi-locality" if not migrate else "mpi-migrate"
+
+    # Load results
+    result_dict = {}
+    glob_str = "makespan_exec-task-info_*_{}_{}_{}_{}.csv".format(
+        num_vms, workload, num_tasks, num_cpus_per_vm
+    )
+    for csv in glob(join(MAKESPAN_RESULTS_DIR, glob_str)):
+        baseline = csv.split("_")[2]
+        workload = csv.split("_")[4]
+
+        # -----
+        # Results to visualise differences between execution time and time
+        # in queue
+        # -----
+
+        # Results for per-job exec time and time-in-queue
+        result_dict[baseline] = {}
+        results = read_csv(csv)
+        task_ids = results["TaskId"].to_list()
+        times_exec = results["TimeExecuting"].to_list()
+        times_queue = results["TimeInQueue"].to_list()
+        start_ts = results["StartTimeStamp"].to_list()
+        genesis_ts = min(start_ts)
+        end_ts = results["EndTimeStamp"].to_list()
+        result_dict[baseline]["exec-time"] = [-1 for _ in task_ids]
+        result_dict[baseline]["queue-time"] = [-1 for _ in task_ids]
+        result_dict[baseline]["jct"] = [-1 for _ in task_ids]
+
+        for tid, texec, tqueue, e_ts in zip(
+            task_ids, times_exec, times_queue, end_ts
+        ):
+            result_dict[baseline]["exec-time"][tid] = texec
+            result_dict[baseline]["queue-time"][tid] = tqueue
+            result_dict[baseline]["jct"][tid] = e_ts - genesis_ts
+
+        # -----
+        # Results to visualise job churn
+        # -----
+
+        start_ts = results.min()["StartTimeStamp"]
+        end_ts = results.max()["EndTimeStamp"]
+        time_elapsed_secs = int(end_ts - start_ts)
+        result_dict[baseline]["makespan"] = time_elapsed_secs
+        print(
+            "Num VMs: {} - Num Tasks: {} - Baseline: {} - Makespan: {}s".format(
+                num_vms, num_tasks, baseline, time_elapsed_secs
+            )
+        )
+        if time_elapsed_secs > 1e5:
+            raise RuntimeError(
+                "Measured total time elapsed is too long: {}".format(
+                    time_elapsed_secs
+                )
+            )
+
+        # Dump all data
+        tasks_per_ts = [[] for i in range(time_elapsed_secs)]
+        for index, row in results.iterrows():
+            task_id = row["TaskId"]
+            start_slot = int(row["StartTimeStamp"] - start_ts)
+            end_slot = int(row["EndTimeStamp"] - start_ts)
+            for ind in range(start_slot, end_slot):
+                tasks_per_ts[ind].append(task_id)
+        for tasks in tasks_per_ts:
+            tasks.sort()
+
+        # Prune the timeseries
+        pruned_tasks_per_ts = {}
+        # prev_tasks = []
+        for ts, tasks in enumerate(tasks_per_ts):
+            # NOTE: we are not pruning at the moment
+            pruned_tasks_per_ts[ts] = tasks
+            # if tasks != prev_tasks:
+            # pruned_tasks_per_ts[ts] = tasks
+            # prev_tasks = tasks
+
+        result_dict[baseline]["tasks_per_ts"] = pruned_tasks_per_ts
+
+        # -----
+        # Results to visualise scheduling info per task
+        # -----
+
+        sched_info_csv = "makespan_sched-info_{}_{}_{}_{}_{}.csv".format(
+            baseline, num_vms, workload, num_tasks, num_cpus_per_vm
+        )
+        if baseline not in GRANNY_BASELINES:
+            result_dict[baseline]["task_scheduling"] = {}
+
+            # We identify VMs by numbers, not IPs
+            ip_to_vm = {}
+            vm_to_id = {}
+            with open(
+                join(MAKESPAN_RESULTS_DIR, sched_info_csv), "r"
+            ) as sched_fd:
+                # Process the file line by line, as each line will be different in
+                # length
+                for num, line in enumerate(sched_fd):
+                    # Skip the header
+                    if num == 0:
+                        continue
+
+                    line = line.strip()
+
+                    # In line 1 we include the IP to node conversion as one
+                    # comma-separated line, so we parse it here
+                    if num == 1:
+                        ip_to_vm_line = line.split(",")
+                        assert len(ip_to_vm_line) % 2 == 0
+
+                        i = 0
+                        while i < len(ip_to_vm_line):
+                            ip = ip_to_vm_line[i]
+                            vm = ip_to_vm_line[i + 1]
+                            ip_to_vm[ip] = vm
+                            i += 2
+
+                        continue
+
+                    # Get the task id and the scheduling decision from the line
+                    task_id = line.split(",")[0]
+                    result_dict[baseline]["task_scheduling"][task_id] = {}
+                    sched_info = line.split(",")[1:]
+                    # The scheduling decision must be even, as it contains pairs
+                    # of ip + slots
+                    assert len(sched_info) % 2 == 0
+
+                    i = 0
+                    while i < len(sched_info):
+                        vm = ip_to_vm[sched_info[i]]
+                        slots = sched_info[i + 1]
+
+                        if vm not in vm_to_id:
+                            len_map = len(vm_to_id)
+                            vm_to_id[vm] = len_map
+
+                        vm_id = vm_to_id[vm]
+                        if (
+                            vm_id
+                            not in result_dict[baseline]["task_scheduling"][
+                                task_id
+                            ]
+                        ):
+                            result_dict[baseline]["task_scheduling"][task_id][
+                                vm_id
+                            ] = 0
+
+                        result_dict[baseline]["task_scheduling"][task_id][
+                            vm_id
+                        ] += int(slots)
+                        i += 2
+
+        # -----
+        # Results to visualise the % of idle vCPUs (and VMs) over time
+        # -----
+
+        task_trace = load_task_trace_from_file(
+            workload, num_tasks, num_cpus_per_vm
+        )
+
+        result_dict[baseline]["ts_vcpus"] = {}
+        result_dict[baseline]["ts_xvm_links"] = {}
+        result_dict[baseline]["ts_idle_vms"] = {}
+        total_available_vcpus = num_vms * num_cpus_per_vm
+
+        if baseline in NATIVE_BASELINES:
+            # First, set each timestamp to the total available vCPUs, and
+            # initialise the set of idle vms
+            for ts in result_dict[baseline]["tasks_per_ts"]:
+                result_dict[baseline]["ts_vcpus"][ts] = total_available_vcpus
+                result_dict[baseline]["ts_idle_vms"][ts] = set()
+
+            # Second, for each ts subtract the size of each task in-flight
+            for ts in result_dict[baseline]["tasks_per_ts"]:
+                for t in result_dict[baseline]["tasks_per_ts"][ts]:
+                    result_dict[baseline]["ts_vcpus"][ts] -= task_trace[
+                        int(t)
+                    ].size
+
+                    # In addition, for each task in flight, add the tasks's IPs
+                    # to the host set
+                    for vm_id in result_dict[baseline]["task_scheduling"][
+                        str(int(t))
+                    ]:
+                        result_dict[baseline]["ts_idle_vms"][ts].add(vm_id)
+
+            # Third, express the results as percentages, and the number of
+            # idle VMs as a number (not as a set)
+            for ts in result_dict[baseline]["ts_vcpus"]:
+                result_dict[baseline]["ts_vcpus"][ts] = (
+                    result_dict[baseline]["ts_vcpus"][ts]
+                    / total_available_vcpus
+                ) * 100
+
+                result_dict[baseline]["ts_idle_vms"][ts] = num_vms - len(
+                    result_dict[baseline]["ts_idle_vms"][ts]
+                )
+        else:
+            # For Granny, the idle vCPUs results are directly available in
+            # the file
+            sch_info_csv = read_csv(join(MAKESPAN_RESULTS_DIR, sched_info_csv))
+            idle_cpus = (
+                sch_info_csv["NumIdleCpus"] / total_available_vcpus * 100
+            ).to_list()
+            tss = (
+                sch_info_csv["TimeStampSecs"]
+                - sch_info_csv["TimeStampSecs"][0]
+            ).to_list()
+
+            # Idle vCPUs
+            for (idle_cpu, ts) in zip(idle_cpus, tss):
+                result_dict[baseline]["ts_vcpus"][ts] = idle_cpu
+
+            # x-VM links
+            xvm_links = sch_info_csv["NumCrossVmLinks"].to_list()
+            for (ts, xvm_link) in zip(tss, xvm_links):
+                result_dict[baseline]["ts_xvm_links"][ts] = xvm_link
+
+            # Num of idle VMs
+            num_idle_vms = sch_info_csv["NumIdleVms"].to_list()
+            for (ts, n_idle_vms) in zip(tss, num_idle_vms):
+                result_dict[baseline]["ts_idle_vms"][ts] = n_idle_vms
+
+        # -----
+        # Results to visualise the # of cross-vm links
+        # -----
+
+        if baseline in NATIVE_BASELINES:
+            for ts in result_dict[baseline]["tasks_per_ts"]:
+                result_dict[baseline]["ts_xvm_links"][ts] = 0
+
+            for ts in result_dict[baseline]["tasks_per_ts"]:
+                for t in result_dict[baseline]["tasks_per_ts"][ts]:
+                    if baseline == "slurm":
+                        sched = result_dict[baseline]["task_scheduling"][
+                            str(int(t))
+                        ]
+
+                        # If only scheduled to one VM, no cross-VM links
+                        if len(sched) <= 1:
+                            continue
+
+                        # Add the accumulated to the total tally
+                        result_dict[baseline]["ts_xvm_links"][
+                            ts
+                        ] += get_xvm_links_from_part(list(sched.values()))
+                    elif baseline == "batch":
+                        # Batch baseline is optimal in terms of cross-vm links
+                        task_size = task_trace[int(t)].size
+                        if task_size > 8:
+                            num_links = 8 * (task_size - 8) / 2
+                            result_dict[baseline]["ts_xvm_links"][
+                                ts
+                            ] += num_links
+
+    return result_dict
+
+
+def _do_plot_exec_vs_tiq(results, ax, **kwargs):
+    """
+    This plot presents the percentiles of slowdown of execution time (and
+    time in queue too?)
+    """
+    num_vms = kwargs["num_vms"]
+    num_tasks = kwargs["num_tasks"]
+
+    num_jobs = len(results[num_vms[0]]["slurm"]["exec-time"])
+
+    num_slowdowns = 3
+    percentiles = [50, 75, 90, 95, 100]
+    width = float(1 / len(percentiles)) * 0.8
+    xs = []
+    ys = []
+    xticks = []
+    xlabels = []
+    colors = []
+    xs_vlines = []
+
+    for vm_ind, n_vms in enumerate(num_vms):
+
+        x_vm_offset = vm_ind * num_slowdowns
+
+        # Calculate slowdowns wrt granny w/ migration
+        slurm_slowdown = sorted(
+            [
+                float(slurm_time / granny_time)
+                for (slurm_time, granny_time) in zip(
+                    results[n_vms]["slurm"]["exec-time"],
+                    results[n_vms]["granny-migrate"]["exec-time"],
+                )
+            ]
+        )
+        batch_slowdown = sorted(
+            [
+                float(batch_time / granny_time)
+                for (batch_time, granny_time) in zip(
+                    results[n_vms]["batch"]["exec-time"],
+                    results[n_vms]["granny-migrate"]["exec-time"],
+                )
+            ]
+        )
+        granny_nomig_slowdown = sorted(
+            [
+                float(granny_nomig_time / granny_time)
+                for (granny_nomig_time, granny_time) in zip(
+                    results[n_vms]["granny"]["exec-time"],
+                    results[n_vms]["granny-migrate"]["exec-time"],
+                )
+            ]
+        )
+
+        for ind, (bline, slowdown) in enumerate(
+            [
+                ("slurm", slurm_slowdown),
+                ("batch", batch_slowdown),
+                ("granny-no-migration", granny_nomig_slowdown),
+            ]
+        ):
+            x_bline_offset = ind
+
+            for subind, percentile in enumerate(percentiles):
+                x = (
+                    x_vm_offset
+                    + x_bline_offset
+                    - width * int(len(percentiles) / 2)
+                    + width * subind
+                    + width * 0.5 * (len(percentiles) % 2 == 0)
+                )
+                xs.append(x)
+                if percentile == 100:
+                    ys.append(slowdown[-1])
+                else:
+                    index = int(percentile / 100 * num_jobs)
+                    ys.append(slowdown[index])
+                colors.append(get_color_for_baseline("mpi-migrate", bline))
+
+        # Add a vertical line at the end of each VM block
+        xs_vlines.append(
+            x_vm_offset + (num_slowdowns * len(percentiles) + 1 - 0.25) * width
+        )
+
+        # Add a label once per VM block
+        x_label = x_vm_offset + (
+            (num_slowdowns * len(percentiles) + num_slowdowns - 2) / 2 * width
+        )
+        xticks.append(x_label)
+        xlabels.append("{} VMs\n({} Jobs)".format(n_vms, num_tasks[vm_ind]))
+
+    xmin = -0.5
+    xmax = len(num_vms) * num_slowdowns - 0.5
+    ymin = 0.75
+    ymax = 2
+
+    ax.bar(xs, ys, width=width, color=colors, edgecolor="black")
+    ax.hlines(y=1, color="red", xmin=xmin, xmax=xmax)
+    ax.vlines(
+        xs_vlines,
+        ymin=ymin,
+        ymax=ymax,
+        color="gray",
+        linestyles="dashed",
+        linewidth=0.5,
+    )
+    ax.set_xticks(xticks, labels=xlabels, fontsize=6)
+    ax.set_ylabel("Slowdown [Baseline/Granny]")
+    ax.set_xlabel(
+        "Job percentile [{}]".format(
+            ",".join([str(p) + "th" for p in percentiles])
+        ),
+        fontsize=8,
+    )
+    ax.set_xlim(left=xmin, right=xmax)
+    ax.set_ylim(bottom=ymin, top=ymax)
+
+
+def _do_plot_exec_cdf(results, ax, **kwargs):
+    """
+    CDF of the absolute job completion time
+    (from the beginning of time, until the job has finished)
+    """
+    num_vms = kwargs["num_vms"]
+
+    num_jobs = len(results[num_vms]["slurm"]["exec-time"])
+    labels = ["slurm", "batch", "granny", "granny-migrate"]
+    xs = list(range(num_jobs))
+
+    for label in labels:
+        ys = []
+
+        this_label = label
+        if label == "granny-migrate":
+            this_label = "granny"
+        elif label == "granny":
+            this_label = "granny-nomig"
+
+        # Calculate the histogram using the histogram function, get the
+        # results from the return value, but make the plot transparent.
+        # TODO: maybe just calculate the CDF analitically?
+        ys, xs, patches = ax.hist(
+            results[num_vms][label]["jct"],
+            100,
+            color=get_color_for_baseline("mpi-migrate", label),
+            histtype="step",
+            density=True,
+            cumulative=True,
+            label=this_label,
+            # alpha=0,
+        )
+        fix_hist_step_vertical_line_at_end(ax)
+
+        # Interpolate more points
+        # spl = splrep(xs[:-1], ys, s=0.01, per=False)
+        # x2 = linspace(xs[0], xs[-1], 400)
+        # y2 = splev(x2, spl)
+        # ax.plot(x2, y2, color=PLOT_COLORS[label], label=this_label, linewidth=0.5)
+
+        ax.set_xlabel("Job Completion Time [s]")
+        ax.set_ylabel("CDF")
+        ax.set_ylim(bottom=0, top=1)
+        ax.legend()
+
+
+def _do_plot_makespan(results, ax, **kwargs):
+    num_vms = kwargs["num_vms"]
+    num_tasks = kwargs["num_tasks"]
+    baselines = ["granny", "granny-batch", "granny-migrate"]
+
+    xs = []
+    ys = []
+    colors = []
+    xticks = []
+    xticklabels = []
+
+    for ind, n_vms in enumerate(num_vms):
+        x_offset = ind * len(baselines) + (ind + 1)
+        xs += [x + x_offset for x in range(len(baselines))]
+        ys += [results[n_vms][la]["makespan"] for la in baselines]
+        colors += [
+            get_color_for_baseline("mpi-locality", la) for la in baselines
+        ]
+
+        # Add one tick and xlabel per VM size
+        xticks.append(x_offset + len(baselines) / 2)
+        xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, num_tasks[ind]))
+
+        # Add spacing between vms
+        xs.append(x_offset + len(baselines))
+        ys.append(0)
+        colors.append("white")
+
+    ax.bar(xs, ys, color=colors, edgecolor="black", width=1)
+    ax.set_ylim(bottom=0)
+    ax.set_ylabel("Makespan [s]")
+    ax.set_xticks(xticks, labels=xticklabels)
+
+
+def _do_plot_ts_vcpus(results, ax, **kwargs):
+    """
+    This plot presents a timeseries of the % of idle vCPUs over time
+    """
+    num_vms = kwargs["num_vms"]
+    workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality"
+
+    if workload == "mpi-migrate":
+        baselines = ["batch", "slurm", "granny", "granny-migrate"]
+    else:
+        baselines = ["granny", "granny-batch", "granny-migrate"]
+
+    xlim = 0
+    for baseline in baselines:
+        if baseline in NATIVE_BASELINES:
+            xs = range(len(results[num_vms][baseline]["ts_vcpus"]))
+        else:
+            xs = list(results[num_vms][baseline]["ts_vcpus"].keys())
+        xlim = max(xlim, max(xs))
+
+        ax.plot(
+            xs,
+            [results[num_vms][baseline]["ts_vcpus"][x] for x in xs],
+            label=get_label_for_baseline(workload, baseline),
+            color=get_color_for_baseline(workload, baseline),
+        )
+
+    ax.set_xlim(left=0, right=xlim)
+    ax.set_ylim(bottom=0, top=100)
+    ax.set_ylabel("% idle vCPUs")
+    ax.set_xlabel("Time [s]")
+
+
+def _do_plot_ts_xvm_links(results, ax, **kwargs):
+    """
+    This plot presents a timeseries of the # of cross-VM links over time
+    """
+    num_vms = kwargs["num_vms"]
+    workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality"
+    num_points = 500
+
+    if workload == "mpi-migrate":
+        baselines = ["batch", "slurm"]
+    else:
+        baselines = ["granny", "granny-batch"]
+
+    xlim = 0
+    for baseline in baselines:
+        if workload == "mpi-migrate":
+            xs = range(len(results[num_vms][baseline]["ts_xvm_links"]))
+        else:
+            xs = list(results[num_vms][baseline]["ts_xvm_links"].keys())
+        xlim = max(xlim, max(xs))
+
+        ax.plot(
+            xs,
+            [results[num_vms][baseline]["ts_xvm_links"][x] for x in xs],
+            label=get_label_for_baseline(workload, baseline),
+            color=get_color_for_baseline(workload, baseline),
+        )
+
+    # We do Granny separately to interpolate
+    xs_granny_migrate = list(
+        results[num_vms]["granny-migrate"]["ts_xvm_links"].keys()
+    )
+    ys_granny_migrate = [
+        results[num_vms]["granny-migrate"]["ts_xvm_links"][x]
+        for x in xs_granny_migrate
+    ]
+    spl_granny_migrate = CubicSpline(xs_granny_migrate, ys_granny_migrate)
+    new_xs_granny_migrate = linspace(0, max(xs_granny_migrate), num=num_points)
+
+    ax.plot(
+        new_xs_granny_migrate,
+        spl_granny_migrate(new_xs_granny_migrate),
+        label=get_label_for_baseline(workload, "granny-migrate"),
+        color=get_color_for_baseline(workload, "granny-migrate"),
+    )
+
+    xlim = max(xlim, max(new_xs_granny_migrate))
+    ax.set_xlim(left=0, right=xlim)
+    ax.set_ylim(bottom=0)
+    ax.set_xlabel("Time [s]")
+    ax.set_ylabel("# cross-VM links")
+
+
+def _do_plot_percentage_vcpus(results, ax, **kwargs):
+    num_vms = kwargs["num_vms"]
+    workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality"
+    num_tasks = kwargs["num_tasks"]
+
+    if workload == "mpi-migrate":
+        baselines = ["batch", "slurm", "granny", "granny-migrate"]
+    else:
+        baselines = ["granny", "granny-batch", "granny-migrate"]
+
+    xs = []
+    ys = []
+    xticklabels = []
+
+    num_cpus_per_vm = 8
+
+    # Integral of idle CPU cores over time
+    # WARNING: this plot reads num_vms as an array
+    cumsum_ys = {}
+    for baseline in baselines:
+        cumsum_ys[baseline] = {}
+
+        for n_vms in num_vms:
+            timestamps = list(results[n_vms][baseline]["ts_vcpus"].keys())
+            total_cpusecs = (
+                (timestamps[-1] - timestamps[0]) * num_cpus_per_vm * int(n_vms)
+            )
+
+            cumsum = cum_sum(
+                timestamps,
+                [
+                    res * num_cpus_per_vm * int(n_vms) / 100
+                    for res in list(
+                        results[n_vms][baseline]["ts_vcpus"].values()
+                    )
+                ],
+            )
+
+            # Record both the total idle CPUsecs and the percentage
+            cumsum_ys[baseline][n_vms] = (
+                cumsum,
+                (cumsum / total_cpusecs) * 100,
+            )
+
+    xs = [ind for ind in range(len(num_vms))]
+    xticklabels = []
+
+    for (n_vms, n_tasks) in zip(num_vms, num_tasks):
+        xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks))
+    for baseline in baselines:
+        ys = [cumsum_ys[baseline][n_vms][1] for n_vms in num_vms]
+        ax.plot(
+            xs,
+            ys,
+            color=get_color_for_baseline(workload, baseline),
+            linestyle="-",
+            marker=".",
+            label=get_label_for_baseline(workload, baseline),
+        )
+
+    ax.set_ylim(bottom=0)
+    ax.set_xlim(left=-0.25)
+    ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]", fontsize=8)
+    ax.set_xticks(xs, labels=xticklabels)
+
+
+def _do_plot_percentage_xvm(results, ax, **kwargs):
+    num_vms = kwargs["num_vms"]
+    workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality"
+    num_tasks = kwargs["num_tasks"]
+
+    if workload == "mpi-migrate":
+        baselines = ["batch", "slurm", "granny", "granny-migrate"]
+        optimal_baseline = "batch"
+    else:
+        baselines = ["granny", "granny-batch", "granny-migrate"]
+        optimal_baseline = "granny-batch"
+
+    xs = []
+    ys = []
+    xticklabels = []
+
+    # Integral of idle CPU cores over time
+    cumsum_ys = {}
+    for baseline in baselines:
+        cumsum_ys[baseline] = {}
+
+        for n_vms in num_vms:
+            timestamps = list(results[n_vms][baseline]["ts_xvm_links"].keys())
+
+            cumsum = cum_sum(
+                timestamps,
+                [
+                    res
+                    for res in list(
+                        results[n_vms][baseline]["ts_xvm_links"].values()
+                    )
+                ],
+            )
+
+            cumsum_ys[baseline][n_vms] = cumsum
+
+    # WARNING: this plot reads num_vms as an array
+    xs = [ind for ind in range(len(num_vms))]
+    xticklabels = []
+    for (n_vms, n_tasks) in zip(num_vms, num_tasks):
+        xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks))
+    for baseline in baselines:
+        ys = [
+            cumsum_ys[baseline][n_vms] / cumsum_ys[optimal_baseline][n_vms]
+            for n_vms in num_vms
+        ]
+        ax.plot(
+            xs,
+            ys,
+            label=get_label_for_baseline(workload, baseline),
+            color=get_color_for_baseline(workload, baseline),
+            linestyle="-",
+            marker=".",
+        )
+
+    ax.set_ylim(bottom=0)
+    ax.set_xlim(left=-0.25)
+    ax.set_ylabel("Total cross-VM / Optimal cross-VM links", fontsize=8)
+    ax.set_xticks(xs, labels=xticklabels)
+
+
+def _do_plot_cdf_jct(results, ax, **kwargs):
+    assert "cdf_num_vms" in kwargs, "cdf_num_vms not in kwargs!"
+    assert "cdf_num_tasks" in kwargs, "cdf_num_tasks not in kwargs!"
+    cdf_num_vms = kwargs["cdf_num_vms"]
+    cdf_num_tasks = kwargs["cdf_num_tasks"]
+
+    baselines = ["granny-batch", "granny", "granny-migrate"]
+
+    xs = list(range(cdf_num_tasks))
+    for baseline in baselines:
+        ys = []
+
+        ys, xs, patches = ax.hist(
+            results[cdf_num_vms][baseline]["jct"],
+            100,
+            color=get_color_for_baseline("mpi-locality", baseline),
+            label=get_label_for_baseline("mpi-locality", baseline),
+            histtype="step",
+            density=True,
+            cumulative=True,
+        )
+        fix_hist_step_vertical_line_at_end(ax)
+
+        ax.set_xlabel("Job Completion Time [s]")
+        ax.set_ylabel("CDF")
+        ax.set_ylim(bottom=0, top=1)
+
+
+def plot_locality_results(plot_name, results, ax, **kwargs):
+    """
+    This method keeps track of all the different alternative plots we have
+    explored for the motivation figure.
+    """
+    if plot_name == "exec_vs_tiq":
+        _do_plot_exec_vs_tiq(results, ax, **kwargs)
+    elif plot_name == "exec_cdf":
+        _do_plot_exec_cdf(results, ax, **kwargs)
+    elif plot_name == "makespan":
+        _do_plot_makespan(results, ax, **kwargs)
+    elif plot_name == "ts_vcpus":
+        _do_plot_ts_vcpus(results, ax, **kwargs)
+    elif plot_name == "ts_xvm_links":
+        _do_plot_ts_xvm_links(results, ax, **kwargs)
+    elif plot_name == "percentage_vcpus":
+        _do_plot_percentage_vcpus(results, ax, **kwargs)
+    elif plot_name == "percentage_xvm":
+        _do_plot_percentage_xvm(results, ax, **kwargs)
+    elif plot_name == "cdf_jct":
+        _do_plot_cdf_jct(results, ax, **kwargs)
diff --git a/tasks/util/makespan.py b/tasks/util/makespan.py
index 59199a2..2814952 100644
--- a/tasks/util/makespan.py
+++ b/tasks/util/makespan.py
@@ -1,24 +1,11 @@
-from glob import glob
 from math import ceil, floor
-from matplotlib.patches import Patch
-from numpy import linspace
 from os import makedirs
 from os.path import join
-from pandas import read_csv
-from scipy.interpolate import CubicSpline
-from tasks.util.trace import load_task_trace_from_file
 from tasks.util.env import (
     PLOTS_ROOT,
     RESULTS_DIR,
 )
-from tasks.util.math import cum_sum
-from tasks.util.planner import get_xvm_links_from_part
 from tasks.util.openmpi import get_native_mpi_pods_ip_to_vm
-from tasks.util.plot import (
-    fix_hist_step_vertical_line_at_end,
-    get_color_for_baseline,
-    get_label_for_baseline,
-)
 
 # Directories
 MAKESPAN_RESULTS_DIR = join(RESULTS_DIR, "makespan")
@@ -36,11 +23,13 @@
 # - Slurm: native OpenMPI where we schedule jobs at CPU core granularity
 NATIVE_FT_BASELINES = ["batch-ft", "slurm-ft"]
 NATIVE_BASELINES = ["batch", "slurm"] + NATIVE_FT_BASELINES
+GRANNY_BATCH_BASELINES = ["granny-batch"]
 GRANNY_ELASTIC_BASELINES = ["granny-elastic"]
 GRANNY_FT_BASELINES = ["granny-ft"]
 GRANNY_MIGRATE_BASELINES = ["granny-migrate"]
 GRANNY_BASELINES = (
     ["granny"]
+    + GRANNY_BATCH_BASELINES
     + GRANNY_MIGRATE_BASELINES
     + GRANNY_FT_BASELINES
     + GRANNY_ELASTIC_BASELINES
@@ -48,7 +37,7 @@
 ALLOWED_BASELINES = NATIVE_BASELINES + GRANNY_BASELINES
 
 # Workload/Migration related constants
-MPI_MIGRATE_WORKLOADS = ["mpi-migrate", "mpi-evict", "mpi-spot"]
+MPI_MIGRATE_WORKLOADS = ["mpi-locality", "mpi-evict", "mpi-spot"]
 MPI_WORKLOADS = ["mpi"] + MPI_MIGRATE_WORKLOADS
 OPENMP_WORKLOADS = ["omp", "omp-elastic"]
 
@@ -306,953 +295,3 @@ def get_idle_core_count_from_task_info(
             start_t += 1
 
     return num_idle_cores_per_time_step
-
-
-# ----------------------------
-# Plotting utilities
-# ----------------------------
-
-
-def read_makespan_results(num_vms, num_tasks, num_cpus_per_vm):
-    workload = "mpi-migrate"
-
-    # Load results
-    result_dict = {}
-    glob_str = "makespan_exec-task-info_*_{}_{}_{}_{}.csv".format(
-        num_vms, workload, num_tasks, num_cpus_per_vm
-    )
-    for csv in glob(join(MAKESPAN_RESULTS_DIR, glob_str)):
-        baseline = csv.split("_")[2]
-        workload = csv.split("_")[4]
-
-        # -----
-        # Results to visualise differences between execution time and time
-        # in queue
-        # -----
-
-        # Results for per-job exec time and time-in-queue
-        result_dict[baseline] = {}
-        results = read_csv(csv)
-        task_ids = results["TaskId"].to_list()
-        times_exec = results["TimeExecuting"].to_list()
-        times_queue = results["TimeInQueue"].to_list()
-        start_ts = results["StartTimeStamp"].to_list()
-        genesis_ts = min(start_ts)
-        end_ts = results["EndTimeStamp"].to_list()
-        result_dict[baseline]["exec-time"] = [-1 for _ in task_ids]
-        result_dict[baseline]["queue-time"] = [-1 for _ in task_ids]
-        result_dict[baseline]["jct"] = [-1 for _ in task_ids]
-
-        for tid, texec, tqueue, e_ts in zip(
-            task_ids, times_exec, times_queue, end_ts
-        ):
-            result_dict[baseline]["exec-time"][tid] = texec
-            result_dict[baseline]["queue-time"][tid] = tqueue
-            result_dict[baseline]["jct"][tid] = e_ts - genesis_ts
-
-        # -----
-        # Results to visualise job churn
-        # -----
-
-        start_ts = results.min()["StartTimeStamp"]
-        end_ts = results.max()["EndTimeStamp"]
-        time_elapsed_secs = int(end_ts - start_ts)
-        result_dict[baseline]["makespan"] = time_elapsed_secs
-        print(
-            "Num VMs: {} - Num Tasks: {} - Baseline: {} - Makespan: {}s".format(
-                num_vms, num_tasks, baseline, time_elapsed_secs
-            )
-        )
-        if time_elapsed_secs > 1e5:
-            raise RuntimeError(
-                "Measured total time elapsed is too long: {}".format(
-                    time_elapsed_secs
-                )
-            )
-
-        # Dump all data
-        tasks_per_ts = [[] for i in range(time_elapsed_secs)]
-        for index, row in results.iterrows():
-            task_id = row["TaskId"]
-            start_slot = int(row["StartTimeStamp"] - start_ts)
-            end_slot = int(row["EndTimeStamp"] - start_ts)
-            for ind in range(start_slot, end_slot):
-                tasks_per_ts[ind].append(task_id)
-        for tasks in tasks_per_ts:
-            tasks.sort()
-
-        # Prune the timeseries
-        pruned_tasks_per_ts = {}
-        # prev_tasks = []
-        for ts, tasks in enumerate(tasks_per_ts):
-            # NOTE: we are not pruning at the moment
-            pruned_tasks_per_ts[ts] = tasks
-            # if tasks != prev_tasks:
-            # pruned_tasks_per_ts[ts] = tasks
-            # prev_tasks = tasks
-
-        result_dict[baseline]["tasks_per_ts"] = pruned_tasks_per_ts
-
-        # -----
-        # Results to visualise scheduling info per task
-        # -----
-
-        sched_info_csv = "makespan_sched-info_{}_{}_{}_{}_{}.csv".format(
-            baseline, num_vms, workload, num_tasks, num_cpus_per_vm
-        )
-        if baseline not in GRANNY_BASELINES:
-            result_dict[baseline]["task_scheduling"] = {}
-
-            # We identify VMs by numbers, not IPs
-            ip_to_vm = {}
-            vm_to_id = {}
-            with open(
-                join(MAKESPAN_RESULTS_DIR, sched_info_csv), "r"
-            ) as sched_fd:
-                # Process the file line by line, as each line will be different in
-                # length
-                for num, line in enumerate(sched_fd):
-                    # Skip the header
-                    if num == 0:
-                        continue
-
-                    line = line.strip()
-
-                    # In line 1 we include the IP to node conversion as one
-                    # comma-separated line, so we parse it here
-                    if num == 1:
-                        ip_to_vm_line = line.split(",")
-                        assert len(ip_to_vm_line) % 2 == 0
-
-                        i = 0
-                        while i < len(ip_to_vm_line):
-                            ip = ip_to_vm_line[i]
-                            vm = ip_to_vm_line[i + 1]
-                            ip_to_vm[ip] = vm
-                            i += 2
-
-                        continue
-
-                    # Get the task id and the scheduling decision from the line
-                    task_id = line.split(",")[0]
-                    result_dict[baseline]["task_scheduling"][task_id] = {}
-                    sched_info = line.split(",")[1:]
-                    # The scheduling decision must be even, as it contains pairs
-                    # of ip + slots
-                    assert len(sched_info) % 2 == 0
-
-                    i = 0
-                    while i < len(sched_info):
-                        vm = ip_to_vm[sched_info[i]]
-                        slots = sched_info[i + 1]
-
-                        if vm not in vm_to_id:
-                            len_map = len(vm_to_id)
-                            vm_to_id[vm] = len_map
-
-                        vm_id = vm_to_id[vm]
-                        if (
-                            vm_id
-                            not in result_dict[baseline]["task_scheduling"][
-                                task_id
-                            ]
-                        ):
-                            result_dict[baseline]["task_scheduling"][task_id][
-                                vm_id
-                            ] = 0
-
-                        result_dict[baseline]["task_scheduling"][task_id][
-                            vm_id
-                        ] += int(slots)
-                        i += 2
-
-        # -----
-        # Results to visualise the % of idle vCPUs (and VMs) over time
-        # -----
-
-        task_trace = load_task_trace_from_file(
-            workload, num_tasks, num_cpus_per_vm
-        )
-
-        result_dict[baseline]["ts_vcpus"] = {}
-        result_dict[baseline]["ts_xvm_links"] = {}
-        result_dict[baseline]["ts_idle_vms"] = {}
-        total_available_vcpus = num_vms * num_cpus_per_vm
-
-        if baseline in NATIVE_BASELINES:
-            # First, set each timestamp to the total available vCPUs, and
-            # initialise the set of idle vms
-            for ts in result_dict[baseline]["tasks_per_ts"]:
-                result_dict[baseline]["ts_vcpus"][ts] = total_available_vcpus
-                result_dict[baseline]["ts_idle_vms"][ts] = set()
-
-            # Second, for each ts subtract the size of each task in-flight
-            for ts in result_dict[baseline]["tasks_per_ts"]:
-                for t in result_dict[baseline]["tasks_per_ts"][ts]:
-                    result_dict[baseline]["ts_vcpus"][ts] -= task_trace[
-                        int(t)
-                    ].size
-
-                    # In addition, for each task in flight, add the tasks's IPs
-                    # to the host set
-                    for vm_id in result_dict[baseline]["task_scheduling"][
-                        str(int(t))
-                    ]:
-                        result_dict[baseline]["ts_idle_vms"][ts].add(vm_id)
-
-            # Third, express the results as percentages, and the number of
-            # idle VMs as a number (not as a set)
-            for ts in result_dict[baseline]["ts_vcpus"]:
-                result_dict[baseline]["ts_vcpus"][ts] = (
-                    result_dict[baseline]["ts_vcpus"][ts]
-                    / total_available_vcpus
-                ) * 100
-
-                result_dict[baseline]["ts_idle_vms"][ts] = num_vms - len(
-                    result_dict[baseline]["ts_idle_vms"][ts]
-                )
-        else:
-            # For Granny, the idle vCPUs results are directly available in
-            # the file
-            sch_info_csv = read_csv(join(MAKESPAN_RESULTS_DIR, sched_info_csv))
-            idle_cpus = (
-                sch_info_csv["NumIdleCpus"] / total_available_vcpus * 100
-            ).to_list()
-            tss = (
-                sch_info_csv["TimeStampSecs"]
-                - sch_info_csv["TimeStampSecs"][0]
-            ).to_list()
-
-            # Idle vCPUs
-            for (idle_cpu, ts) in zip(idle_cpus, tss):
-                result_dict[baseline]["ts_vcpus"][ts] = idle_cpu
-
-            # x-VM links
-            xvm_links = sch_info_csv["NumCrossVmLinks"].to_list()
-            for (ts, xvm_link) in zip(tss, xvm_links):
-                result_dict[baseline]["ts_xvm_links"][ts] = xvm_link
-
-            # Num of idle VMs
-            num_idle_vms = sch_info_csv["NumIdleVms"].to_list()
-            for (ts, n_idle_vms) in zip(tss, num_idle_vms):
-                result_dict[baseline]["ts_idle_vms"][ts] = n_idle_vms
-
-        # -----
-        # Results to visualise the # of cross-vm links
-        # -----
-
-        if baseline in NATIVE_BASELINES:
-            for ts in result_dict[baseline]["tasks_per_ts"]:
-                result_dict[baseline]["ts_xvm_links"][ts] = 0
-
-            for ts in result_dict[baseline]["tasks_per_ts"]:
-                for t in result_dict[baseline]["tasks_per_ts"][ts]:
-                    if baseline == "slurm":
-                        sched = result_dict[baseline]["task_scheduling"][
-                            str(int(t))
-                        ]
-
-                        # If only scheduled to one VM, no cross-VM links
-                        if len(sched) <= 1:
-                            continue
-
-                        # Add the accumulated to the total tally
-                        result_dict[baseline]["ts_xvm_links"][
-                            ts
-                        ] += get_xvm_links_from_part(list(sched.values()))
-                    elif baseline == "batch":
-                        # Batch baseline is optimal in terms of cross-vm links
-                        task_size = task_trace[int(t)].size
-                        if task_size > 8:
-                            num_links = 8 * (task_size - 8) / 2
-                            result_dict[baseline]["ts_xvm_links"][
-                                ts
-                            ] += num_links
-
-    return result_dict
-
-
-def do_makespan_plot(plot_name, results, ax, num_vms, num_tasks):
-    """
-    This method keeps track of all the different alternative plots we have
-    explored for the motivation figure.
-    """
-
-    if plot_name == "exec_vs_tiq":
-        """
-        This plot presents the percentiles of slowdown of execution time (and
-        time in queue too?)
-        """
-        num_jobs = len(results[num_vms[0]]["slurm"]["exec-time"])
-
-        num_slowdowns = 3
-        percentiles = [50, 75, 90, 95, 100]
-        width = float(1 / len(percentiles)) * 0.8
-        xs = []
-        ys = []
-        xticks = []
-        xlabels = []
-        colors = []
-        xs_vlines = []
-
-        for vm_ind, n_vms in enumerate(num_vms):
-
-            x_vm_offset = vm_ind * num_slowdowns
-
-            # Calculate slowdowns wrt granny w/ migration
-            slurm_slowdown = sorted(
-                [
-                    float(slurm_time / granny_time)
-                    for (slurm_time, granny_time) in zip(
-                        results[n_vms]["slurm"]["exec-time"],
-                        results[n_vms]["granny-migrate"]["exec-time"],
-                    )
-                ]
-            )
-            batch_slowdown = sorted(
-                [
-                    float(batch_time / granny_time)
-                    for (batch_time, granny_time) in zip(
-                        results[n_vms]["batch"]["exec-time"],
-                        results[n_vms]["granny-migrate"]["exec-time"],
-                    )
-                ]
-            )
-            granny_nomig_slowdown = sorted(
-                [
-                    float(granny_nomig_time / granny_time)
-                    for (granny_nomig_time, granny_time) in zip(
-                        results[n_vms]["granny"]["exec-time"],
-                        results[n_vms]["granny-migrate"]["exec-time"],
-                    )
-                ]
-            )
-
-            for ind, (bline, slowdown) in enumerate(
-                [
-                    ("slurm", slurm_slowdown),
-                    ("batch", batch_slowdown),
-                    ("granny-no-migration", granny_nomig_slowdown),
-                ]
-            ):
-                x_bline_offset = ind
-
-                for subind, percentile in enumerate(percentiles):
-                    x = (
-                        x_vm_offset
-                        + x_bline_offset
-                        - width * int(len(percentiles) / 2)
-                        + width * subind
-                        + width * 0.5 * (len(percentiles) % 2 == 0)
-                    )
-                    xs.append(x)
-                    if percentile == 100:
-                        ys.append(slowdown[-1])
-                    else:
-                        index = int(percentile / 100 * num_jobs)
-                        ys.append(slowdown[index])
-                    colors.append(get_color_for_baseline("mpi-migrate", bline))
-
-            # Add a vertical line at the end of each VM block
-            xs_vlines.append(
-                x_vm_offset
-                + (num_slowdowns * len(percentiles) + 1 - 0.25) * width
-            )
-
-            # Add a label once per VM block
-            x_label = x_vm_offset + (
-                (num_slowdowns * len(percentiles) + num_slowdowns - 2)
-                / 2
-                * width
-            )
-            xticks.append(x_label)
-            xlabels.append(
-                "{} VMs\n({} Jobs)".format(n_vms, num_tasks[vm_ind])
-            )
-
-        xmin = -0.5
-        xmax = len(num_vms) * num_slowdowns - 0.5
-        ymin = 0.75
-        ymax = 2
-
-        ax.bar(xs, ys, width=width, color=colors, edgecolor="black")
-        ax.hlines(y=1, color="red", xmin=xmin, xmax=xmax)
-        ax.vlines(
-            xs_vlines,
-            ymin=ymin,
-            ymax=ymax,
-            color="gray",
-            linestyles="dashed",
-            linewidth=0.5,
-        )
-        ax.set_xticks(xticks, labels=xlabels, fontsize=6)
-        ax.set_ylabel("Slowdown [Baseline/Granny]")
-        ax.set_xlabel(
-            "Job percentile [{}]".format(
-                ",".join([str(p) + "th" for p in percentiles])
-            ),
-            fontsize=8,
-        )
-        ax.set_xlim(left=xmin, right=xmax)
-        ax.set_ylim(bottom=ymin, top=ymax)
-
-    elif plot_name == "exec_abs":
-        """
-        TEMP
-        """
-        num_vms = 32
-        num_jobs = len(results[num_vms]["slurm"]["exec-time"])
-        labels = ["slurm", "batch", "granny", "granny-migrate"]
-        percentiles = [50, 75, 90, 95, 100]
-
-        xs = list(range(num_jobs))
-        for label in labels:
-            ys = []
-            for percentile in percentiles:
-                if percentile == 100:
-                    index = -1
-                else:
-                    index = int(percentile / 100 * num_jobs)
-
-                ys.append(results[num_vms][label]["exec-time"][index])
-
-            ax.plot(
-                percentiles,
-                ys,
-                label=get_label_for_baseline("mpi-migrate", label),
-                color=get_color_for_baseline("mpi-migrate", label),
-            )
-            ax.set_xlabel("Job percentile [th]")
-            ax.set_ylabel("Job completion time [s]")
-            ax.set_ylim(bottom=0)
-            ax.legend()
-
-    elif plot_name == "exec_cdf":
-        """
-        CDF of the absolute job completion time
-        (from the beginning of time, until the job has finished)
-        """
-        num_jobs = len(results[num_vms]["slurm"]["exec-time"])
-        labels = ["slurm", "batch", "granny", "granny-migrate"]
-        xs = list(range(num_jobs))
-
-        for label in labels:
-            ys = []
-
-            this_label = label
-            if label == "granny-migrate":
-                this_label = "granny"
-            elif label == "granny":
-                this_label = "granny-nomig"
-
-            # Calculate the histogram using the histogram function, get the
-            # results from the return value, but make the plot transparent.
-            # TODO: maybe just calculate the CDF analitically?
-            ys, xs, patches = ax.hist(
-                results[num_vms][label]["jct"],
-                100,
-                color=get_color_for_baseline("mpi-migrate", label),
-                histtype="step",
-                density=True,
-                cumulative=True,
-                label=this_label,
-                # alpha=0,
-            )
-            fix_hist_step_vertical_line_at_end(ax)
-
-            # Interpolate more points
-            # spl = splrep(xs[:-1], ys, s=0.01, per=False)
-            # x2 = linspace(xs[0], xs[-1], 400)
-            # y2 = splev(x2, spl)
-            # ax.plot(x2, y2, color=PLOT_COLORS[label], label=this_label, linewidth=0.5)
-
-            ax.set_xlabel("Job Completion Time [s]")
-            ax.set_ylabel("CDF")
-            ax.set_ylim(bottom=0, top=1)
-            ax.legend()
-
-    elif plot_name == "makespan":
-        labels = ["slurm", "batch", "granny", "granny-migrate"]
-
-        xs = []
-        ys = []
-        colors = []
-        xticks = []
-        xticklabels = []
-
-        # WARNING: this plot reads num_vms as an array
-        for ind, n_vms in enumerate(num_vms):
-            x_offset = ind * len(labels) + (ind + 1)
-            xs += [x + x_offset for x in range(len(labels))]
-            ys += [results[n_vms][la]["makespan"] for la in labels]
-            colors += [
-                get_color_for_baseline("mpi-migrate", la) for la in labels
-            ]
-
-            # Add one tick and xlabel per VM size
-            xticks.append(x_offset + len(labels) / 2)
-            xticklabels.append(
-                "{} VMs\n({} Jobs)".format(n_vms, num_tasks[ind])
-            )
-
-            # Add spacing between vms
-            xs.append(x_offset + len(labels))
-            ys.append(0)
-            colors.append("white")
-
-        ax.bar(xs, ys, color=colors, edgecolor="black", width=1)
-        ax.set_ylim(bottom=0)
-        ax.set_ylabel("Makespan [s]")
-        ax.set_xticks(xticks, labels=xticklabels, fontsize=6)
-
-        # Manually craft legend
-        legend_entries = [
-            Patch(
-                color=get_color_for_baseline("mpi-migrate", label),
-                label=get_label_for_baseline("mpi-migrate", label),
-            )
-            for label in labels
-        ]
-        ax.legend(handles=legend_entries, ncols=2, fontsize=8)
-
-    elif plot_name == "job_churn":
-        """
-        This plot presents a heatmap of the job churn for one execution of the
-        trace. On the X axis we have time in seconds, and on the Y axis we have
-        all vCPUs in the cluster (32 * 8). There are 100 different colors in
-        the plot, one for each job. Coordinate [x, y] is of color C_j if Job
-        `j` is using vCPU `y` at time `x`
-        """
-        # Fix one baseline (should we?)
-        baseline = "slurm"
-        # On the X axis, we have each job as a bar
-        num_ts = len(results[baseline]["tasks_per_ts"])
-        ncols = num_ts
-        num_vms = 32
-        num_cpus_per_vm = 8
-        nrows = num_vms * num_cpus_per_vm
-
-        # Data shape is (nrows, ncols). We have as many columns as tasks, and
-        # as many rows as the total number of CPUs.
-        # data[m, n] = task_id if cpu m is being used by task_id at timestamp n
-        # (where  m is the row and n the column)
-        data = [[-1 for _ in range(ncols)] for _ in range(nrows)]
-
-        for ts in results[baseline]["tasks_per_ts"]:
-            # This dictionary contains the in-flight tasks per timestamp (where
-            # the timestamp has already been de-duplicated)
-            tasks_in_flight = results[baseline]["tasks_per_ts"][ts]
-            vm_cpu_offset = {}
-            for i in range(num_vms):
-                vm_cpu_offset[i] = 0
-            for t in tasks_in_flight:
-                t_id = int(t)
-                sched_decision = results[baseline]["task_scheduling"][
-                    str(t_id)
-                ]
-                # Work out which rows (i.e. CPU cores) to paint
-                for vm in sched_decision:
-                    cpus_in_vm = sched_decision[vm]
-                    cpu_offset = vm_cpu_offset[vm]
-                    vm_offset = vm * num_cpus_per_vm
-                    this_offset = vm_offset + cpu_offset
-                    for j in range(this_offset, this_offset + cpus_in_vm):
-                        data[j][ts] = t_id
-                    vm_cpu_offset[vm] += cpus_in_vm
-
-        ax.imshow(data, origin="lower")
-
-    elif plot_name == "ts_vcpus":
-        """
-        This plot presents a timeseries of the % of idle vCPUs over time
-        """
-        baselines = ["batch", "slurm", "granny-migrate"]
-        xlim = 0
-        for baseline in baselines:
-            if baseline in NATIVE_BASELINES:
-                xs = range(len(results[num_vms][baseline]["ts_vcpus"]))
-            else:
-                xs = list(results[num_vms][baseline]["ts_vcpus"].keys())
-            xlim = max(xlim, max(xs))
-
-            ax.plot(
-                xs,
-                [results[num_vms][baseline]["ts_vcpus"][x] for x in xs],
-                label=get_label_for_baseline("mpi-migrate", baseline),
-                color=get_color_for_baseline("mpi-migrate", baseline),
-            )
-
-        ax.set_xlim(left=0, right=xlim)
-        ax.set_ylim(bottom=0, top=100)
-        ax.set_ylabel("% idle vCPUs")
-        ax.set_xlabel("Time [s]")
-
-    elif plot_name == "ts_xvm_links":
-        """
-        This plot presents a timeseries of the # of cross-VM links over time
-        """
-        num_points = 500
-
-        xs_slurm = range(len(results[num_vms]["slurm"]["ts_xvm_links"]))
-        xs_batch = range(len(results[num_vms]["batch"]["ts_xvm_links"]))
-
-        # xs_granny = list(results[num_vms]["granny"]["ts_xvm_links"].keys())
-        # ys_granny = [results[num_vms]["granny"]["ts_xvm_links"][x] for x in xs_granny]
-        # spl_granny = CubicSpline(xs_granny, ys_granny)
-        # new_xs_granny = linspace(0, max(xs_granny), num=num_points)
-
-        xs_granny_migrate = list(
-            results[num_vms]["granny-migrate"]["ts_xvm_links"].keys()
-        )
-        ys_granny_migrate = [
-            results[num_vms]["granny-migrate"]["ts_xvm_links"][x]
-            for x in xs_granny_migrate
-        ]
-        spl_granny_migrate = CubicSpline(xs_granny_migrate, ys_granny_migrate)
-        new_xs_granny_migrate = linspace(
-            0, max(xs_granny_migrate), num=num_points
-        )
-
-        ax.plot(
-            xs_slurm,
-            [results[num_vms]["slurm"]["ts_xvm_links"][x] for x in xs_slurm],
-            label="slurm",
-            color=get_color_for_baseline("mpi-migrate", "slurm"),
-        )
-        ax.plot(
-            xs_batch,
-            [results[num_vms]["batch"]["ts_xvm_links"][x] for x in xs_batch],
-            label="batch",
-            color=get_color_for_baseline("mpi-migrate", "batch"),
-        )
-        """
-        ax.plot(
-            new_xs_granny,
-            spl_granny(new_xs_granny),
-            label="granny",
-            color=PLOT_COLORS["granny"],
-        )
-        """
-        ax.plot(
-            new_xs_granny_migrate,
-            spl_granny_migrate(new_xs_granny_migrate),
-            label="granny-migrate",
-            color=get_color_for_baseline("mpi-migrate", "granny-migrate"),
-        )
-
-        xlim = max(xs_batch[-1], xs_slurm[-1], new_xs_granny_migrate[-1])
-        ax.set_xlim(left=0, right=xlim)
-        ax.set_ylim(bottom=0)
-        ax.set_xlabel("Time [s]")
-        ax.set_ylabel("# cross-VM links")
-    elif plot_name == "ts_idle_vms":
-        """
-        This plot presents a timeseries of the # of idle VMs over time
-        """
-        num_points = 500
-
-        xs_slurm = range(len(results[num_vms]["slurm"]["ts_xvm_links"]))
-        xs_batch = range(len(results[num_vms]["batch"]["ts_xvm_links"]))
-
-        # xs_granny = list(results[num_vms]["granny"]["ts_xvm_links"].keys())
-        # ys_granny = [results[num_vms]["granny"]["ts_xvm_links"][x] for x in xs_granny]
-        # spl_granny = CubicSpline(xs_granny, ys_granny)
-        # new_xs_granny = linspace(0, max(xs_granny), num=num_points)
-
-        # TODO: FIXME: move from granny-migrate to granny-evict!
-        xs_granny_migrate = list(
-            results[num_vms]["granny-migrate"]["ts_idle_vms"].keys()
-        )
-        ys_granny_migrate = [
-            (results[num_vms]["granny-migrate"]["ts_idle_vms"][x] / num_vms)
-            * 100
-            for x in xs_granny_migrate
-        ]
-        spl_granny_migrate = CubicSpline(xs_granny_migrate, ys_granny_migrate)
-        new_xs_granny_migrate = linspace(
-            0, max(xs_granny_migrate), num=num_points
-        )
-
-        ax.plot(
-            xs_slurm,
-            [
-                (results[num_vms]["slurm"]["ts_idle_vms"][x] / num_vms) * 100
-                for x in xs_slurm
-            ],
-            label=get_label_for_baseline("mpi-migrate", "slurm"),
-            color=get_color_for_baseline("mpi-migrate", "slurm"),
-        )
-        ax.plot(
-            xs_batch,
-            [
-                (results[num_vms]["batch"]["ts_idle_vms"][x] / num_vms) * 100
-                for x in xs_batch
-            ],
-            label=get_label_for_baseline("mpi-migrate", "batch"),
-            color=get_color_for_baseline("mpi-migrate", "batch"),
-        )
-        """
-        ax.plot(
-            new_xs_granny,
-            spl_granny(new_xs_granny),
-            label="granny",
-            color=PLOT_COLORS["granny"],
-        )
-        """
-        # TODO: FIXME move to granny-evict
-        ax.plot(
-            new_xs_granny_migrate,
-            spl_granny_migrate(new_xs_granny_migrate),
-            label=get_label_for_baseline("mpi-migrate", "granny-migrate"),
-            color=get_color_for_baseline("mpi-migrate", "granny-migrate"),
-        )
-
-        xlim = max(xs_batch[-1], xs_slurm[-1], new_xs_granny_migrate[-1])
-        ax.set_xlim(left=0, right=xlim)
-        ax.set_ylim(bottom=0, top=100)
-        ax.set_xlabel("Time [s]")
-        ax.set_ylabel("Idle VMs [%]")
-    elif plot_name == "percentage_vcpus":
-        labels = ["slurm", "batch", "granny", "granny-migrate"]
-
-        xs = []
-        ys = []
-        colors = []
-        xticks = []
-        xticklabels = []
-
-        num_cpus_per_vm = 8
-
-        # Integral of idle CPU cores over time
-        # WARNING: this plot reads num_vms as an array
-        cumsum_ys = {}
-        for la in labels:
-            cumsum_ys[la] = {}
-
-            for n_vms in num_vms:
-                timestamps = list(results[n_vms][la]["ts_vcpus"].keys())
-                total_cpusecs = (
-                    (timestamps[-1] - timestamps[0])
-                    * num_cpus_per_vm
-                    * int(n_vms)
-                )
-
-                cumsum = cum_sum(
-                    timestamps,
-                    [
-                        res * num_cpus_per_vm * int(n_vms) / 100
-                        for res in list(
-                            results[n_vms][la]["ts_vcpus"].values()
-                        )
-                    ],
-                )
-
-                # Record both the total idle CPUsecs and the percentage
-                cumsum_ys[la][n_vms] = (cumsum, (cumsum / total_cpusecs) * 100)
-
-        xs = [ind for ind in range(len(num_vms))]
-        xticklabels = []
-
-        for (n_vms, n_tasks) in zip(num_vms, num_tasks):
-            xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks))
-        for la in labels:
-            ys = [cumsum_ys[la][n_vms][1] for n_vms in num_vms]
-            ax.plot(
-                xs,
-                ys,
-                color=get_color_for_baseline("mpi-migrate", la),
-                linestyle="-",
-                marker=".",
-                label=get_label_for_baseline("mpi-migrate", la),
-            )
-
-        ax.set_ylim(bottom=0)
-        ax.set_xlim(left=-0.25)
-        ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]", fontsize=8)
-        ax.set_xticks(xs, labels=xticklabels, fontsize=6)
-
-    elif plot_name == "percentage_xvm":
-        labels = ["slurm", "batch", "granny", "granny-migrate"]
-
-        xs = []
-        ys = []
-        colors = []
-        xticks = []
-        xticklabels = []
-
-        num_cpus_per_vm = 8
-
-        # Integral of idle CPU cores over time
-        cumsum_ys = {}
-        for la in labels:
-            cumsum_ys[la] = {}
-
-            for n_vms in num_vms:
-                timestamps = list(results[n_vms][la]["ts_xvm_links"].keys())
-                total_cpusecs = (
-                    (timestamps[-1] - timestamps[0])
-                    * num_cpus_per_vm
-                    * int(n_vms)
-                )
-
-                cumsum = cum_sum(
-                    timestamps,
-                    [
-                        res
-                        for res in list(
-                            results[n_vms][la]["ts_xvm_links"].values()
-                        )
-                    ],
-                )
-
-                cumsum_ys[la][n_vms] = cumsum
-
-        # WARNING: this plot reads num_vms as an array
-        xs = [ind for ind in range(len(num_vms))]
-        xticklabels = []
-        for (n_vms, n_tasks) in zip(num_vms, num_tasks):
-            xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks))
-        for la in labels:
-            ys = [
-                cumsum_ys[la][n_vms] / cumsum_ys["batch"][n_vms]
-                for n_vms in num_vms
-            ]
-            ax.plot(
-                xs,
-                ys,
-                label=get_label_for_baseline("mpi-migrate", la),
-                color=get_color_for_baseline("mpi-migrate", la),
-                linestyle="-",
-                marker=".",
-            )
-
-        ax.set_ylim(bottom=0)
-        ax.set_xlim(left=-0.25)
-        ax.set_ylabel("Total cross-VM / Optimal cross-VM links", fontsize=8)
-        ax.set_xticks(xs, labels=xticklabels, fontsize=6)
-
-    # TODO: delete me
-    elif plot_name == "boxplot_xvm":
-        labels = ["slurm", "batch", "granny", "granny-migrate"]
-
-        xs = []
-        ys = []
-        colors = []
-        xticks = []
-        xticklabels = []
-
-        # WARNING: this plot reads num_vms as an array
-        for ind, n_vms in enumerate(num_vms):
-            x_offset = ind * len(labels) + (ind + 1)
-
-            # For each cluster size, and for each label, we add two boxplots
-
-            # Number of cross-VM links
-            xs += [x + x_offset for x in range(len(labels))]
-            ys += [
-                list(results[n_vms][la]["ts_xvm_links"].values())
-                for la in labels
-            ]
-
-            # Color and alpha for each box
-            colors += [
-                get_color_for_baseline("mpi-migrate", la) for la in labels
-            ]
-
-            # Add one tick and xlabel per VM size
-            xticks.append(x_offset + len(labels) / 2)
-            xticklabels.append(
-                "{} VMs\n({} Jobs)".format(n_vms, num_tasks[ind])
-            )
-
-        bplot = ax.boxplot(
-            ys,
-            sym="",
-            vert=True,
-            positions=xs,
-            patch_artist=True,
-            widths=0.5,
-        )
-
-        for (box, color) in zip(bplot["boxes"], colors):
-            box.set_facecolor(color)
-            box.set_edgecolor("black")
-
-        ax.set_ylim(bottom=0)
-        ax.set_ylabel("# cross-VM links")
-        ax.set_xticks(xticks, labels=xticklabels, fontsize=6)
-
-        # Manually craft legend
-        legend_entries = [
-            Patch(
-                color=get_color_for_baseline("mpi-migrate", label),
-                label=get_label_for_baseline("mpi-migrate", label),
-            )
-            for label in labels
-        ]
-        ax.legend(handles=legend_entries, ncols=2, fontsize=8)
-    elif plot_name == "used_vmsecs":
-        # TODO: FIXME: move to granny-evict
-        labels = ["slurm", "batch", "granny", "granny-migrate"]
-
-        xs = []
-        ys = []
-        colors = []
-        xticks = []
-        xticklabels = []
-
-        # Integral of idle CPU cores over time
-        cumsum_ys = {}
-        for la in labels:
-            cumsum_ys[la] = {}
-
-            for n_vms in num_vms:
-                timestamps = list(results[n_vms][la]["ts_idle_vms"].keys())
-
-                cumsum = cum_sum(
-                    timestamps,
-                    [
-                        (n_vms - res)
-                        for res in list(
-                            results[n_vms][la]["ts_idle_vms"].values()
-                        )
-                    ],
-                )
-                # TODO: delete me
-                if n_vms == 16:
-                    print(
-                        n_vms,
-                        la,
-                        cumsum,
-                        [
-                            res
-                            for res in list(
-                                results[n_vms][la]["ts_idle_vms"].values()
-                            )
-                        ],
-                    )
-
-                cumsum_ys[la][n_vms] = cumsum
-
-        # WARNING: this plot reads num_vms as an array
-        xs = [ind for ind in range(len(num_vms))]
-        xticklabels = []
-        for (n_vms, n_tasks) in zip(num_vms, num_tasks):
-            xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks))
-        for la in labels:
-            ys = [cumsum_ys[la][n_vms] for n_vms in num_vms]
-            ax.plot(
-                xs,
-                ys,
-                color=get_color_for_baseline("mpi-migrate", la),
-                label=get_label_for_baseline("mpi-migrate", la),
-                linestyle="-",
-                marker=".",
-            )
-
-        ax.set_ylim(bottom=0)
-        ax.set_xlim(left=-0.25)
-        ax.set_ylabel("Used VM Seconds", fontsize=8)
-        ax.set_xticks(xs, labels=xticklabels, fontsize=6)
-        ax.ticklabel_format(axis="y", style="sci", scilimits=(3, 3))
-        ax.legend(fontsize=6, ncols=2)
diff --git a/tasks/util/planner.py b/tasks/util/planner.py
index fa8e766..d5818d0 100644
--- a/tasks/util/planner.py
+++ b/tasks/util/planner.py
@@ -2,6 +2,7 @@
     get_available_hosts as planner_get_available_hosts,
     get_in_fligh_apps as planner_get_in_fligh_apps,
 )
+from math import ceil
 from time import sleep
 
 
@@ -33,6 +34,10 @@ def get_num_available_slots_from_in_flight_apps(
     user_id=None,
     num_evicted_vms=None,
     openmp=False,
+    # Used to leave some slack CPUs to help de-fragment (for `mpi-locality`)
+    next_task_size=None,
+    # Used to make Granny behave like batch (for `mpi-locality`)
+    batch=False,
 ):
     """
     For Granny baselines, we cannot use static knowledge of the
@@ -62,6 +67,10 @@ def get_num_available_slots_from_in_flight_apps(
             ]
         )
 
+        used_slots_map = {
+            host.ip: host.usedSlots for host in available_hosts.hosts
+        }
+
         next_evicted_vm_ips = []
         try:
             next_evicted_vm_ips = in_flight_apps.nextEvictedVmIps
@@ -87,7 +96,6 @@ def get_num_available_slots_from_in_flight_apps(
         # sleep for a bit and ask again (we allow the size to go over the
         # specified size in case of an elsatic scale-up)
         if any([len(app.hostIps) < app.size for app in in_flight_apps.apps]):
-            print("App not fully in-flight. We wait...")
             sleep(short_sleep_secs)
             continue
 
@@ -146,14 +154,36 @@ def get_num_available_slots_from_in_flight_apps(
                 ]
             )
 
+        # In a batch setting, we allocate resources to jobs at VM granularity
+        # The planner will by default do so, if enough free VMs are available
+        if batch and next_task_size is not None:
+            num_needed_vms = ceil(next_task_size / num_cpus_per_vm)
+            if (
+                num_vms - len(list(worker_occupation.keys()))
+            ) < num_needed_vms:
+                sleep(5 * long_sleep_secs)
+                continue
+
         num_available_slots = (
             num_vms - len(list(worker_occupation.keys()))
         ) * num_cpus_per_vm
         for ip in worker_occupation:
+            if worker_occupation[ip] != used_slots_map[ip]:
+                print(
+                    "Inconsistent worker used slots map for ip: {}".format(ip)
+                )
+                must_hold_back = True
+                break
+
             num_available_slots += num_cpus_per_vm - worker_occupation[ip]
 
+        if must_hold_back:
+            sleep(long_sleep_secs)
+            continue
+
         # Double-check the number of available slots with our other source of truth
-        if user_id is not None and num_available_slots != available_slots:
+        # are consistent with the in-flight apps?
+        if num_available_slots != available_slots:
             print(
                 "WARNING: inconsistency in the number of available slots"
                 " (in flight: {} - registered: {})".format(
@@ -163,6 +193,18 @@ def get_num_available_slots_from_in_flight_apps(
             sleep(short_sleep_secs)
             continue
 
+        # TODO: decide on the percentage, 10% or 5% ?
+        # with 10% we almost always have perfect locality
+        pctg = 0.05
+        if (
+            next_task_size is not None
+            and not batch
+            and (num_available_slots - next_task_size)
+            < int(num_vms * num_cpus_per_vm * pctg)
+        ):
+            sleep(long_sleep_secs)
+            continue
+
         # If we have made it this far, we are done
         break
 
diff --git a/tasks/util/plot.py b/tasks/util/plot.py
index 3d3e3a4..e718002 100644
--- a/tasks/util/plot.py
+++ b/tasks/util/plot.py
@@ -24,6 +24,9 @@
 
 PLOT_PATTERNS = ["//", "\\\\", "||", "-", "*-", "o-"]
 SINGLE_COL_FIGSIZE = (6, 3)
+SINGLE_COL_FIGSIZE_HALF = (3, 3)
+DOUBLE_COL_FIGSIZE_HALF = SINGLE_COL_FIGSIZE
+DOUBLE_COL_FIGSIZE_THIRD = (4, 4)
 
 
 def fix_hist_step_vertical_line_at_end(ax):
@@ -74,7 +77,7 @@ def _do_get_for_baseline(workload, baseline, color=False, label=False):
                 return this_label
             if color:
                 return _PLOT_COLORS[this_label]
-        if baseline == "batch" or baseline == "slurm":
+        if baseline in ["slurm", "batch"]:
             this_label = baseline
             if label:
                 return this_label
@@ -87,6 +90,32 @@ def _do_get_for_baseline(workload, baseline, color=False, label=False):
             )
         )
 
+    if workload == "mpi-locality":
+        if baseline == "granny":
+            this_label = "slurm"
+            if label:
+                return this_label
+            if color:
+                return _PLOT_COLORS[this_label]
+        if baseline == "granny-migrate":
+            this_label = "granny"
+            if label:
+                return this_label
+            if color:
+                return _PLOT_COLORS[this_label]
+        if baseline == "granny-batch":
+            this_label = "batch"
+            if label:
+                return this_label
+            if color:
+                return _PLOT_COLORS[this_label]
+
+        raise RuntimeError(
+            "Unrecognised baseline ({}) for workload: {}".format(
+                baseline, workload
+            )
+        )
+
     if workload == "mpi-spot":
         if baseline == "granny":
             this_label = "granny"
diff --git a/tasks/util/spot.py b/tasks/util/spot.py
index 7700867..ea85f28 100644
--- a/tasks/util/spot.py
+++ b/tasks/util/spot.py
@@ -108,7 +108,7 @@ def _do_plot_makespan(results, ax, **kwargs):
     if tight:
         ax.set_xticks([])
     else:
-        ax.set_xticks(xticks, labels=xticklabels, fontsize=6)
+        ax.set_xticks(xticks, labels=xticklabels)
 
 
 def _do_plot_cost(results, ax, **kwargs):
@@ -203,11 +203,9 @@ def _do_plot_cost(results, ax, **kwargs):
             )
         else:
             ax.text(
-                xs[-1] + 0.5,
+                xs[-1] - 0.5,
                 ys[discount][-1] + 0.0001,
                 "{}% off".format(discount),
-                fontsize=6,
-                rotation=90,
             )
 
         bottom_ys = ys[discount]
@@ -231,7 +229,7 @@ def _do_plot_cost(results, ax, **kwargs):
     else:
         ax.set_ylabel("Cost [VM Hours]")
         ax.legend(fontsize=8)
-        ax.set_xticks(xticks, labels=xticklabels, fontsize=6)
+        ax.set_xticks(xticks, labels=xticklabels)
 
 
 def plot_spot_results(plot_name, results, ax, **kwargs):