From 11e13d95b3739055035b43cf697ad1a1cb6e2818 Mon Sep 17 00:00:00 2001 From: Carlos Date: Thu, 23 May 2024 10:53:34 +0200 Subject: [PATCH] locality: make first-class citizen plot (#45) * locality: make first-class citizen plot * scheduler: back to very-network * plots: overall improvements * plot: commit plotting scripts after submission --- requirements.txt | 2 +- tasks/elastic/plot.py | 8 +- tasks/lammps/run.py | 10 +- tasks/makespan/locality.md | 24 +- tasks/makespan/plot.py | 294 ++++-- tasks/makespan/run.py | 32 +- tasks/makespan/scheduler.py | 57 +- tasks/makespan/trace.py | 6 +- .../traces/trace_mpi-locality_100_8.csv | 101 ++ .../traces/trace_mpi-locality_10_8.csv | 11 + .../traces/trace_mpi-locality_150_8.csv | 151 +++ .../traces/trace_mpi-locality_200_8.csv | 201 ++++ .../traces/trace_mpi-locality_25_8.csv | 26 + .../traces/trace_mpi-locality_50_8.csv | 51 + .../traces/trace_mpi-locality_75_8.csv | 76 ++ tasks/migration/oracle.py | 4 +- tasks/migration/run.py | 8 +- tasks/motivation/ideal.py | 4 +- tasks/motivation/plot.py | 12 +- tasks/util/elastic.py | 6 +- tasks/util/lammps.py | 36 +- tasks/util/locality.py | 743 ++++++++++++++ tasks/util/makespan.py | 967 +----------------- tasks/util/planner.py | 46 +- tasks/util/plot.py | 31 +- tasks/util/spot.py | 8 +- 26 files changed, 1801 insertions(+), 1114 deletions(-) create mode 100644 tasks/makespan/traces/trace_mpi-locality_100_8.csv create mode 100644 tasks/makespan/traces/trace_mpi-locality_10_8.csv create mode 100644 tasks/makespan/traces/trace_mpi-locality_150_8.csv create mode 100644 tasks/makespan/traces/trace_mpi-locality_200_8.csv create mode 100644 tasks/makespan/traces/trace_mpi-locality_25_8.csv create mode 100644 tasks/makespan/traces/trace_mpi-locality_50_8.csv create mode 100644 tasks/makespan/traces/trace_mpi-locality_75_8.csv create mode 100644 tasks/util/locality.py diff --git a/requirements.txt b/requirements.txt index bdcfc14..b7b1a35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ ansible==7.0.0 black==22.3.0 -faasmctl>=0.43.0 +faasmctl>=0.44.0 flake8==3.9.2 hoststats==0.1.1 invoke>=2.1.0 diff --git a/tasks/elastic/plot.py b/tasks/elastic/plot.py index 8aae71e..5bfaf38 100644 --- a/tasks/elastic/plot.py +++ b/tasks/elastic/plot.py @@ -5,7 +5,11 @@ from os.path import join from pandas import read_csv from tasks.util.elastic import ELASTIC_PLOTS_DIR, ELASTIC_RESULTS_DIR -from tasks.util.plot import SINGLE_COL_FIGSIZE, save_plot +from tasks.util.plot import ( + SINGLE_COL_FIGSIZE, + get_color_for_baseline, + save_plot, +) def _read_results(): @@ -37,7 +41,6 @@ def plot(ctx): Plot the slowdown of OpenMP's ParRes kernels """ results = _read_results() - print(results) makedirs(ELASTIC_PLOTS_DIR, exist_ok=True) fig, ax = subplots(figsize=SINGLE_COL_FIGSIZE) @@ -56,6 +59,7 @@ def plot(ctx): ax.bar( xs, ys, + color=get_color_for_baseline("omp-elastic", "granny"), edgecolor="black", ) diff --git a/tasks/lammps/run.py b/tasks/lammps/run.py index 69036bb..c78f838 100644 --- a/tasks/lammps/run.py +++ b/tasks/lammps/run.py @@ -15,7 +15,7 @@ LAMMPS_RESULTS_DIR, LAMMPS_SIM_WORKLOAD, LAMMPS_SIM_WORKLOAD_CONFIGS, - get_faasm_benchmark, + get_lammps_data_file, get_lammps_migration_params, ) from tasks.util.openmpi import ( @@ -50,7 +50,6 @@ def wasm(ctx, w, repeats=1): """ num_vms = len(get_faasm_worker_ips()) assert num_vms == 2, "Expected 2 VMs got: {}!".format(num_vms) - data_file = basename(get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]) for workload in w: if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS: @@ -60,6 +59,9 @@ def wasm(ctx, w, repeats=1): ) ) workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload] + data_file = basename( + get_lammps_data_file(workload_config["data-file"])["data"][0] + ) csv_name = "lammps_granny_{}.csv".format(workload) _init_csv_file(csv_name) @@ -100,7 +102,6 @@ def native(ctx, w, repeats=1): """ num_cpus_per_vm = 8 num_vms = 2 - data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0] for workload in w: if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS: @@ -110,6 +111,9 @@ def native(ctx, w, repeats=1): ) ) workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload] + data_file = get_lammps_data_file(workload_config["data-file"])["data"][ + 0 + ] pod_names, pod_ips = get_native_mpi_pods("lammps") assert ( diff --git a/tasks/makespan/locality.md b/tasks/makespan/locality.md index a70c2e7..5c93b0e 100644 --- a/tasks/makespan/locality.md +++ b/tasks/makespan/locality.md @@ -25,25 +25,28 @@ inv cluster.provision --vm Standard_D8_v5 --nodes ${NUM_VMS} + 1 inv cluster.credentials ``` -## Native (OpenMPI) +## Native (Native Batch Schedulers with Granny's MPI implementation) -First, deploy the native `k8s` cluster: +For this experiment, our native baselines also run with Granny's MPI +implementation. This is to make the comparison of improvement of performance +through better locality fair: ```bash -inv makespan.native.deploy --num-vms ${NUM_VMS} +faasmctl deploy.k8s --workers=${NUM_VMS} +inv makespan.wasm.upload ``` -Now, you can run the different OpenMPI baselines: +Now, you can run the different baselines: ```bash -inv makespan.run.native-batch --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} -inv makespan.run.native-slurm --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} +inv makespan.run.native-batch --workload mpi-locality --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} +inv makespan.run.native-slurm --workload mpi-locality --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} ``` Once you are done, you may remove the native OpenMPI cluster: ```bash -inv makespan.native.delete +faasmctl delete ``` ## Granny @@ -63,11 +66,8 @@ inv makespan.wasm.upload Third, run the experiment: ```bash -# Granny with migration disabled as another baseline -inv makespan.run.granny --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} - -# Granny with migration enabled (aka Granny) -inv makespan.run.granny --workload mpi-migrate --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} --migrate +# Granny with migration enabled +inv makespan.run.granny --workload mpi-locality --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} --migrate ``` During an experiment, you may monitor the state of the cluster (in a separete diff --git a/tasks/makespan/plot.py b/tasks/makespan/plot.py index f38df94..bc57e03 100644 --- a/tasks/makespan/plot.py +++ b/tasks/makespan/plot.py @@ -12,12 +12,14 @@ # TODO: consider moving some of the migration to a different file (e.g. # tasks.util.locality) -from tasks.util.makespan import ( - MAKESPAN_PLOTS_DIR, - do_makespan_plot, - read_makespan_results, +from tasks.util.makespan import MAKESPAN_PLOTS_DIR +from tasks.util.locality import ( + plot_locality_results, + read_locality_results, ) from tasks.util.plot import ( + DOUBLE_COL_FIGSIZE_HALF, + DOUBLE_COL_FIGSIZE_THIRD, get_color_for_baseline, get_label_for_baseline, save_plot, @@ -28,47 +30,101 @@ ) +# TODO: delete me if miracle happens @task def migration(ctx): """ Macrobenchmark plot showing the benefits of migrating MPI applications to - improve locality of execution + improve locality of execution. We show: + - LHS: both number of cross-VM links and number of idle cpu cores per exec + - RHS: timeseries of one of the points in the plot """ - # num_vms = [16, 24, 32, 48, 64] - # num_tasks = [50, 75, 100, 150, 200] - num_vms = [16] - exec_cdf_num_vms = 16 - num_tasks = [50] + num_vms = [8, 16, 24, 32] + num_tasks = [50, 100, 150, 200] num_cpus_per_vm = 8 - # Read results from files + # RHS: zoom in one of the bars + timeseries_num_vms = num_vms[-1] + results = {} for (n_vms, n_tasks) in zip(num_vms, num_tasks): - results[n_vms] = read_makespan_results(n_vms, n_tasks, num_cpus_per_vm) + results[n_vms] = read_locality_results( + n_vms, n_tasks, num_cpus_per_vm, migrate=True + ) - fig, (ax1, ax2) = subplots(nrows=1, ncols=2) # , figsize=(6, 3)) - fig.subplots_adjust(wspace=0.35) + # ---------- + # Plot 1: aggregate idle vCPUs + # ---------- + + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "percentage_vcpus", + results, + ax, + num_vms=num_vms, + num_tasks=num_tasks, + migrate=True, + ) + + # Manually craft the legend + baselines = ["slurm", "batch", "granny", "granny-migrate"] + legend_entries = [ + Patch( + color=get_color_for_baseline("mpi-migrate", baseline), + label=get_label_for_baseline("mpi-migrate", baseline), + ) + for baseline in baselines + ] + fig.legend( + handles=legend_entries, + loc="upper center", + ncols=2, + bbox_to_anchor=(0.535, 0.3), + ) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_vcpus") # ---------- - # Plot 1: CDF of Job-Completion-Time + # Plot 1: aggregate xVM links # ---------- - # do_plot("exec_vs_tiq", results, ax1, num_vms, num_tasks) - do_makespan_plot("exec_cdf", results, ax1, exec_cdf_num_vms, num_tasks) + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "percentage_xvm", + results, + ax, + num_vms=num_vms, + num_tasks=num_tasks, + migrate=True, + ) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_xvm") # ---------- - # Plot 2: Job Churn + # Plot 3: timeseries of vCPUs # ---------- - # WARNING: the "makespan" plot is the only one that reads num_vms as - # an array - do_makespan_plot("makespan", results, ax2, num_vms, num_tasks) + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "ts_vcpus", results, ax, num_vms=timeseries_num_vms, migrate=True + ) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_ts_vcpus") # ---------- - # Save figure + # Plot 4: timeseries of xVM links # ---------- - save_plot(fig, MAKESPAN_PLOTS_DIR, "mpi_migration") + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "ts_xvm_links", results, ax, num_vms=timeseries_num_vms, migrate=True + ) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_migrate_ts_xvm") @task @@ -79,59 +135,113 @@ def locality(ctx): - LHS: both number of cross-VM links and number of idle cpu cores per exec - RHS: timeseries of one of the points in the plot """ + # num_vms = [8, 16, 24, 32] + # num_tasks = [50, 100, 150, 200] + # num_vms = [4, 8] + # num_tasks = [10, 50] + # num_vms = [8] + # num_tasks = [50] num_vms = [8, 16, 24, 32] - num_tasks = [50, 100, 150, 200] + num_tasks = [25, 50, 75, 100] num_cpus_per_vm = 8 # RHS: zoom in one of the bars - timeseries_num_vms = 32 - timeseries_num_tasks = 200 + timeseries_num_vms = num_vms[-1] + timeseries_num_tasks = num_tasks[-1] - # WARN: this assumes that we never repeat num_vms with different numbers of - # num_tasks (fair at this point) results = {} for (n_vms, n_tasks) in zip(num_vms, num_tasks): - results[n_vms] = read_makespan_results(n_vms, n_tasks, num_cpus_per_vm) + results[n_vms] = read_locality_results(n_vms, n_tasks, num_cpus_per_vm) + + # ---------- + # Plot 1: makespan bar plot + # ---------- + + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "makespan", results, ax, num_vms=num_vms, num_tasks=num_tasks + ) - fig, (ax1, ax2, ax3, ax4) = subplots(nrows=1, ncols=4, figsize=(12, 3)) + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_makespan") # ---------- - # Plot 1: boxplot of idle vCPUs and num xVM links for various cluster sizes + # Plot 2: Aggregate vCPUs metric # ---------- - do_makespan_plot("percentage_vcpus", results, ax1, num_vms, num_tasks) + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) - do_makespan_plot("percentage_xvm", results, ax2, num_vms, num_tasks) + plot_locality_results( + "percentage_vcpus", results, ax, num_vms=num_vms, num_tasks=num_tasks + ) # ---------- - # Plot 2: (two) timeseries of one of the cluster sizes + # Plot 3: Aggregate xVM metric # ---------- - do_makespan_plot( - "ts_vcpus", results, ax3, timeseries_num_vms, timeseries_num_tasks + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "percentage_xvm", results, ax, num_vms=num_vms, num_tasks=num_tasks ) - do_makespan_plot( - "ts_xvm_links", results, ax4, timeseries_num_vms, timeseries_num_tasks + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_xvm") + + # ---------- + # Plot 4: execution time CDF + # ---------- + + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "cdf_jct", + results, + ax, + cdf_num_vms=timeseries_num_vms, + cdf_num_tasks=timeseries_num_tasks, ) # Manually craft the legend - baselines = ["slurm", "batch", "granny", "granny-migrate"] + baselines = ["granny-batch", "granny", "granny-migrate"] legend_entries = [ Patch( - color=get_color_for_baseline("mpi-migrate", baseline), - label=get_label_for_baseline("mpi-migrate", baseline), + color=get_color_for_baseline("mpi-locality", baseline), + label=get_label_for_baseline("mpi-locality", baseline), ) for baseline in baselines ] fig.legend( handles=legend_entries, - loc="upper center", - ncols=len(baselines), - bbox_to_anchor=(0.52, 1.07), + loc="lower center", + ncols=2, + bbox_to_anchor=(0.65, 0.17), ) - save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality") + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_vcpus") + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_cdf_jct") + + # ---------- + # Plot 5: time-series of idle vCPUs + # ---------- + + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results("ts_vcpus", results, ax, num_vms=timeseries_num_vms) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_ts_vcpus") + + # ---------- + # Plot 5: time-series of cross-VM links + # ---------- + + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + + plot_locality_results( + "ts_xvm_links", results, ax, num_vms=timeseries_num_vms + ) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_locality_ts_xvm") @task @@ -206,7 +316,7 @@ def spot(ctx): for (n_vms, n_tasks) in zip(num_vms, num_tasks): results[n_vms] = read_spot_results(n_vms, n_tasks, num_cpus_per_vm) - fig, (ax1, ax2) = subplots(nrows=1, ncols=2, figsize=(6, 3)) + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_HALF) # ---------- # Plot 1: makespan slowdown (spot / no spot) @@ -215,25 +325,13 @@ def spot(ctx): plot_spot_results( "makespan", results, - ax1, - num_vms=num_vms, - num_tasks=num_tasks, - ) - - # ---------- - # Plot 2: stacked cost bar plot (spot) + real cost (no spot) - # ---------- - - plot_spot_results( - "cost", - results, - ax2, + ax, num_vms=num_vms, num_tasks=num_tasks, ) # Manually craft the legend - baselines = ["slurm", "batch", "granny-elastic"] + baselines = ["slurm", "batch", "granny"] legend_entries = [ Patch( color=get_color_for_baseline("mpi-spot", baseline), @@ -248,7 +346,23 @@ def spot(ctx): bbox_to_anchor=(0.52, 1.07), ) - save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_spot") + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_spot_makespan") + + # ---------- + # Plot 2: stacked cost bar plot (spot) + real cost (no spot) + # ---------- + + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_HALF) + + plot_spot_results( + "cost", + results, + ax, + num_vms=num_vms, + num_tasks=num_tasks, + ) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_spot_cost") @task @@ -278,71 +392,83 @@ def elastic(ctx): for (n_vms, n_tasks) in zip(num_vms, num_tasks): results[n_vms] = read_elastic_results(n_vms, n_tasks, num_cpus_per_vm) - fig, (ax1, ax2, ax3, ax4) = subplots(nrows=1, ncols=4, figsize=(12, 3)) - # ---------- # Plot 1: makespan # ---------- + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + plot_elastic_results( "makespan", results, - ax1, + ax, num_vms=num_vms, num_tasks=num_tasks, ) + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_makespan") + # ---------- # Plot 2: percentage of idle vCPUs # ---------- + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + plot_elastic_results( "percentage_vcpus", results, - ax2, + ax, num_vms=num_vms, num_tasks=num_tasks, num_cpus_per_vm=num_cpus_per_vm, ) + # Manually craft the legend + baselines = ["slurm", "batch", "granny", "granny-elastic"] + legend_entries = [ + Patch( + color=get_color_for_baseline("omp-elastic", baseline), + label=get_label_for_baseline("omp-elastic", baseline), + ) + for baseline in baselines + ] + fig.legend( + handles=legend_entries, + loc="lower center", + ncols=2, + bbox_to_anchor=(0.56, 0.2), + ) + + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_vcpus") + # ---------- # Plot 3: CDF of the JCT (for one run) # ---------- + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + plot_elastic_results( "cdf_jct", results, - ax3, + ax, cdf_num_vms=cdf_num_vms, cdf_num_tasks=cdf_num_tasks, ) + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_cdf_jct") + # ---------- # Plot 4: timeseries of % of idle CPU cores # ---------- + fig, ax = subplots(figsize=DOUBLE_COL_FIGSIZE_THIRD) + plot_elastic_results( "ts_vcpus", results, - ax4, + ax, timeseries_num_vms=timeseries_num_vms, timeseries_num_tasks=timeseries_num_tasks, ) - # Manually craft the legend - baselines = ["slurm", "batch", "granny", "granny-elastic"] - legend_entries = [ - Patch( - color=get_color_for_baseline("omp-elastic", baseline), - label=get_label_for_baseline("omp-elastic", baseline), - ) - for baseline in baselines - ] - fig.legend( - handles=legend_entries, - loc="upper center", - ncols=len(baselines), - bbox_to_anchor=(0.52, 1.07), - ) - - save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic") + save_plot(fig, MAKESPAN_PLOTS_DIR, "makespan_elastic_ts_vcpus") diff --git a/tasks/makespan/run.py b/tasks/makespan/run.py index d4d91fb..79bd49c 100644 --- a/tasks/makespan/run.py +++ b/tasks/makespan/run.py @@ -31,9 +31,8 @@ getLogger("urllib3").setLevel(log_level_WARNING) -def _get_workload_from_cmdline(workload): - # TODO: rename mpi-migrate to something like mpi-locality - all_workloads = ["mpi-evict", "mpi-migrate", "mpi-spot", "omp-elastic"] +def _validate_workload(workload): + all_workloads = ["mpi-evict", "mpi-locality", "mpi-spot", "omp-elastic"] if workload not in all_workloads: raise RuntimeError( @@ -48,7 +47,7 @@ def _get_workload_from_cmdline(workload): @task() def granny( ctx, - workload="mpi-migrate", + workload, num_vms=32, num_cpus_per_vm=8, num_tasks=100, @@ -68,7 +67,7 @@ def granny( baseline = "granny" if migrate: assert ( - workload == "mpi-migrate" + workload == "mpi-locality" ), "--migrate flag should only be used with mpi-migrate workload!" baseline = "granny-migrate" if fault: @@ -81,8 +80,12 @@ def granny( workload == "omp-elastic" ), "--fault flag should only be used with omp-elastic workload!" baseline = "granny-elastic" + if workload == "mpi-locality": + assert ( + migrate + ), "mpi-locality for granny can only be run with --migrate!" - workload = _get_workload_from_cmdline(workload) + workload = _validate_workload(workload) trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm) _do_run(baseline, num_vms, trace, num_users) @@ -90,7 +93,7 @@ def granny( @task() def native_slurm( ctx, - workload="mpi-migrate", + workload, num_vms=32, num_cpus_per_vm=8, num_tasks=100, @@ -105,7 +108,11 @@ def native_slurm( if fault: baseline = "slurm-ft" - workload = _get_workload_from_cmdline(workload) + # For MPI locality, native-slurm is equivalent to granny-no-migrate + if workload == "mpi-locality": + baseline = "granny" + + workload = _validate_workload(workload) trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm) _do_run( baseline, @@ -118,7 +125,7 @@ def native_slurm( @task() def native_batch( ctx, - workload="mpi-migrate", + workload, num_vms=32, num_cpus_per_vm=8, num_tasks=100, @@ -133,7 +140,12 @@ def native_batch( if fault: baseline = "batch-ft" - workload = _get_workload_from_cmdline(workload) + # For MPI locality, native-batch is equivalent to granny allocating + # resources to jobs at VM granularity + if workload == "mpi-locality": + baseline = "granny-batch" + + workload = _validate_workload(workload) trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm) _do_run( baseline, diff --git a/tasks/makespan/scheduler.py b/tasks/makespan/scheduler.py index 299bc58..16824a3 100644 --- a/tasks/makespan/scheduler.py +++ b/tasks/makespan/scheduler.py @@ -46,14 +46,15 @@ LAMMPS_MIGRATION_NET_DOCKER_BINARY, LAMMPS_MIGRATION_NET_DOCKER_DIR, LAMMPS_SIM_NUM_ITERATIONS, - LAMMPS_SIM_WORKLOAD, - get_faasm_benchmark, + get_lammps_data_file, get_lammps_migration_params, + get_lammps_workload, ) from tasks.util.makespan import ( ALLOWED_BASELINES, EXEC_TASK_INFO_FILE_PREFIX, GRANNY_BASELINES, + GRANNY_BATCH_BASELINES, GRANNY_ELASTIC_BASELINES, GRANNY_FT_BASELINES, GRANNY_MIGRATE_BASELINES, @@ -271,7 +272,18 @@ def thread_print(msg): # Choose the right data file if running a LAMMPS simulation if work_item.task.app in MPI_WORKLOADS: - data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0] + if work_item.task.app == "mpi-locality": + lammps_workload = "very-network" + else: + lammps_workload = "compute" + + workload_config = get_lammps_workload(lammps_workload) + assert ( + "data_file" in workload_config + ), "Workload config has no data file!" + data_file = get_lammps_data_file(workload_config["data_file"])[ + "data" + ][0] # Record the start timestamp start_ts = 0 @@ -293,7 +305,12 @@ def thread_print(msg): mpirun_cmd = [ "mpirun", - get_lammps_migration_params(native=True), + get_lammps_migration_params( + num_loops=workload_config["num_iterations"], + num_net_loops=workload_config["num_net_loops"], + chunk_size=workload_config["chunk_size"], + native=True, + ), "-np {}".format(world_size), # To improve OpenMPI performance, we tell it exactly where # to run each rank. According to the MPI manual, to specify @@ -373,7 +390,10 @@ def thread_print(msg): else LAMMPS_SIM_NUM_ITERATIONS ) msg["input_data"] = get_lammps_migration_params( - check_every=check_every + check_every=check_every, + num_loops=workload_config["num_iterations"], + num_net_loops=workload_config["num_net_loops"], + chunk_size=workload_config["chunk_size"], ) elif work_item.task.app in OPENMP_WORKLOADS: if work_item.task.size > num_cpus_per_vm: @@ -916,6 +936,33 @@ def have_enough_slots_for_task(self, task: TaskObject): >= task.size ) + if ( + self.state.workload == "mpi-locality" + and self.state.baseline in GRANNY_MIGRATE_BASELINES + ): + return ( + get_num_available_slots_from_in_flight_apps( + self.state.num_vms, + self.state.num_cpus_per_vm, + next_task_size=task.size, + ) + >= task.size + ) + + if ( + self.state.workload == "mpi-locality" + and self.state.baseline in GRANNY_BATCH_BASELINES + ): + return ( + get_num_available_slots_from_in_flight_apps( + self.state.num_vms, + self.state.num_cpus_per_vm, + next_task_size=task.size, + batch=True, + ) + >= task.size + ) + if self.state.workload in OPENMP_WORKLOADS: return ( get_num_available_slots_from_in_flight_apps( diff --git a/tasks/makespan/trace.py b/tasks/makespan/trace.py index 39701c5..7a40db2 100644 --- a/tasks/makespan/trace.py +++ b/tasks/makespan/trace.py @@ -9,7 +9,7 @@ @task() -def generate(ctx, workload, num_tasks, num_cores_per_vm, lmbd="0.1"): +def generate(ctx, workload, num_tasks, num_cores_per_vm=8, lmbd="0.1"): """ A trace is a set of tasks where each task is identified by: - An arrival time sampled from a Poisson distribution with parameter lambda @@ -45,8 +45,8 @@ def generate(ctx, workload, num_tasks, num_cores_per_vm, lmbd="0.1"): inter_arrival_times.insert(0, 0) # Work out the possible different workloads - if workload == "mpi-migrate": - possible_workloads = ["mpi-migrate"] + if workload == "mpi-locality": + possible_workloads = ["mpi-locality"] elif workload == "mpi-evict": possible_workloads = ["mpi-migrate"] elif workload == "mpi-spot": diff --git a/tasks/makespan/traces/trace_mpi-locality_100_8.csv b/tasks/makespan/traces/trace_mpi-locality_100_8.csv new file mode 100644 index 0000000..4220e9d --- /dev/null +++ b/tasks/makespan/traces/trace_mpi-locality_100_8.csv @@ -0,0 +1,101 @@ +TaskId,App,Size,InterArrivalTimeSecs +0,mpi-locality,6,0 +1,mpi-locality,15,11 +2,mpi-locality,5,19 +3,mpi-locality,10,1 +4,mpi-locality,5,8 +5,mpi-locality,11,7 +6,mpi-locality,2,5 +7,mpi-locality,9,8 +8,mpi-locality,14,26 +9,mpi-locality,14,25 +10,mpi-locality,2,25 +11,mpi-locality,4,57 +12,mpi-locality,12,1 +13,mpi-locality,5,2 +14,mpi-locality,2,7 +15,mpi-locality,13,11 +16,mpi-locality,13,28 +17,mpi-locality,3,1 +18,mpi-locality,7,3 +19,mpi-locality,5,23 +20,mpi-locality,14,7 +21,mpi-locality,2,0 +22,mpi-locality,15,6 +23,mpi-locality,5,5 +24,mpi-locality,15,5 +25,mpi-locality,8,7 +26,mpi-locality,7,0 +27,mpi-locality,5,6 +28,mpi-locality,12,16 +29,mpi-locality,4,15 +30,mpi-locality,4,1 +31,mpi-locality,14,48 +32,mpi-locality,9,0 +33,mpi-locality,7,6 +34,mpi-locality,15,18 +35,mpi-locality,11,12 +36,mpi-locality,8,7 +37,mpi-locality,9,16 +38,mpi-locality,12,16 +39,mpi-locality,14,2 +40,mpi-locality,12,21 +41,mpi-locality,6,1 +42,mpi-locality,11,10 +43,mpi-locality,4,14 +44,mpi-locality,2,29 +45,mpi-locality,4,0 +46,mpi-locality,11,27 +47,mpi-locality,4,16 +48,mpi-locality,11,0 +49,mpi-locality,4,3 +50,mpi-locality,10,1 +51,mpi-locality,11,9 +52,mpi-locality,13,8 +53,mpi-locality,14,19 +54,mpi-locality,14,4 +55,mpi-locality,12,3 +56,mpi-locality,6,2 +57,mpi-locality,6,24 +58,mpi-locality,14,13 +59,mpi-locality,5,0 +60,mpi-locality,3,3 +61,mpi-locality,14,2 +62,mpi-locality,12,0 +63,mpi-locality,9,9 +64,mpi-locality,11,2 +65,mpi-locality,7,0 +66,mpi-locality,9,15 +67,mpi-locality,14,10 +68,mpi-locality,13,5 +69,mpi-locality,5,2 +70,mpi-locality,8,28 +71,mpi-locality,10,16 +72,mpi-locality,6,3 +73,mpi-locality,7,2 +74,mpi-locality,7,2 +75,mpi-locality,13,15 +76,mpi-locality,15,9 +77,mpi-locality,3,12 +78,mpi-locality,10,7 +79,mpi-locality,13,12 +80,mpi-locality,13,13 +81,mpi-locality,7,6 +82,mpi-locality,3,20 +83,mpi-locality,12,6 +84,mpi-locality,4,18 +85,mpi-locality,7,3 +86,mpi-locality,2,4 +87,mpi-locality,9,4 +88,mpi-locality,13,8 +89,mpi-locality,14,36 +90,mpi-locality,10,2 +91,mpi-locality,7,0 +92,mpi-locality,10,34 +93,mpi-locality,2,0 +94,mpi-locality,11,10 +95,mpi-locality,10,6 +96,mpi-locality,14,8 +97,mpi-locality,2,13 +98,mpi-locality,5,4 +99,mpi-locality,8,5 diff --git a/tasks/makespan/traces/trace_mpi-locality_10_8.csv b/tasks/makespan/traces/trace_mpi-locality_10_8.csv new file mode 100644 index 0000000..c4622cd --- /dev/null +++ b/tasks/makespan/traces/trace_mpi-locality_10_8.csv @@ -0,0 +1,11 @@ +TaskId,App,Size,InterArrivalTimeSecs +0,mpi-locality,12,0 +1,mpi-locality,4,11 +2,mpi-locality,12,0 +3,mpi-locality,14,8 +4,mpi-locality,12,8 +5,mpi-locality,8,2 +6,mpi-locality,11,8 +7,mpi-locality,10,0 +8,mpi-locality,13,0 +9,mpi-locality,9,0 diff --git a/tasks/makespan/traces/trace_mpi-locality_150_8.csv b/tasks/makespan/traces/trace_mpi-locality_150_8.csv new file mode 100644 index 0000000..82077b7 --- /dev/null +++ b/tasks/makespan/traces/trace_mpi-locality_150_8.csv @@ -0,0 +1,151 @@ +TaskId,App,Size,InterArrivalTimeSecs +0,mpi-locality,2,0 +1,mpi-locality,15,0 +2,mpi-locality,3,38 +3,mpi-locality,8,31 +4,mpi-locality,5,1 +5,mpi-locality,2,6 +6,mpi-locality,5,11 +7,mpi-locality,5,4 +8,mpi-locality,6,35 +9,mpi-locality,2,5 +10,mpi-locality,6,15 +11,mpi-locality,10,15 +12,mpi-locality,6,8 +13,mpi-locality,11,2 +14,mpi-locality,8,4 +15,mpi-locality,14,0 +16,mpi-locality,12,2 +17,mpi-locality,11,13 +18,mpi-locality,3,3 +19,mpi-locality,13,29 +20,mpi-locality,11,11 +21,mpi-locality,3,0 +22,mpi-locality,12,14 +23,mpi-locality,14,16 +24,mpi-locality,10,3 +25,mpi-locality,12,18 +26,mpi-locality,4,46 +27,mpi-locality,12,41 +28,mpi-locality,6,29 +29,mpi-locality,4,7 +30,mpi-locality,15,7 +31,mpi-locality,15,4 +32,mpi-locality,2,7 +33,mpi-locality,7,12 +34,mpi-locality,13,3 +35,mpi-locality,12,8 +36,mpi-locality,7,0 +37,mpi-locality,8,18 +38,mpi-locality,6,7 +39,mpi-locality,11,0 +40,mpi-locality,6,17 +41,mpi-locality,15,3 +42,mpi-locality,2,17 +43,mpi-locality,12,24 +44,mpi-locality,9,9 +45,mpi-locality,6,24 +46,mpi-locality,9,0 +47,mpi-locality,8,21 +48,mpi-locality,3,1 +49,mpi-locality,8,7 +50,mpi-locality,15,4 +51,mpi-locality,2,0 +52,mpi-locality,8,8 +53,mpi-locality,11,6 +54,mpi-locality,15,6 +55,mpi-locality,3,3 +56,mpi-locality,10,0 +57,mpi-locality,7,4 +58,mpi-locality,8,16 +59,mpi-locality,11,1 +60,mpi-locality,7,7 +61,mpi-locality,5,1 +62,mpi-locality,15,0 +63,mpi-locality,15,6 +64,mpi-locality,5,9 +65,mpi-locality,9,5 +66,mpi-locality,6,1 +67,mpi-locality,13,5 +68,mpi-locality,4,4 +69,mpi-locality,10,2 +70,mpi-locality,6,4 +71,mpi-locality,9,0 +72,mpi-locality,5,18 +73,mpi-locality,15,32 +74,mpi-locality,7,1 +75,mpi-locality,15,2 +76,mpi-locality,4,21 +77,mpi-locality,15,0 +78,mpi-locality,5,9 +79,mpi-locality,12,3 +80,mpi-locality,12,5 +81,mpi-locality,3,24 +82,mpi-locality,14,2 +83,mpi-locality,13,7 +84,mpi-locality,9,13 +85,mpi-locality,13,35 +86,mpi-locality,5,0 +87,mpi-locality,15,0 +88,mpi-locality,12,23 +89,mpi-locality,4,3 +90,mpi-locality,3,34 +91,mpi-locality,10,0 +92,mpi-locality,12,8 +93,mpi-locality,14,34 +94,mpi-locality,9,0 +95,mpi-locality,2,1 +96,mpi-locality,14,1 +97,mpi-locality,15,0 +98,mpi-locality,11,6 +99,mpi-locality,3,4 +100,mpi-locality,15,1 +101,mpi-locality,4,14 +102,mpi-locality,12,4 +103,mpi-locality,2,0 +104,mpi-locality,4,10 +105,mpi-locality,15,9 +106,mpi-locality,10,14 +107,mpi-locality,5,7 +108,mpi-locality,4,9 +109,mpi-locality,6,17 +110,mpi-locality,8,13 +111,mpi-locality,8,15 +112,mpi-locality,11,24 +113,mpi-locality,15,5 +114,mpi-locality,15,1 +115,mpi-locality,12,11 +116,mpi-locality,15,9 +117,mpi-locality,7,12 +118,mpi-locality,4,1 +119,mpi-locality,14,6 +120,mpi-locality,4,3 +121,mpi-locality,9,2 +122,mpi-locality,6,9 +123,mpi-locality,9,3 +124,mpi-locality,8,45 +125,mpi-locality,5,0 +126,mpi-locality,6,20 +127,mpi-locality,3,8 +128,mpi-locality,9,4 +129,mpi-locality,11,0 +130,mpi-locality,14,11 +131,mpi-locality,3,2 +132,mpi-locality,4,26 +133,mpi-locality,13,6 +134,mpi-locality,6,43 +135,mpi-locality,7,2 +136,mpi-locality,7,1 +137,mpi-locality,10,2 +138,mpi-locality,13,14 +139,mpi-locality,9,2 +140,mpi-locality,3,10 +141,mpi-locality,9,16 +142,mpi-locality,2,3 +143,mpi-locality,6,13 +144,mpi-locality,3,15 +145,mpi-locality,14,17 +146,mpi-locality,2,16 +147,mpi-locality,8,4 +148,mpi-locality,7,6 +149,mpi-locality,5,12 diff --git a/tasks/makespan/traces/trace_mpi-locality_200_8.csv b/tasks/makespan/traces/trace_mpi-locality_200_8.csv new file mode 100644 index 0000000..27e6952 --- /dev/null +++ b/tasks/makespan/traces/trace_mpi-locality_200_8.csv @@ -0,0 +1,201 @@ +TaskId,App,Size,InterArrivalTimeSecs +0,mpi-locality,13,0 +1,mpi-locality,2,14 +2,mpi-locality,2,4 +3,mpi-locality,12,4 +4,mpi-locality,15,4 +5,mpi-locality,13,4 +6,mpi-locality,12,11 +7,mpi-locality,9,49 +8,mpi-locality,3,3 +9,mpi-locality,14,1 +10,mpi-locality,8,4 +11,mpi-locality,11,5 +12,mpi-locality,12,15 +13,mpi-locality,12,5 +14,mpi-locality,13,11 +15,mpi-locality,10,1 +16,mpi-locality,2,5 +17,mpi-locality,8,1 +18,mpi-locality,8,5 +19,mpi-locality,2,2 +20,mpi-locality,3,37 +21,mpi-locality,10,8 +22,mpi-locality,10,9 +23,mpi-locality,15,4 +24,mpi-locality,11,5 +25,mpi-locality,11,6 +26,mpi-locality,9,32 +27,mpi-locality,10,16 +28,mpi-locality,5,2 +29,mpi-locality,5,10 +30,mpi-locality,15,5 +31,mpi-locality,8,7 +32,mpi-locality,3,2 +33,mpi-locality,4,1 +34,mpi-locality,13,3 +35,mpi-locality,13,1 +36,mpi-locality,4,5 +37,mpi-locality,15,7 +38,mpi-locality,3,4 +39,mpi-locality,13,19 +40,mpi-locality,13,1 +41,mpi-locality,7,2 +42,mpi-locality,2,3 +43,mpi-locality,9,3 +44,mpi-locality,10,7 +45,mpi-locality,6,77 +46,mpi-locality,6,0 +47,mpi-locality,12,9 +48,mpi-locality,8,24 +49,mpi-locality,11,28 +50,mpi-locality,14,2 +51,mpi-locality,8,9 +52,mpi-locality,7,0 +53,mpi-locality,11,5 +54,mpi-locality,9,12 +55,mpi-locality,6,5 +56,mpi-locality,2,44 +57,mpi-locality,9,15 +58,mpi-locality,8,7 +59,mpi-locality,4,18 +60,mpi-locality,14,9 +61,mpi-locality,3,21 +62,mpi-locality,11,0 +63,mpi-locality,6,13 +64,mpi-locality,9,2 +65,mpi-locality,3,1 +66,mpi-locality,12,26 +67,mpi-locality,2,1 +68,mpi-locality,2,7 +69,mpi-locality,12,2 +70,mpi-locality,15,27 +71,mpi-locality,13,2 +72,mpi-locality,15,5 +73,mpi-locality,2,4 +74,mpi-locality,8,5 +75,mpi-locality,5,4 +76,mpi-locality,14,1 +77,mpi-locality,9,66 +78,mpi-locality,11,3 +79,mpi-locality,10,2 +80,mpi-locality,12,3 +81,mpi-locality,15,7 +82,mpi-locality,3,7 +83,mpi-locality,12,4 +84,mpi-locality,9,11 +85,mpi-locality,4,19 +86,mpi-locality,9,13 +87,mpi-locality,5,9 +88,mpi-locality,8,5 +89,mpi-locality,11,5 +90,mpi-locality,5,11 +91,mpi-locality,3,0 +92,mpi-locality,12,21 +93,mpi-locality,6,5 +94,mpi-locality,10,8 +95,mpi-locality,3,5 +96,mpi-locality,15,72 +97,mpi-locality,7,15 +98,mpi-locality,8,9 +99,mpi-locality,10,9 +100,mpi-locality,10,9 +101,mpi-locality,11,3 +102,mpi-locality,13,3 +103,mpi-locality,11,1 +104,mpi-locality,4,2 +105,mpi-locality,11,20 +106,mpi-locality,6,9 +107,mpi-locality,10,7 +108,mpi-locality,6,15 +109,mpi-locality,6,5 +110,mpi-locality,13,1 +111,mpi-locality,15,2 +112,mpi-locality,3,4 +113,mpi-locality,11,5 +114,mpi-locality,8,1 +115,mpi-locality,8,6 +116,mpi-locality,11,4 +117,mpi-locality,3,10 +118,mpi-locality,6,32 +119,mpi-locality,15,9 +120,mpi-locality,14,8 +121,mpi-locality,5,2 +122,mpi-locality,8,16 +123,mpi-locality,6,2 +124,mpi-locality,10,19 +125,mpi-locality,11,5 +126,mpi-locality,6,17 +127,mpi-locality,12,5 +128,mpi-locality,3,4 +129,mpi-locality,2,20 +130,mpi-locality,7,11 +131,mpi-locality,11,4 +132,mpi-locality,10,8 +133,mpi-locality,8,3 +134,mpi-locality,10,32 +135,mpi-locality,13,21 +136,mpi-locality,7,13 +137,mpi-locality,11,10 +138,mpi-locality,4,16 +139,mpi-locality,6,5 +140,mpi-locality,11,0 +141,mpi-locality,10,5 +142,mpi-locality,5,8 +143,mpi-locality,2,4 +144,mpi-locality,9,3 +145,mpi-locality,6,6 +146,mpi-locality,8,13 +147,mpi-locality,11,30 +148,mpi-locality,6,2 +149,mpi-locality,2,0 +150,mpi-locality,7,46 +151,mpi-locality,9,43 +152,mpi-locality,3,45 +153,mpi-locality,13,0 +154,mpi-locality,12,35 +155,mpi-locality,15,16 +156,mpi-locality,14,1 +157,mpi-locality,14,12 +158,mpi-locality,14,11 +159,mpi-locality,12,13 +160,mpi-locality,11,4 +161,mpi-locality,8,6 +162,mpi-locality,4,7 +163,mpi-locality,13,28 +164,mpi-locality,9,1 +165,mpi-locality,2,6 +166,mpi-locality,8,1 +167,mpi-locality,6,4 +168,mpi-locality,2,0 +169,mpi-locality,4,7 +170,mpi-locality,2,7 +171,mpi-locality,5,7 +172,mpi-locality,14,8 +173,mpi-locality,6,2 +174,mpi-locality,8,11 +175,mpi-locality,11,3 +176,mpi-locality,14,6 +177,mpi-locality,4,0 +178,mpi-locality,15,0 +179,mpi-locality,14,6 +180,mpi-locality,8,26 +181,mpi-locality,4,7 +182,mpi-locality,6,17 +183,mpi-locality,15,4 +184,mpi-locality,4,7 +185,mpi-locality,8,30 +186,mpi-locality,9,7 +187,mpi-locality,15,11 +188,mpi-locality,3,4 +189,mpi-locality,7,27 +190,mpi-locality,14,14 +191,mpi-locality,14,4 +192,mpi-locality,11,17 +193,mpi-locality,3,4 +194,mpi-locality,4,6 +195,mpi-locality,8,4 +196,mpi-locality,13,10 +197,mpi-locality,4,1 +198,mpi-locality,5,3 +199,mpi-locality,4,12 diff --git a/tasks/makespan/traces/trace_mpi-locality_25_8.csv b/tasks/makespan/traces/trace_mpi-locality_25_8.csv new file mode 100644 index 0000000..3f6324f --- /dev/null +++ b/tasks/makespan/traces/trace_mpi-locality_25_8.csv @@ -0,0 +1,26 @@ +TaskId,App,Size,InterArrivalTimeSecs +0,mpi-locality,13,0 +1,mpi-locality,3,2 +2,mpi-locality,12,10 +3,mpi-locality,2,41 +4,mpi-locality,12,2 +5,mpi-locality,15,18 +6,mpi-locality,14,1 +7,mpi-locality,15,21 +8,mpi-locality,6,1 +9,mpi-locality,12,5 +10,mpi-locality,12,34 +11,mpi-locality,9,17 +12,mpi-locality,5,12 +13,mpi-locality,3,5 +14,mpi-locality,9,4 +15,mpi-locality,15,4 +16,mpi-locality,4,3 +17,mpi-locality,10,0 +18,mpi-locality,2,11 +19,mpi-locality,13,18 +20,mpi-locality,11,5 +21,mpi-locality,2,11 +22,mpi-locality,12,21 +23,mpi-locality,7,15 +24,mpi-locality,7,6 diff --git a/tasks/makespan/traces/trace_mpi-locality_50_8.csv b/tasks/makespan/traces/trace_mpi-locality_50_8.csv new file mode 100644 index 0000000..3057621 --- /dev/null +++ b/tasks/makespan/traces/trace_mpi-locality_50_8.csv @@ -0,0 +1,51 @@ +TaskId,App,Size,InterArrivalTimeSecs +0,mpi-locality,12,0 +1,mpi-locality,8,0 +2,mpi-locality,13,16 +3,mpi-locality,7,14 +4,mpi-locality,10,5 +5,mpi-locality,13,7 +6,mpi-locality,2,28 +7,mpi-locality,5,5 +8,mpi-locality,15,3 +9,mpi-locality,14,0 +10,mpi-locality,15,26 +11,mpi-locality,8,32 +12,mpi-locality,2,30 +13,mpi-locality,6,0 +14,mpi-locality,13,1 +15,mpi-locality,8,13 +16,mpi-locality,10,17 +17,mpi-locality,6,2 +18,mpi-locality,6,11 +19,mpi-locality,9,0 +20,mpi-locality,8,0 +21,mpi-locality,9,1 +22,mpi-locality,7,9 +23,mpi-locality,3,20 +24,mpi-locality,13,0 +25,mpi-locality,14,1 +26,mpi-locality,11,13 +27,mpi-locality,5,2 +28,mpi-locality,5,19 +29,mpi-locality,4,41 +30,mpi-locality,5,2 +31,mpi-locality,11,53 +32,mpi-locality,8,0 +33,mpi-locality,13,18 +34,mpi-locality,2,5 +35,mpi-locality,2,4 +36,mpi-locality,7,23 +37,mpi-locality,9,10 +38,mpi-locality,7,8 +39,mpi-locality,13,4 +40,mpi-locality,14,2 +41,mpi-locality,15,4 +42,mpi-locality,14,20 +43,mpi-locality,13,0 +44,mpi-locality,5,2 +45,mpi-locality,5,27 +46,mpi-locality,8,7 +47,mpi-locality,10,8 +48,mpi-locality,13,0 +49,mpi-locality,11,24 diff --git a/tasks/makespan/traces/trace_mpi-locality_75_8.csv b/tasks/makespan/traces/trace_mpi-locality_75_8.csv new file mode 100644 index 0000000..e621baf --- /dev/null +++ b/tasks/makespan/traces/trace_mpi-locality_75_8.csv @@ -0,0 +1,76 @@ +TaskId,App,Size,InterArrivalTimeSecs +0,mpi-locality,2,0 +1,mpi-locality,10,19 +2,mpi-locality,15,5 +3,mpi-locality,11,0 +4,mpi-locality,11,8 +5,mpi-locality,10,3 +6,mpi-locality,4,27 +7,mpi-locality,5,1 +8,mpi-locality,15,8 +9,mpi-locality,13,10 +10,mpi-locality,2,2 +11,mpi-locality,11,5 +12,mpi-locality,14,29 +13,mpi-locality,11,8 +14,mpi-locality,12,8 +15,mpi-locality,4,14 +16,mpi-locality,4,4 +17,mpi-locality,12,20 +18,mpi-locality,2,9 +19,mpi-locality,12,3 +20,mpi-locality,8,14 +21,mpi-locality,3,1 +22,mpi-locality,13,32 +23,mpi-locality,2,16 +24,mpi-locality,5,10 +25,mpi-locality,3,13 +26,mpi-locality,5,6 +27,mpi-locality,11,5 +28,mpi-locality,3,32 +29,mpi-locality,8,7 +30,mpi-locality,8,5 +31,mpi-locality,6,12 +32,mpi-locality,15,0 +33,mpi-locality,4,17 +34,mpi-locality,6,4 +35,mpi-locality,7,2 +36,mpi-locality,7,13 +37,mpi-locality,7,13 +38,mpi-locality,13,0 +39,mpi-locality,9,0 +40,mpi-locality,9,2 +41,mpi-locality,12,6 +42,mpi-locality,11,4 +43,mpi-locality,7,0 +44,mpi-locality,4,1 +45,mpi-locality,11,3 +46,mpi-locality,3,9 +47,mpi-locality,4,4 +48,mpi-locality,9,24 +49,mpi-locality,2,8 +50,mpi-locality,4,8 +51,mpi-locality,2,3 +52,mpi-locality,14,5 +53,mpi-locality,2,1 +54,mpi-locality,9,13 +55,mpi-locality,7,1 +56,mpi-locality,6,20 +57,mpi-locality,5,26 +58,mpi-locality,5,0 +59,mpi-locality,3,15 +60,mpi-locality,12,1 +61,mpi-locality,3,3 +62,mpi-locality,10,40 +63,mpi-locality,13,15 +64,mpi-locality,7,5 +65,mpi-locality,15,2 +66,mpi-locality,3,1 +67,mpi-locality,15,2 +68,mpi-locality,4,5 +69,mpi-locality,11,0 +70,mpi-locality,13,5 +71,mpi-locality,5,4 +72,mpi-locality,11,4 +73,mpi-locality,5,7 +74,mpi-locality,15,0 diff --git a/tasks/migration/oracle.py b/tasks/migration/oracle.py index 3e4163d..9557cc5 100644 --- a/tasks/migration/oracle.py +++ b/tasks/migration/oracle.py @@ -21,7 +21,7 @@ LAMMPS_FAASM_MIGRATION_NET_FUNC, LAMMPS_SIM_WORKLOAD, LAMMPS_SIM_WORKLOAD_CONFIGS, - get_faasm_benchmark, + get_lammps_data_file, get_lammps_migration_params, ) from time import sleep @@ -126,7 +126,7 @@ def do_write_csv_line(csv_name, part, xvm_links, actual_time): host_list = generate_host_list(part) file_name = basename( - get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0] + get_lammps_data_file(LAMMPS_SIM_WORKLOAD)["data"][0] ) user = LAMMPS_FAASM_USER func = LAMMPS_FAASM_MIGRATION_NET_FUNC diff --git a/tasks/migration/run.py b/tasks/migration/run.py index 986abf5..94cec3e 100644 --- a/tasks/migration/run.py +++ b/tasks/migration/run.py @@ -14,7 +14,7 @@ LAMMPS_FAASM_MIGRATION_NET_FUNC, LAMMPS_SIM_WORKLOAD, LAMMPS_SIM_WORKLOAD_CONFIGS, - get_faasm_benchmark, + get_lammps_data_file, get_lammps_migration_params, ) from time import sleep @@ -48,9 +48,6 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8): """ num_vms = len(get_faasm_worker_ips()) assert num_vms == 2, "Expected 2 VMs got: {}!".format(num_vms) - # data_file = basename(get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]) - # TODO: is this a good idea? FIXME FIXME DELETE ME - data_file = basename(get_faasm_benchmark("compute")["data"][0]) if check_in is None: check_array = [0, 2, 4, 6, 8, 10] @@ -65,6 +62,9 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8): ) ) workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload] + data_file = basename( + get_lammps_data_file(workload_config["data_file"])["data"][0] + ) csv_name = "migration_{}.csv".format(workload) _init_csv_file(csv_name) diff --git a/tasks/motivation/ideal.py b/tasks/motivation/ideal.py index 80706a9..3a2b322 100644 --- a/tasks/motivation/ideal.py +++ b/tasks/motivation/ideal.py @@ -10,7 +10,7 @@ from tasks.util.lammps import ( LAMMPS_DOCKER_BINARY, LAMMPS_DOCKER_DIR, - get_faasm_benchmark, + get_lammps_data_file, ) from tasks.util.openmpi import get_native_mpi_pods, run_kubectl_cmd from time import time @@ -84,7 +84,7 @@ def do_single_run(vm_names, vm_ips, size, partition): assert len(allocated_pod_ips) == size # Prepare LAMMPS command line - data_file = get_faasm_benchmark("compute-xl")["data"][0] + data_file = get_lammps_data_file("compute-xl")["data"][0] lammps_cmdline = "-in {}/{}.faasm.native".format( LAMMPS_DOCKER_DIR, data_file ) diff --git a/tasks/motivation/plot.py b/tasks/motivation/plot.py index 0505ba5..5820402 100644 --- a/tasks/motivation/plot.py +++ b/tasks/motivation/plot.py @@ -4,7 +4,7 @@ from os import makedirs from os.path import join from tasks.util.env import PLOTS_ROOT -from tasks.util.makespan import do_makespan_plot, read_makespan_results +from tasks.util.locality import plot_locality_results, read_locality_results from tasks.util.plot import ( get_color_for_baseline, get_label_for_baseline, @@ -26,7 +26,7 @@ def locality(ctx): num_cpus_per_vm = 8 results = {} - results[num_vms] = read_makespan_results( + results[num_vms] = read_locality_results( num_vms, num_tasks, num_cpus_per_vm ) makedirs(MOTIVATION_PLOTS_DIR, exist_ok=True) @@ -36,7 +36,9 @@ def locality(ctx): # ---------- fig, ax1 = subplots(figsize=(6, 2)) - do_makespan_plot("ts_vcpus", results, ax1, num_vms, num_tasks) + plot_locality_results( + "ts_vcpus", results, ax1, num_vms=num_vms, num_tasks=num_tasks + ) # Manually craft the legend baselines = ["slurm", "batch", "granny-migrate"] @@ -61,7 +63,9 @@ def locality(ctx): # ---------- fig, ax2 = subplots(figsize=(6, 2)) - do_makespan_plot("ts_xvm_links", results, ax2, num_vms, num_tasks) + plot_locality_results( + "ts_xvm_links", results, ax2, num_vms=num_vms, num_tasks=num_tasks + ) save_plot(fig, MOTIVATION_PLOTS_DIR, "motivation_xvm_links") diff --git a/tasks/util/elastic.py b/tasks/util/elastic.py index 207e368..f33cfd3 100644 --- a/tasks/util/elastic.py +++ b/tasks/util/elastic.py @@ -228,7 +228,7 @@ def _do_plot_makespan(results, ax, **kwargs): ax.set_ylim(bottom=0) ax.set_ylabel("Makespan [s]") - ax.set_xticks(xticks, labels=xticklabels, fontsize=6) + ax.set_xticks(xticks, labels=xticklabels) def _do_plot_cdf_jct(results, ax, **kwargs): @@ -327,8 +327,8 @@ def _do_plot_percentage_vcpus(results, ax, **kwargs): ax.set_ylim(bottom=0) ax.set_xlim(left=-0.25) - ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]", fontsize=8) - ax.set_xticks(xs, labels=xticklabels, fontsize=6) + ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]") + ax.set_xticks(xs, labels=xticklabels) def _do_plot_ts_vcpus(results, ax, **kwargs): diff --git a/tasks/util/lammps.py b/tasks/util/lammps.py index a6b29f0..9d916bd 100644 --- a/tasks/util/lammps.py +++ b/tasks/util/lammps.py @@ -49,13 +49,29 @@ LAMMPS_SIM_WORKLOAD_CONFIGS = { "compute": { + "data_file": "compute", + "num_iterations": 10, "num_net_loops": 0, "chunk_size": 0, }, "network": { + "data_file": "compute", + "num_iterations": 10, "num_net_loops": LAMMPS_SIM_NUM_NET_LOOPS, "chunk_size": LAMMPS_SIM_CHUNK_SIZE, }, + "very-network": { + "data_file": "compute", + "num_iterations": 10, + "num_net_loops": 1e5, + "chunk_size": 10, + }, + "og-network": { + "data_file": "network", + "num_iterations": 10, + "num_net_loops": 0, + "chunk_size": 0, + }, } # Different supported LAMMPS benchmarks @@ -94,15 +110,21 @@ } -def get_faasm_benchmark(bench): - if bench not in BENCHMARKS: - print("Unrecognized benchmark: {}".format(bench)) +def get_lammps_data_file(workload): + return BENCHMARKS[workload] + + +def get_lammps_workload(workload): + if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS: + print("Unrecognized workload: {}".format(workload)) print( - "The supported LAMMPS benchmarks are: {}".format(BENCHMARKS.keys()) + "The supported LAMMPS workloads are: {}".format( + LAMMPS_SIM_WORKLOAD_CONFIGS.keys() + ) ) - raise RuntimeError("Unrecognized LAMMPS benchmark") + raise RuntimeError("Unrecognized LAMMPS workload") - return BENCHMARKS[bench] + return LAMMPS_SIM_WORKLOAD_CONFIGS[workload] def get_lammps_migration_params( @@ -138,7 +160,7 @@ def lammps_data_upload(ctx, bench): file_details = [] for b in bench: - _bench = get_faasm_benchmark(b) + _bench = get_lammps_data_file(b) # Upload all data corresponding to the benchmark for data in _bench["data"]: diff --git a/tasks/util/locality.py b/tasks/util/locality.py new file mode 100644 index 0000000..b30d599 --- /dev/null +++ b/tasks/util/locality.py @@ -0,0 +1,743 @@ +from glob import glob +from numpy import linspace +from os.path import join +from pandas import read_csv +from scipy.interpolate import CubicSpline +from tasks.util.makespan import ( + GRANNY_BASELINES, + MAKESPAN_RESULTS_DIR, + NATIVE_BASELINES, +) +from tasks.util.math import cum_sum +from tasks.util.planner import get_xvm_links_from_part +from tasks.util.plot import ( + fix_hist_step_vertical_line_at_end, + get_color_for_baseline, + get_label_for_baseline, +) +from tasks.util.trace import load_task_trace_from_file + +# ---------------------------- +# Plotting utilities +# ---------------------------- + + +def read_locality_results(num_vms, num_tasks, num_cpus_per_vm, migrate=False): + workload = "mpi-locality" if not migrate else "mpi-migrate" + + # Load results + result_dict = {} + glob_str = "makespan_exec-task-info_*_{}_{}_{}_{}.csv".format( + num_vms, workload, num_tasks, num_cpus_per_vm + ) + for csv in glob(join(MAKESPAN_RESULTS_DIR, glob_str)): + baseline = csv.split("_")[2] + workload = csv.split("_")[4] + + # ----- + # Results to visualise differences between execution time and time + # in queue + # ----- + + # Results for per-job exec time and time-in-queue + result_dict[baseline] = {} + results = read_csv(csv) + task_ids = results["TaskId"].to_list() + times_exec = results["TimeExecuting"].to_list() + times_queue = results["TimeInQueue"].to_list() + start_ts = results["StartTimeStamp"].to_list() + genesis_ts = min(start_ts) + end_ts = results["EndTimeStamp"].to_list() + result_dict[baseline]["exec-time"] = [-1 for _ in task_ids] + result_dict[baseline]["queue-time"] = [-1 for _ in task_ids] + result_dict[baseline]["jct"] = [-1 for _ in task_ids] + + for tid, texec, tqueue, e_ts in zip( + task_ids, times_exec, times_queue, end_ts + ): + result_dict[baseline]["exec-time"][tid] = texec + result_dict[baseline]["queue-time"][tid] = tqueue + result_dict[baseline]["jct"][tid] = e_ts - genesis_ts + + # ----- + # Results to visualise job churn + # ----- + + start_ts = results.min()["StartTimeStamp"] + end_ts = results.max()["EndTimeStamp"] + time_elapsed_secs = int(end_ts - start_ts) + result_dict[baseline]["makespan"] = time_elapsed_secs + print( + "Num VMs: {} - Num Tasks: {} - Baseline: {} - Makespan: {}s".format( + num_vms, num_tasks, baseline, time_elapsed_secs + ) + ) + if time_elapsed_secs > 1e5: + raise RuntimeError( + "Measured total time elapsed is too long: {}".format( + time_elapsed_secs + ) + ) + + # Dump all data + tasks_per_ts = [[] for i in range(time_elapsed_secs)] + for index, row in results.iterrows(): + task_id = row["TaskId"] + start_slot = int(row["StartTimeStamp"] - start_ts) + end_slot = int(row["EndTimeStamp"] - start_ts) + for ind in range(start_slot, end_slot): + tasks_per_ts[ind].append(task_id) + for tasks in tasks_per_ts: + tasks.sort() + + # Prune the timeseries + pruned_tasks_per_ts = {} + # prev_tasks = [] + for ts, tasks in enumerate(tasks_per_ts): + # NOTE: we are not pruning at the moment + pruned_tasks_per_ts[ts] = tasks + # if tasks != prev_tasks: + # pruned_tasks_per_ts[ts] = tasks + # prev_tasks = tasks + + result_dict[baseline]["tasks_per_ts"] = pruned_tasks_per_ts + + # ----- + # Results to visualise scheduling info per task + # ----- + + sched_info_csv = "makespan_sched-info_{}_{}_{}_{}_{}.csv".format( + baseline, num_vms, workload, num_tasks, num_cpus_per_vm + ) + if baseline not in GRANNY_BASELINES: + result_dict[baseline]["task_scheduling"] = {} + + # We identify VMs by numbers, not IPs + ip_to_vm = {} + vm_to_id = {} + with open( + join(MAKESPAN_RESULTS_DIR, sched_info_csv), "r" + ) as sched_fd: + # Process the file line by line, as each line will be different in + # length + for num, line in enumerate(sched_fd): + # Skip the header + if num == 0: + continue + + line = line.strip() + + # In line 1 we include the IP to node conversion as one + # comma-separated line, so we parse it here + if num == 1: + ip_to_vm_line = line.split(",") + assert len(ip_to_vm_line) % 2 == 0 + + i = 0 + while i < len(ip_to_vm_line): + ip = ip_to_vm_line[i] + vm = ip_to_vm_line[i + 1] + ip_to_vm[ip] = vm + i += 2 + + continue + + # Get the task id and the scheduling decision from the line + task_id = line.split(",")[0] + result_dict[baseline]["task_scheduling"][task_id] = {} + sched_info = line.split(",")[1:] + # The scheduling decision must be even, as it contains pairs + # of ip + slots + assert len(sched_info) % 2 == 0 + + i = 0 + while i < len(sched_info): + vm = ip_to_vm[sched_info[i]] + slots = sched_info[i + 1] + + if vm not in vm_to_id: + len_map = len(vm_to_id) + vm_to_id[vm] = len_map + + vm_id = vm_to_id[vm] + if ( + vm_id + not in result_dict[baseline]["task_scheduling"][ + task_id + ] + ): + result_dict[baseline]["task_scheduling"][task_id][ + vm_id + ] = 0 + + result_dict[baseline]["task_scheduling"][task_id][ + vm_id + ] += int(slots) + i += 2 + + # ----- + # Results to visualise the % of idle vCPUs (and VMs) over time + # ----- + + task_trace = load_task_trace_from_file( + workload, num_tasks, num_cpus_per_vm + ) + + result_dict[baseline]["ts_vcpus"] = {} + result_dict[baseline]["ts_xvm_links"] = {} + result_dict[baseline]["ts_idle_vms"] = {} + total_available_vcpus = num_vms * num_cpus_per_vm + + if baseline in NATIVE_BASELINES: + # First, set each timestamp to the total available vCPUs, and + # initialise the set of idle vms + for ts in result_dict[baseline]["tasks_per_ts"]: + result_dict[baseline]["ts_vcpus"][ts] = total_available_vcpus + result_dict[baseline]["ts_idle_vms"][ts] = set() + + # Second, for each ts subtract the size of each task in-flight + for ts in result_dict[baseline]["tasks_per_ts"]: + for t in result_dict[baseline]["tasks_per_ts"][ts]: + result_dict[baseline]["ts_vcpus"][ts] -= task_trace[ + int(t) + ].size + + # In addition, for each task in flight, add the tasks's IPs + # to the host set + for vm_id in result_dict[baseline]["task_scheduling"][ + str(int(t)) + ]: + result_dict[baseline]["ts_idle_vms"][ts].add(vm_id) + + # Third, express the results as percentages, and the number of + # idle VMs as a number (not as a set) + for ts in result_dict[baseline]["ts_vcpus"]: + result_dict[baseline]["ts_vcpus"][ts] = ( + result_dict[baseline]["ts_vcpus"][ts] + / total_available_vcpus + ) * 100 + + result_dict[baseline]["ts_idle_vms"][ts] = num_vms - len( + result_dict[baseline]["ts_idle_vms"][ts] + ) + else: + # For Granny, the idle vCPUs results are directly available in + # the file + sch_info_csv = read_csv(join(MAKESPAN_RESULTS_DIR, sched_info_csv)) + idle_cpus = ( + sch_info_csv["NumIdleCpus"] / total_available_vcpus * 100 + ).to_list() + tss = ( + sch_info_csv["TimeStampSecs"] + - sch_info_csv["TimeStampSecs"][0] + ).to_list() + + # Idle vCPUs + for (idle_cpu, ts) in zip(idle_cpus, tss): + result_dict[baseline]["ts_vcpus"][ts] = idle_cpu + + # x-VM links + xvm_links = sch_info_csv["NumCrossVmLinks"].to_list() + for (ts, xvm_link) in zip(tss, xvm_links): + result_dict[baseline]["ts_xvm_links"][ts] = xvm_link + + # Num of idle VMs + num_idle_vms = sch_info_csv["NumIdleVms"].to_list() + for (ts, n_idle_vms) in zip(tss, num_idle_vms): + result_dict[baseline]["ts_idle_vms"][ts] = n_idle_vms + + # ----- + # Results to visualise the # of cross-vm links + # ----- + + if baseline in NATIVE_BASELINES: + for ts in result_dict[baseline]["tasks_per_ts"]: + result_dict[baseline]["ts_xvm_links"][ts] = 0 + + for ts in result_dict[baseline]["tasks_per_ts"]: + for t in result_dict[baseline]["tasks_per_ts"][ts]: + if baseline == "slurm": + sched = result_dict[baseline]["task_scheduling"][ + str(int(t)) + ] + + # If only scheduled to one VM, no cross-VM links + if len(sched) <= 1: + continue + + # Add the accumulated to the total tally + result_dict[baseline]["ts_xvm_links"][ + ts + ] += get_xvm_links_from_part(list(sched.values())) + elif baseline == "batch": + # Batch baseline is optimal in terms of cross-vm links + task_size = task_trace[int(t)].size + if task_size > 8: + num_links = 8 * (task_size - 8) / 2 + result_dict[baseline]["ts_xvm_links"][ + ts + ] += num_links + + return result_dict + + +def _do_plot_exec_vs_tiq(results, ax, **kwargs): + """ + This plot presents the percentiles of slowdown of execution time (and + time in queue too?) + """ + num_vms = kwargs["num_vms"] + num_tasks = kwargs["num_tasks"] + + num_jobs = len(results[num_vms[0]]["slurm"]["exec-time"]) + + num_slowdowns = 3 + percentiles = [50, 75, 90, 95, 100] + width = float(1 / len(percentiles)) * 0.8 + xs = [] + ys = [] + xticks = [] + xlabels = [] + colors = [] + xs_vlines = [] + + for vm_ind, n_vms in enumerate(num_vms): + + x_vm_offset = vm_ind * num_slowdowns + + # Calculate slowdowns wrt granny w/ migration + slurm_slowdown = sorted( + [ + float(slurm_time / granny_time) + for (slurm_time, granny_time) in zip( + results[n_vms]["slurm"]["exec-time"], + results[n_vms]["granny-migrate"]["exec-time"], + ) + ] + ) + batch_slowdown = sorted( + [ + float(batch_time / granny_time) + for (batch_time, granny_time) in zip( + results[n_vms]["batch"]["exec-time"], + results[n_vms]["granny-migrate"]["exec-time"], + ) + ] + ) + granny_nomig_slowdown = sorted( + [ + float(granny_nomig_time / granny_time) + for (granny_nomig_time, granny_time) in zip( + results[n_vms]["granny"]["exec-time"], + results[n_vms]["granny-migrate"]["exec-time"], + ) + ] + ) + + for ind, (bline, slowdown) in enumerate( + [ + ("slurm", slurm_slowdown), + ("batch", batch_slowdown), + ("granny-no-migration", granny_nomig_slowdown), + ] + ): + x_bline_offset = ind + + for subind, percentile in enumerate(percentiles): + x = ( + x_vm_offset + + x_bline_offset + - width * int(len(percentiles) / 2) + + width * subind + + width * 0.5 * (len(percentiles) % 2 == 0) + ) + xs.append(x) + if percentile == 100: + ys.append(slowdown[-1]) + else: + index = int(percentile / 100 * num_jobs) + ys.append(slowdown[index]) + colors.append(get_color_for_baseline("mpi-migrate", bline)) + + # Add a vertical line at the end of each VM block + xs_vlines.append( + x_vm_offset + (num_slowdowns * len(percentiles) + 1 - 0.25) * width + ) + + # Add a label once per VM block + x_label = x_vm_offset + ( + (num_slowdowns * len(percentiles) + num_slowdowns - 2) / 2 * width + ) + xticks.append(x_label) + xlabels.append("{} VMs\n({} Jobs)".format(n_vms, num_tasks[vm_ind])) + + xmin = -0.5 + xmax = len(num_vms) * num_slowdowns - 0.5 + ymin = 0.75 + ymax = 2 + + ax.bar(xs, ys, width=width, color=colors, edgecolor="black") + ax.hlines(y=1, color="red", xmin=xmin, xmax=xmax) + ax.vlines( + xs_vlines, + ymin=ymin, + ymax=ymax, + color="gray", + linestyles="dashed", + linewidth=0.5, + ) + ax.set_xticks(xticks, labels=xlabels, fontsize=6) + ax.set_ylabel("Slowdown [Baseline/Granny]") + ax.set_xlabel( + "Job percentile [{}]".format( + ",".join([str(p) + "th" for p in percentiles]) + ), + fontsize=8, + ) + ax.set_xlim(left=xmin, right=xmax) + ax.set_ylim(bottom=ymin, top=ymax) + + +def _do_plot_exec_cdf(results, ax, **kwargs): + """ + CDF of the absolute job completion time + (from the beginning of time, until the job has finished) + """ + num_vms = kwargs["num_vms"] + + num_jobs = len(results[num_vms]["slurm"]["exec-time"]) + labels = ["slurm", "batch", "granny", "granny-migrate"] + xs = list(range(num_jobs)) + + for label in labels: + ys = [] + + this_label = label + if label == "granny-migrate": + this_label = "granny" + elif label == "granny": + this_label = "granny-nomig" + + # Calculate the histogram using the histogram function, get the + # results from the return value, but make the plot transparent. + # TODO: maybe just calculate the CDF analitically? + ys, xs, patches = ax.hist( + results[num_vms][label]["jct"], + 100, + color=get_color_for_baseline("mpi-migrate", label), + histtype="step", + density=True, + cumulative=True, + label=this_label, + # alpha=0, + ) + fix_hist_step_vertical_line_at_end(ax) + + # Interpolate more points + # spl = splrep(xs[:-1], ys, s=0.01, per=False) + # x2 = linspace(xs[0], xs[-1], 400) + # y2 = splev(x2, spl) + # ax.plot(x2, y2, color=PLOT_COLORS[label], label=this_label, linewidth=0.5) + + ax.set_xlabel("Job Completion Time [s]") + ax.set_ylabel("CDF") + ax.set_ylim(bottom=0, top=1) + ax.legend() + + +def _do_plot_makespan(results, ax, **kwargs): + num_vms = kwargs["num_vms"] + num_tasks = kwargs["num_tasks"] + baselines = ["granny", "granny-batch", "granny-migrate"] + + xs = [] + ys = [] + colors = [] + xticks = [] + xticklabels = [] + + for ind, n_vms in enumerate(num_vms): + x_offset = ind * len(baselines) + (ind + 1) + xs += [x + x_offset for x in range(len(baselines))] + ys += [results[n_vms][la]["makespan"] for la in baselines] + colors += [ + get_color_for_baseline("mpi-locality", la) for la in baselines + ] + + # Add one tick and xlabel per VM size + xticks.append(x_offset + len(baselines) / 2) + xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, num_tasks[ind])) + + # Add spacing between vms + xs.append(x_offset + len(baselines)) + ys.append(0) + colors.append("white") + + ax.bar(xs, ys, color=colors, edgecolor="black", width=1) + ax.set_ylim(bottom=0) + ax.set_ylabel("Makespan [s]") + ax.set_xticks(xticks, labels=xticklabels) + + +def _do_plot_ts_vcpus(results, ax, **kwargs): + """ + This plot presents a timeseries of the % of idle vCPUs over time + """ + num_vms = kwargs["num_vms"] + workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality" + + if workload == "mpi-migrate": + baselines = ["batch", "slurm", "granny", "granny-migrate"] + else: + baselines = ["granny", "granny-batch", "granny-migrate"] + + xlim = 0 + for baseline in baselines: + if baseline in NATIVE_BASELINES: + xs = range(len(results[num_vms][baseline]["ts_vcpus"])) + else: + xs = list(results[num_vms][baseline]["ts_vcpus"].keys()) + xlim = max(xlim, max(xs)) + + ax.plot( + xs, + [results[num_vms][baseline]["ts_vcpus"][x] for x in xs], + label=get_label_for_baseline(workload, baseline), + color=get_color_for_baseline(workload, baseline), + ) + + ax.set_xlim(left=0, right=xlim) + ax.set_ylim(bottom=0, top=100) + ax.set_ylabel("% idle vCPUs") + ax.set_xlabel("Time [s]") + + +def _do_plot_ts_xvm_links(results, ax, **kwargs): + """ + This plot presents a timeseries of the # of cross-VM links over time + """ + num_vms = kwargs["num_vms"] + workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality" + num_points = 500 + + if workload == "mpi-migrate": + baselines = ["batch", "slurm"] + else: + baselines = ["granny", "granny-batch"] + + xlim = 0 + for baseline in baselines: + if workload == "mpi-migrate": + xs = range(len(results[num_vms][baseline]["ts_xvm_links"])) + else: + xs = list(results[num_vms][baseline]["ts_xvm_links"].keys()) + xlim = max(xlim, max(xs)) + + ax.plot( + xs, + [results[num_vms][baseline]["ts_xvm_links"][x] for x in xs], + label=get_label_for_baseline(workload, baseline), + color=get_color_for_baseline(workload, baseline), + ) + + # We do Granny separately to interpolate + xs_granny_migrate = list( + results[num_vms]["granny-migrate"]["ts_xvm_links"].keys() + ) + ys_granny_migrate = [ + results[num_vms]["granny-migrate"]["ts_xvm_links"][x] + for x in xs_granny_migrate + ] + spl_granny_migrate = CubicSpline(xs_granny_migrate, ys_granny_migrate) + new_xs_granny_migrate = linspace(0, max(xs_granny_migrate), num=num_points) + + ax.plot( + new_xs_granny_migrate, + spl_granny_migrate(new_xs_granny_migrate), + label=get_label_for_baseline(workload, "granny-migrate"), + color=get_color_for_baseline(workload, "granny-migrate"), + ) + + xlim = max(xlim, max(new_xs_granny_migrate)) + ax.set_xlim(left=0, right=xlim) + ax.set_ylim(bottom=0) + ax.set_xlabel("Time [s]") + ax.set_ylabel("# cross-VM links") + + +def _do_plot_percentage_vcpus(results, ax, **kwargs): + num_vms = kwargs["num_vms"] + workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality" + num_tasks = kwargs["num_tasks"] + + if workload == "mpi-migrate": + baselines = ["batch", "slurm", "granny", "granny-migrate"] + else: + baselines = ["granny", "granny-batch", "granny-migrate"] + + xs = [] + ys = [] + xticklabels = [] + + num_cpus_per_vm = 8 + + # Integral of idle CPU cores over time + # WARNING: this plot reads num_vms as an array + cumsum_ys = {} + for baseline in baselines: + cumsum_ys[baseline] = {} + + for n_vms in num_vms: + timestamps = list(results[n_vms][baseline]["ts_vcpus"].keys()) + total_cpusecs = ( + (timestamps[-1] - timestamps[0]) * num_cpus_per_vm * int(n_vms) + ) + + cumsum = cum_sum( + timestamps, + [ + res * num_cpus_per_vm * int(n_vms) / 100 + for res in list( + results[n_vms][baseline]["ts_vcpus"].values() + ) + ], + ) + + # Record both the total idle CPUsecs and the percentage + cumsum_ys[baseline][n_vms] = ( + cumsum, + (cumsum / total_cpusecs) * 100, + ) + + xs = [ind for ind in range(len(num_vms))] + xticklabels = [] + + for (n_vms, n_tasks) in zip(num_vms, num_tasks): + xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks)) + for baseline in baselines: + ys = [cumsum_ys[baseline][n_vms][1] for n_vms in num_vms] + ax.plot( + xs, + ys, + color=get_color_for_baseline(workload, baseline), + linestyle="-", + marker=".", + label=get_label_for_baseline(workload, baseline), + ) + + ax.set_ylim(bottom=0) + ax.set_xlim(left=-0.25) + ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]", fontsize=8) + ax.set_xticks(xs, labels=xticklabels) + + +def _do_plot_percentage_xvm(results, ax, **kwargs): + num_vms = kwargs["num_vms"] + workload = "mpi-migrate" if "migrate" in kwargs else "mpi-locality" + num_tasks = kwargs["num_tasks"] + + if workload == "mpi-migrate": + baselines = ["batch", "slurm", "granny", "granny-migrate"] + optimal_baseline = "batch" + else: + baselines = ["granny", "granny-batch", "granny-migrate"] + optimal_baseline = "granny-batch" + + xs = [] + ys = [] + xticklabels = [] + + # Integral of idle CPU cores over time + cumsum_ys = {} + for baseline in baselines: + cumsum_ys[baseline] = {} + + for n_vms in num_vms: + timestamps = list(results[n_vms][baseline]["ts_xvm_links"].keys()) + + cumsum = cum_sum( + timestamps, + [ + res + for res in list( + results[n_vms][baseline]["ts_xvm_links"].values() + ) + ], + ) + + cumsum_ys[baseline][n_vms] = cumsum + + # WARNING: this plot reads num_vms as an array + xs = [ind for ind in range(len(num_vms))] + xticklabels = [] + for (n_vms, n_tasks) in zip(num_vms, num_tasks): + xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks)) + for baseline in baselines: + ys = [ + cumsum_ys[baseline][n_vms] / cumsum_ys[optimal_baseline][n_vms] + for n_vms in num_vms + ] + ax.plot( + xs, + ys, + label=get_label_for_baseline(workload, baseline), + color=get_color_for_baseline(workload, baseline), + linestyle="-", + marker=".", + ) + + ax.set_ylim(bottom=0) + ax.set_xlim(left=-0.25) + ax.set_ylabel("Total cross-VM / Optimal cross-VM links", fontsize=8) + ax.set_xticks(xs, labels=xticklabels) + + +def _do_plot_cdf_jct(results, ax, **kwargs): + assert "cdf_num_vms" in kwargs, "cdf_num_vms not in kwargs!" + assert "cdf_num_tasks" in kwargs, "cdf_num_tasks not in kwargs!" + cdf_num_vms = kwargs["cdf_num_vms"] + cdf_num_tasks = kwargs["cdf_num_tasks"] + + baselines = ["granny-batch", "granny", "granny-migrate"] + + xs = list(range(cdf_num_tasks)) + for baseline in baselines: + ys = [] + + ys, xs, patches = ax.hist( + results[cdf_num_vms][baseline]["jct"], + 100, + color=get_color_for_baseline("mpi-locality", baseline), + label=get_label_for_baseline("mpi-locality", baseline), + histtype="step", + density=True, + cumulative=True, + ) + fix_hist_step_vertical_line_at_end(ax) + + ax.set_xlabel("Job Completion Time [s]") + ax.set_ylabel("CDF") + ax.set_ylim(bottom=0, top=1) + + +def plot_locality_results(plot_name, results, ax, **kwargs): + """ + This method keeps track of all the different alternative plots we have + explored for the motivation figure. + """ + if plot_name == "exec_vs_tiq": + _do_plot_exec_vs_tiq(results, ax, **kwargs) + elif plot_name == "exec_cdf": + _do_plot_exec_cdf(results, ax, **kwargs) + elif plot_name == "makespan": + _do_plot_makespan(results, ax, **kwargs) + elif plot_name == "ts_vcpus": + _do_plot_ts_vcpus(results, ax, **kwargs) + elif plot_name == "ts_xvm_links": + _do_plot_ts_xvm_links(results, ax, **kwargs) + elif plot_name == "percentage_vcpus": + _do_plot_percentage_vcpus(results, ax, **kwargs) + elif plot_name == "percentage_xvm": + _do_plot_percentage_xvm(results, ax, **kwargs) + elif plot_name == "cdf_jct": + _do_plot_cdf_jct(results, ax, **kwargs) diff --git a/tasks/util/makespan.py b/tasks/util/makespan.py index 59199a2..2814952 100644 --- a/tasks/util/makespan.py +++ b/tasks/util/makespan.py @@ -1,24 +1,11 @@ -from glob import glob from math import ceil, floor -from matplotlib.patches import Patch -from numpy import linspace from os import makedirs from os.path import join -from pandas import read_csv -from scipy.interpolate import CubicSpline -from tasks.util.trace import load_task_trace_from_file from tasks.util.env import ( PLOTS_ROOT, RESULTS_DIR, ) -from tasks.util.math import cum_sum -from tasks.util.planner import get_xvm_links_from_part from tasks.util.openmpi import get_native_mpi_pods_ip_to_vm -from tasks.util.plot import ( - fix_hist_step_vertical_line_at_end, - get_color_for_baseline, - get_label_for_baseline, -) # Directories MAKESPAN_RESULTS_DIR = join(RESULTS_DIR, "makespan") @@ -36,11 +23,13 @@ # - Slurm: native OpenMPI where we schedule jobs at CPU core granularity NATIVE_FT_BASELINES = ["batch-ft", "slurm-ft"] NATIVE_BASELINES = ["batch", "slurm"] + NATIVE_FT_BASELINES +GRANNY_BATCH_BASELINES = ["granny-batch"] GRANNY_ELASTIC_BASELINES = ["granny-elastic"] GRANNY_FT_BASELINES = ["granny-ft"] GRANNY_MIGRATE_BASELINES = ["granny-migrate"] GRANNY_BASELINES = ( ["granny"] + + GRANNY_BATCH_BASELINES + GRANNY_MIGRATE_BASELINES + GRANNY_FT_BASELINES + GRANNY_ELASTIC_BASELINES @@ -48,7 +37,7 @@ ALLOWED_BASELINES = NATIVE_BASELINES + GRANNY_BASELINES # Workload/Migration related constants -MPI_MIGRATE_WORKLOADS = ["mpi-migrate", "mpi-evict", "mpi-spot"] +MPI_MIGRATE_WORKLOADS = ["mpi-locality", "mpi-evict", "mpi-spot"] MPI_WORKLOADS = ["mpi"] + MPI_MIGRATE_WORKLOADS OPENMP_WORKLOADS = ["omp", "omp-elastic"] @@ -306,953 +295,3 @@ def get_idle_core_count_from_task_info( start_t += 1 return num_idle_cores_per_time_step - - -# ---------------------------- -# Plotting utilities -# ---------------------------- - - -def read_makespan_results(num_vms, num_tasks, num_cpus_per_vm): - workload = "mpi-migrate" - - # Load results - result_dict = {} - glob_str = "makespan_exec-task-info_*_{}_{}_{}_{}.csv".format( - num_vms, workload, num_tasks, num_cpus_per_vm - ) - for csv in glob(join(MAKESPAN_RESULTS_DIR, glob_str)): - baseline = csv.split("_")[2] - workload = csv.split("_")[4] - - # ----- - # Results to visualise differences between execution time and time - # in queue - # ----- - - # Results for per-job exec time and time-in-queue - result_dict[baseline] = {} - results = read_csv(csv) - task_ids = results["TaskId"].to_list() - times_exec = results["TimeExecuting"].to_list() - times_queue = results["TimeInQueue"].to_list() - start_ts = results["StartTimeStamp"].to_list() - genesis_ts = min(start_ts) - end_ts = results["EndTimeStamp"].to_list() - result_dict[baseline]["exec-time"] = [-1 for _ in task_ids] - result_dict[baseline]["queue-time"] = [-1 for _ in task_ids] - result_dict[baseline]["jct"] = [-1 for _ in task_ids] - - for tid, texec, tqueue, e_ts in zip( - task_ids, times_exec, times_queue, end_ts - ): - result_dict[baseline]["exec-time"][tid] = texec - result_dict[baseline]["queue-time"][tid] = tqueue - result_dict[baseline]["jct"][tid] = e_ts - genesis_ts - - # ----- - # Results to visualise job churn - # ----- - - start_ts = results.min()["StartTimeStamp"] - end_ts = results.max()["EndTimeStamp"] - time_elapsed_secs = int(end_ts - start_ts) - result_dict[baseline]["makespan"] = time_elapsed_secs - print( - "Num VMs: {} - Num Tasks: {} - Baseline: {} - Makespan: {}s".format( - num_vms, num_tasks, baseline, time_elapsed_secs - ) - ) - if time_elapsed_secs > 1e5: - raise RuntimeError( - "Measured total time elapsed is too long: {}".format( - time_elapsed_secs - ) - ) - - # Dump all data - tasks_per_ts = [[] for i in range(time_elapsed_secs)] - for index, row in results.iterrows(): - task_id = row["TaskId"] - start_slot = int(row["StartTimeStamp"] - start_ts) - end_slot = int(row["EndTimeStamp"] - start_ts) - for ind in range(start_slot, end_slot): - tasks_per_ts[ind].append(task_id) - for tasks in tasks_per_ts: - tasks.sort() - - # Prune the timeseries - pruned_tasks_per_ts = {} - # prev_tasks = [] - for ts, tasks in enumerate(tasks_per_ts): - # NOTE: we are not pruning at the moment - pruned_tasks_per_ts[ts] = tasks - # if tasks != prev_tasks: - # pruned_tasks_per_ts[ts] = tasks - # prev_tasks = tasks - - result_dict[baseline]["tasks_per_ts"] = pruned_tasks_per_ts - - # ----- - # Results to visualise scheduling info per task - # ----- - - sched_info_csv = "makespan_sched-info_{}_{}_{}_{}_{}.csv".format( - baseline, num_vms, workload, num_tasks, num_cpus_per_vm - ) - if baseline not in GRANNY_BASELINES: - result_dict[baseline]["task_scheduling"] = {} - - # We identify VMs by numbers, not IPs - ip_to_vm = {} - vm_to_id = {} - with open( - join(MAKESPAN_RESULTS_DIR, sched_info_csv), "r" - ) as sched_fd: - # Process the file line by line, as each line will be different in - # length - for num, line in enumerate(sched_fd): - # Skip the header - if num == 0: - continue - - line = line.strip() - - # In line 1 we include the IP to node conversion as one - # comma-separated line, so we parse it here - if num == 1: - ip_to_vm_line = line.split(",") - assert len(ip_to_vm_line) % 2 == 0 - - i = 0 - while i < len(ip_to_vm_line): - ip = ip_to_vm_line[i] - vm = ip_to_vm_line[i + 1] - ip_to_vm[ip] = vm - i += 2 - - continue - - # Get the task id and the scheduling decision from the line - task_id = line.split(",")[0] - result_dict[baseline]["task_scheduling"][task_id] = {} - sched_info = line.split(",")[1:] - # The scheduling decision must be even, as it contains pairs - # of ip + slots - assert len(sched_info) % 2 == 0 - - i = 0 - while i < len(sched_info): - vm = ip_to_vm[sched_info[i]] - slots = sched_info[i + 1] - - if vm not in vm_to_id: - len_map = len(vm_to_id) - vm_to_id[vm] = len_map - - vm_id = vm_to_id[vm] - if ( - vm_id - not in result_dict[baseline]["task_scheduling"][ - task_id - ] - ): - result_dict[baseline]["task_scheduling"][task_id][ - vm_id - ] = 0 - - result_dict[baseline]["task_scheduling"][task_id][ - vm_id - ] += int(slots) - i += 2 - - # ----- - # Results to visualise the % of idle vCPUs (and VMs) over time - # ----- - - task_trace = load_task_trace_from_file( - workload, num_tasks, num_cpus_per_vm - ) - - result_dict[baseline]["ts_vcpus"] = {} - result_dict[baseline]["ts_xvm_links"] = {} - result_dict[baseline]["ts_idle_vms"] = {} - total_available_vcpus = num_vms * num_cpus_per_vm - - if baseline in NATIVE_BASELINES: - # First, set each timestamp to the total available vCPUs, and - # initialise the set of idle vms - for ts in result_dict[baseline]["tasks_per_ts"]: - result_dict[baseline]["ts_vcpus"][ts] = total_available_vcpus - result_dict[baseline]["ts_idle_vms"][ts] = set() - - # Second, for each ts subtract the size of each task in-flight - for ts in result_dict[baseline]["tasks_per_ts"]: - for t in result_dict[baseline]["tasks_per_ts"][ts]: - result_dict[baseline]["ts_vcpus"][ts] -= task_trace[ - int(t) - ].size - - # In addition, for each task in flight, add the tasks's IPs - # to the host set - for vm_id in result_dict[baseline]["task_scheduling"][ - str(int(t)) - ]: - result_dict[baseline]["ts_idle_vms"][ts].add(vm_id) - - # Third, express the results as percentages, and the number of - # idle VMs as a number (not as a set) - for ts in result_dict[baseline]["ts_vcpus"]: - result_dict[baseline]["ts_vcpus"][ts] = ( - result_dict[baseline]["ts_vcpus"][ts] - / total_available_vcpus - ) * 100 - - result_dict[baseline]["ts_idle_vms"][ts] = num_vms - len( - result_dict[baseline]["ts_idle_vms"][ts] - ) - else: - # For Granny, the idle vCPUs results are directly available in - # the file - sch_info_csv = read_csv(join(MAKESPAN_RESULTS_DIR, sched_info_csv)) - idle_cpus = ( - sch_info_csv["NumIdleCpus"] / total_available_vcpus * 100 - ).to_list() - tss = ( - sch_info_csv["TimeStampSecs"] - - sch_info_csv["TimeStampSecs"][0] - ).to_list() - - # Idle vCPUs - for (idle_cpu, ts) in zip(idle_cpus, tss): - result_dict[baseline]["ts_vcpus"][ts] = idle_cpu - - # x-VM links - xvm_links = sch_info_csv["NumCrossVmLinks"].to_list() - for (ts, xvm_link) in zip(tss, xvm_links): - result_dict[baseline]["ts_xvm_links"][ts] = xvm_link - - # Num of idle VMs - num_idle_vms = sch_info_csv["NumIdleVms"].to_list() - for (ts, n_idle_vms) in zip(tss, num_idle_vms): - result_dict[baseline]["ts_idle_vms"][ts] = n_idle_vms - - # ----- - # Results to visualise the # of cross-vm links - # ----- - - if baseline in NATIVE_BASELINES: - for ts in result_dict[baseline]["tasks_per_ts"]: - result_dict[baseline]["ts_xvm_links"][ts] = 0 - - for ts in result_dict[baseline]["tasks_per_ts"]: - for t in result_dict[baseline]["tasks_per_ts"][ts]: - if baseline == "slurm": - sched = result_dict[baseline]["task_scheduling"][ - str(int(t)) - ] - - # If only scheduled to one VM, no cross-VM links - if len(sched) <= 1: - continue - - # Add the accumulated to the total tally - result_dict[baseline]["ts_xvm_links"][ - ts - ] += get_xvm_links_from_part(list(sched.values())) - elif baseline == "batch": - # Batch baseline is optimal in terms of cross-vm links - task_size = task_trace[int(t)].size - if task_size > 8: - num_links = 8 * (task_size - 8) / 2 - result_dict[baseline]["ts_xvm_links"][ - ts - ] += num_links - - return result_dict - - -def do_makespan_plot(plot_name, results, ax, num_vms, num_tasks): - """ - This method keeps track of all the different alternative plots we have - explored for the motivation figure. - """ - - if plot_name == "exec_vs_tiq": - """ - This plot presents the percentiles of slowdown of execution time (and - time in queue too?) - """ - num_jobs = len(results[num_vms[0]]["slurm"]["exec-time"]) - - num_slowdowns = 3 - percentiles = [50, 75, 90, 95, 100] - width = float(1 / len(percentiles)) * 0.8 - xs = [] - ys = [] - xticks = [] - xlabels = [] - colors = [] - xs_vlines = [] - - for vm_ind, n_vms in enumerate(num_vms): - - x_vm_offset = vm_ind * num_slowdowns - - # Calculate slowdowns wrt granny w/ migration - slurm_slowdown = sorted( - [ - float(slurm_time / granny_time) - for (slurm_time, granny_time) in zip( - results[n_vms]["slurm"]["exec-time"], - results[n_vms]["granny-migrate"]["exec-time"], - ) - ] - ) - batch_slowdown = sorted( - [ - float(batch_time / granny_time) - for (batch_time, granny_time) in zip( - results[n_vms]["batch"]["exec-time"], - results[n_vms]["granny-migrate"]["exec-time"], - ) - ] - ) - granny_nomig_slowdown = sorted( - [ - float(granny_nomig_time / granny_time) - for (granny_nomig_time, granny_time) in zip( - results[n_vms]["granny"]["exec-time"], - results[n_vms]["granny-migrate"]["exec-time"], - ) - ] - ) - - for ind, (bline, slowdown) in enumerate( - [ - ("slurm", slurm_slowdown), - ("batch", batch_slowdown), - ("granny-no-migration", granny_nomig_slowdown), - ] - ): - x_bline_offset = ind - - for subind, percentile in enumerate(percentiles): - x = ( - x_vm_offset - + x_bline_offset - - width * int(len(percentiles) / 2) - + width * subind - + width * 0.5 * (len(percentiles) % 2 == 0) - ) - xs.append(x) - if percentile == 100: - ys.append(slowdown[-1]) - else: - index = int(percentile / 100 * num_jobs) - ys.append(slowdown[index]) - colors.append(get_color_for_baseline("mpi-migrate", bline)) - - # Add a vertical line at the end of each VM block - xs_vlines.append( - x_vm_offset - + (num_slowdowns * len(percentiles) + 1 - 0.25) * width - ) - - # Add a label once per VM block - x_label = x_vm_offset + ( - (num_slowdowns * len(percentiles) + num_slowdowns - 2) - / 2 - * width - ) - xticks.append(x_label) - xlabels.append( - "{} VMs\n({} Jobs)".format(n_vms, num_tasks[vm_ind]) - ) - - xmin = -0.5 - xmax = len(num_vms) * num_slowdowns - 0.5 - ymin = 0.75 - ymax = 2 - - ax.bar(xs, ys, width=width, color=colors, edgecolor="black") - ax.hlines(y=1, color="red", xmin=xmin, xmax=xmax) - ax.vlines( - xs_vlines, - ymin=ymin, - ymax=ymax, - color="gray", - linestyles="dashed", - linewidth=0.5, - ) - ax.set_xticks(xticks, labels=xlabels, fontsize=6) - ax.set_ylabel("Slowdown [Baseline/Granny]") - ax.set_xlabel( - "Job percentile [{}]".format( - ",".join([str(p) + "th" for p in percentiles]) - ), - fontsize=8, - ) - ax.set_xlim(left=xmin, right=xmax) - ax.set_ylim(bottom=ymin, top=ymax) - - elif plot_name == "exec_abs": - """ - TEMP - """ - num_vms = 32 - num_jobs = len(results[num_vms]["slurm"]["exec-time"]) - labels = ["slurm", "batch", "granny", "granny-migrate"] - percentiles = [50, 75, 90, 95, 100] - - xs = list(range(num_jobs)) - for label in labels: - ys = [] - for percentile in percentiles: - if percentile == 100: - index = -1 - else: - index = int(percentile / 100 * num_jobs) - - ys.append(results[num_vms][label]["exec-time"][index]) - - ax.plot( - percentiles, - ys, - label=get_label_for_baseline("mpi-migrate", label), - color=get_color_for_baseline("mpi-migrate", label), - ) - ax.set_xlabel("Job percentile [th]") - ax.set_ylabel("Job completion time [s]") - ax.set_ylim(bottom=0) - ax.legend() - - elif plot_name == "exec_cdf": - """ - CDF of the absolute job completion time - (from the beginning of time, until the job has finished) - """ - num_jobs = len(results[num_vms]["slurm"]["exec-time"]) - labels = ["slurm", "batch", "granny", "granny-migrate"] - xs = list(range(num_jobs)) - - for label in labels: - ys = [] - - this_label = label - if label == "granny-migrate": - this_label = "granny" - elif label == "granny": - this_label = "granny-nomig" - - # Calculate the histogram using the histogram function, get the - # results from the return value, but make the plot transparent. - # TODO: maybe just calculate the CDF analitically? - ys, xs, patches = ax.hist( - results[num_vms][label]["jct"], - 100, - color=get_color_for_baseline("mpi-migrate", label), - histtype="step", - density=True, - cumulative=True, - label=this_label, - # alpha=0, - ) - fix_hist_step_vertical_line_at_end(ax) - - # Interpolate more points - # spl = splrep(xs[:-1], ys, s=0.01, per=False) - # x2 = linspace(xs[0], xs[-1], 400) - # y2 = splev(x2, spl) - # ax.plot(x2, y2, color=PLOT_COLORS[label], label=this_label, linewidth=0.5) - - ax.set_xlabel("Job Completion Time [s]") - ax.set_ylabel("CDF") - ax.set_ylim(bottom=0, top=1) - ax.legend() - - elif plot_name == "makespan": - labels = ["slurm", "batch", "granny", "granny-migrate"] - - xs = [] - ys = [] - colors = [] - xticks = [] - xticklabels = [] - - # WARNING: this plot reads num_vms as an array - for ind, n_vms in enumerate(num_vms): - x_offset = ind * len(labels) + (ind + 1) - xs += [x + x_offset for x in range(len(labels))] - ys += [results[n_vms][la]["makespan"] for la in labels] - colors += [ - get_color_for_baseline("mpi-migrate", la) for la in labels - ] - - # Add one tick and xlabel per VM size - xticks.append(x_offset + len(labels) / 2) - xticklabels.append( - "{} VMs\n({} Jobs)".format(n_vms, num_tasks[ind]) - ) - - # Add spacing between vms - xs.append(x_offset + len(labels)) - ys.append(0) - colors.append("white") - - ax.bar(xs, ys, color=colors, edgecolor="black", width=1) - ax.set_ylim(bottom=0) - ax.set_ylabel("Makespan [s]") - ax.set_xticks(xticks, labels=xticklabels, fontsize=6) - - # Manually craft legend - legend_entries = [ - Patch( - color=get_color_for_baseline("mpi-migrate", label), - label=get_label_for_baseline("mpi-migrate", label), - ) - for label in labels - ] - ax.legend(handles=legend_entries, ncols=2, fontsize=8) - - elif plot_name == "job_churn": - """ - This plot presents a heatmap of the job churn for one execution of the - trace. On the X axis we have time in seconds, and on the Y axis we have - all vCPUs in the cluster (32 * 8). There are 100 different colors in - the plot, one for each job. Coordinate [x, y] is of color C_j if Job - `j` is using vCPU `y` at time `x` - """ - # Fix one baseline (should we?) - baseline = "slurm" - # On the X axis, we have each job as a bar - num_ts = len(results[baseline]["tasks_per_ts"]) - ncols = num_ts - num_vms = 32 - num_cpus_per_vm = 8 - nrows = num_vms * num_cpus_per_vm - - # Data shape is (nrows, ncols). We have as many columns as tasks, and - # as many rows as the total number of CPUs. - # data[m, n] = task_id if cpu m is being used by task_id at timestamp n - # (where m is the row and n the column) - data = [[-1 for _ in range(ncols)] for _ in range(nrows)] - - for ts in results[baseline]["tasks_per_ts"]: - # This dictionary contains the in-flight tasks per timestamp (where - # the timestamp has already been de-duplicated) - tasks_in_flight = results[baseline]["tasks_per_ts"][ts] - vm_cpu_offset = {} - for i in range(num_vms): - vm_cpu_offset[i] = 0 - for t in tasks_in_flight: - t_id = int(t) - sched_decision = results[baseline]["task_scheduling"][ - str(t_id) - ] - # Work out which rows (i.e. CPU cores) to paint - for vm in sched_decision: - cpus_in_vm = sched_decision[vm] - cpu_offset = vm_cpu_offset[vm] - vm_offset = vm * num_cpus_per_vm - this_offset = vm_offset + cpu_offset - for j in range(this_offset, this_offset + cpus_in_vm): - data[j][ts] = t_id - vm_cpu_offset[vm] += cpus_in_vm - - ax.imshow(data, origin="lower") - - elif plot_name == "ts_vcpus": - """ - This plot presents a timeseries of the % of idle vCPUs over time - """ - baselines = ["batch", "slurm", "granny-migrate"] - xlim = 0 - for baseline in baselines: - if baseline in NATIVE_BASELINES: - xs = range(len(results[num_vms][baseline]["ts_vcpus"])) - else: - xs = list(results[num_vms][baseline]["ts_vcpus"].keys()) - xlim = max(xlim, max(xs)) - - ax.plot( - xs, - [results[num_vms][baseline]["ts_vcpus"][x] for x in xs], - label=get_label_for_baseline("mpi-migrate", baseline), - color=get_color_for_baseline("mpi-migrate", baseline), - ) - - ax.set_xlim(left=0, right=xlim) - ax.set_ylim(bottom=0, top=100) - ax.set_ylabel("% idle vCPUs") - ax.set_xlabel("Time [s]") - - elif plot_name == "ts_xvm_links": - """ - This plot presents a timeseries of the # of cross-VM links over time - """ - num_points = 500 - - xs_slurm = range(len(results[num_vms]["slurm"]["ts_xvm_links"])) - xs_batch = range(len(results[num_vms]["batch"]["ts_xvm_links"])) - - # xs_granny = list(results[num_vms]["granny"]["ts_xvm_links"].keys()) - # ys_granny = [results[num_vms]["granny"]["ts_xvm_links"][x] for x in xs_granny] - # spl_granny = CubicSpline(xs_granny, ys_granny) - # new_xs_granny = linspace(0, max(xs_granny), num=num_points) - - xs_granny_migrate = list( - results[num_vms]["granny-migrate"]["ts_xvm_links"].keys() - ) - ys_granny_migrate = [ - results[num_vms]["granny-migrate"]["ts_xvm_links"][x] - for x in xs_granny_migrate - ] - spl_granny_migrate = CubicSpline(xs_granny_migrate, ys_granny_migrate) - new_xs_granny_migrate = linspace( - 0, max(xs_granny_migrate), num=num_points - ) - - ax.plot( - xs_slurm, - [results[num_vms]["slurm"]["ts_xvm_links"][x] for x in xs_slurm], - label="slurm", - color=get_color_for_baseline("mpi-migrate", "slurm"), - ) - ax.plot( - xs_batch, - [results[num_vms]["batch"]["ts_xvm_links"][x] for x in xs_batch], - label="batch", - color=get_color_for_baseline("mpi-migrate", "batch"), - ) - """ - ax.plot( - new_xs_granny, - spl_granny(new_xs_granny), - label="granny", - color=PLOT_COLORS["granny"], - ) - """ - ax.plot( - new_xs_granny_migrate, - spl_granny_migrate(new_xs_granny_migrate), - label="granny-migrate", - color=get_color_for_baseline("mpi-migrate", "granny-migrate"), - ) - - xlim = max(xs_batch[-1], xs_slurm[-1], new_xs_granny_migrate[-1]) - ax.set_xlim(left=0, right=xlim) - ax.set_ylim(bottom=0) - ax.set_xlabel("Time [s]") - ax.set_ylabel("# cross-VM links") - elif plot_name == "ts_idle_vms": - """ - This plot presents a timeseries of the # of idle VMs over time - """ - num_points = 500 - - xs_slurm = range(len(results[num_vms]["slurm"]["ts_xvm_links"])) - xs_batch = range(len(results[num_vms]["batch"]["ts_xvm_links"])) - - # xs_granny = list(results[num_vms]["granny"]["ts_xvm_links"].keys()) - # ys_granny = [results[num_vms]["granny"]["ts_xvm_links"][x] for x in xs_granny] - # spl_granny = CubicSpline(xs_granny, ys_granny) - # new_xs_granny = linspace(0, max(xs_granny), num=num_points) - - # TODO: FIXME: move from granny-migrate to granny-evict! - xs_granny_migrate = list( - results[num_vms]["granny-migrate"]["ts_idle_vms"].keys() - ) - ys_granny_migrate = [ - (results[num_vms]["granny-migrate"]["ts_idle_vms"][x] / num_vms) - * 100 - for x in xs_granny_migrate - ] - spl_granny_migrate = CubicSpline(xs_granny_migrate, ys_granny_migrate) - new_xs_granny_migrate = linspace( - 0, max(xs_granny_migrate), num=num_points - ) - - ax.plot( - xs_slurm, - [ - (results[num_vms]["slurm"]["ts_idle_vms"][x] / num_vms) * 100 - for x in xs_slurm - ], - label=get_label_for_baseline("mpi-migrate", "slurm"), - color=get_color_for_baseline("mpi-migrate", "slurm"), - ) - ax.plot( - xs_batch, - [ - (results[num_vms]["batch"]["ts_idle_vms"][x] / num_vms) * 100 - for x in xs_batch - ], - label=get_label_for_baseline("mpi-migrate", "batch"), - color=get_color_for_baseline("mpi-migrate", "batch"), - ) - """ - ax.plot( - new_xs_granny, - spl_granny(new_xs_granny), - label="granny", - color=PLOT_COLORS["granny"], - ) - """ - # TODO: FIXME move to granny-evict - ax.plot( - new_xs_granny_migrate, - spl_granny_migrate(new_xs_granny_migrate), - label=get_label_for_baseline("mpi-migrate", "granny-migrate"), - color=get_color_for_baseline("mpi-migrate", "granny-migrate"), - ) - - xlim = max(xs_batch[-1], xs_slurm[-1], new_xs_granny_migrate[-1]) - ax.set_xlim(left=0, right=xlim) - ax.set_ylim(bottom=0, top=100) - ax.set_xlabel("Time [s]") - ax.set_ylabel("Idle VMs [%]") - elif plot_name == "percentage_vcpus": - labels = ["slurm", "batch", "granny", "granny-migrate"] - - xs = [] - ys = [] - colors = [] - xticks = [] - xticklabels = [] - - num_cpus_per_vm = 8 - - # Integral of idle CPU cores over time - # WARNING: this plot reads num_vms as an array - cumsum_ys = {} - for la in labels: - cumsum_ys[la] = {} - - for n_vms in num_vms: - timestamps = list(results[n_vms][la]["ts_vcpus"].keys()) - total_cpusecs = ( - (timestamps[-1] - timestamps[0]) - * num_cpus_per_vm - * int(n_vms) - ) - - cumsum = cum_sum( - timestamps, - [ - res * num_cpus_per_vm * int(n_vms) / 100 - for res in list( - results[n_vms][la]["ts_vcpus"].values() - ) - ], - ) - - # Record both the total idle CPUsecs and the percentage - cumsum_ys[la][n_vms] = (cumsum, (cumsum / total_cpusecs) * 100) - - xs = [ind for ind in range(len(num_vms))] - xticklabels = [] - - for (n_vms, n_tasks) in zip(num_vms, num_tasks): - xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks)) - for la in labels: - ys = [cumsum_ys[la][n_vms][1] for n_vms in num_vms] - ax.plot( - xs, - ys, - color=get_color_for_baseline("mpi-migrate", la), - linestyle="-", - marker=".", - label=get_label_for_baseline("mpi-migrate", la), - ) - - ax.set_ylim(bottom=0) - ax.set_xlim(left=-0.25) - ax.set_ylabel("Idle CPU-seconds /\n Total CPU-seconds [%]", fontsize=8) - ax.set_xticks(xs, labels=xticklabels, fontsize=6) - - elif plot_name == "percentage_xvm": - labels = ["slurm", "batch", "granny", "granny-migrate"] - - xs = [] - ys = [] - colors = [] - xticks = [] - xticklabels = [] - - num_cpus_per_vm = 8 - - # Integral of idle CPU cores over time - cumsum_ys = {} - for la in labels: - cumsum_ys[la] = {} - - for n_vms in num_vms: - timestamps = list(results[n_vms][la]["ts_xvm_links"].keys()) - total_cpusecs = ( - (timestamps[-1] - timestamps[0]) - * num_cpus_per_vm - * int(n_vms) - ) - - cumsum = cum_sum( - timestamps, - [ - res - for res in list( - results[n_vms][la]["ts_xvm_links"].values() - ) - ], - ) - - cumsum_ys[la][n_vms] = cumsum - - # WARNING: this plot reads num_vms as an array - xs = [ind for ind in range(len(num_vms))] - xticklabels = [] - for (n_vms, n_tasks) in zip(num_vms, num_tasks): - xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks)) - for la in labels: - ys = [ - cumsum_ys[la][n_vms] / cumsum_ys["batch"][n_vms] - for n_vms in num_vms - ] - ax.plot( - xs, - ys, - label=get_label_for_baseline("mpi-migrate", la), - color=get_color_for_baseline("mpi-migrate", la), - linestyle="-", - marker=".", - ) - - ax.set_ylim(bottom=0) - ax.set_xlim(left=-0.25) - ax.set_ylabel("Total cross-VM / Optimal cross-VM links", fontsize=8) - ax.set_xticks(xs, labels=xticklabels, fontsize=6) - - # TODO: delete me - elif plot_name == "boxplot_xvm": - labels = ["slurm", "batch", "granny", "granny-migrate"] - - xs = [] - ys = [] - colors = [] - xticks = [] - xticklabels = [] - - # WARNING: this plot reads num_vms as an array - for ind, n_vms in enumerate(num_vms): - x_offset = ind * len(labels) + (ind + 1) - - # For each cluster size, and for each label, we add two boxplots - - # Number of cross-VM links - xs += [x + x_offset for x in range(len(labels))] - ys += [ - list(results[n_vms][la]["ts_xvm_links"].values()) - for la in labels - ] - - # Color and alpha for each box - colors += [ - get_color_for_baseline("mpi-migrate", la) for la in labels - ] - - # Add one tick and xlabel per VM size - xticks.append(x_offset + len(labels) / 2) - xticklabels.append( - "{} VMs\n({} Jobs)".format(n_vms, num_tasks[ind]) - ) - - bplot = ax.boxplot( - ys, - sym="", - vert=True, - positions=xs, - patch_artist=True, - widths=0.5, - ) - - for (box, color) in zip(bplot["boxes"], colors): - box.set_facecolor(color) - box.set_edgecolor("black") - - ax.set_ylim(bottom=0) - ax.set_ylabel("# cross-VM links") - ax.set_xticks(xticks, labels=xticklabels, fontsize=6) - - # Manually craft legend - legend_entries = [ - Patch( - color=get_color_for_baseline("mpi-migrate", label), - label=get_label_for_baseline("mpi-migrate", label), - ) - for label in labels - ] - ax.legend(handles=legend_entries, ncols=2, fontsize=8) - elif plot_name == "used_vmsecs": - # TODO: FIXME: move to granny-evict - labels = ["slurm", "batch", "granny", "granny-migrate"] - - xs = [] - ys = [] - colors = [] - xticks = [] - xticklabels = [] - - # Integral of idle CPU cores over time - cumsum_ys = {} - for la in labels: - cumsum_ys[la] = {} - - for n_vms in num_vms: - timestamps = list(results[n_vms][la]["ts_idle_vms"].keys()) - - cumsum = cum_sum( - timestamps, - [ - (n_vms - res) - for res in list( - results[n_vms][la]["ts_idle_vms"].values() - ) - ], - ) - # TODO: delete me - if n_vms == 16: - print( - n_vms, - la, - cumsum, - [ - res - for res in list( - results[n_vms][la]["ts_idle_vms"].values() - ) - ], - ) - - cumsum_ys[la][n_vms] = cumsum - - # WARNING: this plot reads num_vms as an array - xs = [ind for ind in range(len(num_vms))] - xticklabels = [] - for (n_vms, n_tasks) in zip(num_vms, num_tasks): - xticklabels.append("{} VMs\n({} Jobs)".format(n_vms, n_tasks)) - for la in labels: - ys = [cumsum_ys[la][n_vms] for n_vms in num_vms] - ax.plot( - xs, - ys, - color=get_color_for_baseline("mpi-migrate", la), - label=get_label_for_baseline("mpi-migrate", la), - linestyle="-", - marker=".", - ) - - ax.set_ylim(bottom=0) - ax.set_xlim(left=-0.25) - ax.set_ylabel("Used VM Seconds", fontsize=8) - ax.set_xticks(xs, labels=xticklabels, fontsize=6) - ax.ticklabel_format(axis="y", style="sci", scilimits=(3, 3)) - ax.legend(fontsize=6, ncols=2) diff --git a/tasks/util/planner.py b/tasks/util/planner.py index fa8e766..d5818d0 100644 --- a/tasks/util/planner.py +++ b/tasks/util/planner.py @@ -2,6 +2,7 @@ get_available_hosts as planner_get_available_hosts, get_in_fligh_apps as planner_get_in_fligh_apps, ) +from math import ceil from time import sleep @@ -33,6 +34,10 @@ def get_num_available_slots_from_in_flight_apps( user_id=None, num_evicted_vms=None, openmp=False, + # Used to leave some slack CPUs to help de-fragment (for `mpi-locality`) + next_task_size=None, + # Used to make Granny behave like batch (for `mpi-locality`) + batch=False, ): """ For Granny baselines, we cannot use static knowledge of the @@ -62,6 +67,10 @@ def get_num_available_slots_from_in_flight_apps( ] ) + used_slots_map = { + host.ip: host.usedSlots for host in available_hosts.hosts + } + next_evicted_vm_ips = [] try: next_evicted_vm_ips = in_flight_apps.nextEvictedVmIps @@ -87,7 +96,6 @@ def get_num_available_slots_from_in_flight_apps( # sleep for a bit and ask again (we allow the size to go over the # specified size in case of an elsatic scale-up) if any([len(app.hostIps) < app.size for app in in_flight_apps.apps]): - print("App not fully in-flight. We wait...") sleep(short_sleep_secs) continue @@ -146,14 +154,36 @@ def get_num_available_slots_from_in_flight_apps( ] ) + # In a batch setting, we allocate resources to jobs at VM granularity + # The planner will by default do so, if enough free VMs are available + if batch and next_task_size is not None: + num_needed_vms = ceil(next_task_size / num_cpus_per_vm) + if ( + num_vms - len(list(worker_occupation.keys())) + ) < num_needed_vms: + sleep(5 * long_sleep_secs) + continue + num_available_slots = ( num_vms - len(list(worker_occupation.keys())) ) * num_cpus_per_vm for ip in worker_occupation: + if worker_occupation[ip] != used_slots_map[ip]: + print( + "Inconsistent worker used slots map for ip: {}".format(ip) + ) + must_hold_back = True + break + num_available_slots += num_cpus_per_vm - worker_occupation[ip] + if must_hold_back: + sleep(long_sleep_secs) + continue + # Double-check the number of available slots with our other source of truth - if user_id is not None and num_available_slots != available_slots: + # are consistent with the in-flight apps? + if num_available_slots != available_slots: print( "WARNING: inconsistency in the number of available slots" " (in flight: {} - registered: {})".format( @@ -163,6 +193,18 @@ def get_num_available_slots_from_in_flight_apps( sleep(short_sleep_secs) continue + # TODO: decide on the percentage, 10% or 5% ? + # with 10% we almost always have perfect locality + pctg = 0.05 + if ( + next_task_size is not None + and not batch + and (num_available_slots - next_task_size) + < int(num_vms * num_cpus_per_vm * pctg) + ): + sleep(long_sleep_secs) + continue + # If we have made it this far, we are done break diff --git a/tasks/util/plot.py b/tasks/util/plot.py index 3d3e3a4..e718002 100644 --- a/tasks/util/plot.py +++ b/tasks/util/plot.py @@ -24,6 +24,9 @@ PLOT_PATTERNS = ["//", "\\\\", "||", "-", "*-", "o-"] SINGLE_COL_FIGSIZE = (6, 3) +SINGLE_COL_FIGSIZE_HALF = (3, 3) +DOUBLE_COL_FIGSIZE_HALF = SINGLE_COL_FIGSIZE +DOUBLE_COL_FIGSIZE_THIRD = (4, 4) def fix_hist_step_vertical_line_at_end(ax): @@ -74,7 +77,7 @@ def _do_get_for_baseline(workload, baseline, color=False, label=False): return this_label if color: return _PLOT_COLORS[this_label] - if baseline == "batch" or baseline == "slurm": + if baseline in ["slurm", "batch"]: this_label = baseline if label: return this_label @@ -87,6 +90,32 @@ def _do_get_for_baseline(workload, baseline, color=False, label=False): ) ) + if workload == "mpi-locality": + if baseline == "granny": + this_label = "slurm" + if label: + return this_label + if color: + return _PLOT_COLORS[this_label] + if baseline == "granny-migrate": + this_label = "granny" + if label: + return this_label + if color: + return _PLOT_COLORS[this_label] + if baseline == "granny-batch": + this_label = "batch" + if label: + return this_label + if color: + return _PLOT_COLORS[this_label] + + raise RuntimeError( + "Unrecognised baseline ({}) for workload: {}".format( + baseline, workload + ) + ) + if workload == "mpi-spot": if baseline == "granny": this_label = "granny" diff --git a/tasks/util/spot.py b/tasks/util/spot.py index 7700867..ea85f28 100644 --- a/tasks/util/spot.py +++ b/tasks/util/spot.py @@ -108,7 +108,7 @@ def _do_plot_makespan(results, ax, **kwargs): if tight: ax.set_xticks([]) else: - ax.set_xticks(xticks, labels=xticklabels, fontsize=6) + ax.set_xticks(xticks, labels=xticklabels) def _do_plot_cost(results, ax, **kwargs): @@ -203,11 +203,9 @@ def _do_plot_cost(results, ax, **kwargs): ) else: ax.text( - xs[-1] + 0.5, + xs[-1] - 0.5, ys[discount][-1] + 0.0001, "{}% off".format(discount), - fontsize=6, - rotation=90, ) bottom_ys = ys[discount] @@ -231,7 +229,7 @@ def _do_plot_cost(results, ax, **kwargs): else: ax.set_ylabel("Cost [VM Hours]") ax.legend(fontsize=8) - ax.set_xticks(xticks, labels=xticklabels, fontsize=6) + ax.set_xticks(xticks, labels=xticklabels) def plot_spot_results(plot_name, results, ax, **kwargs):