makespan: add a few more plots #39

Merged: 1 commit merged on Apr 18, 2024
2 changes: 2 additions & 0 deletions tasks/lammps/README.md
@@ -29,6 +29,8 @@ Upload the WASM file:
and run the experiment with:

```bash
# TODO: consider making this experiment shorter
# TODO: do we need the network workload?
(faasm-exp-faabric) inv lammps.run.wasm -w compute -w network
```
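As an aside, the repeated `-w` flags above arrive in the task body as a list. A minimal, standalone sketch of how that happens, assuming the task declares `w` as an iterable option in invoke (the task name and body below are placeholders, not the real `lammps.run.wasm` implementation):

```python
from invoke import task

@task(iterable=["w"])
def wasm(ctx, w):
    # With `inv ... -w compute -w network`, invoke collects the repeated
    # flags, so here w == ["compute", "network"]
    for workload in w:
        print("running workload:", workload)
```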

5 changes: 4 additions & 1 deletion tasks/lammps/run.py
@@ -99,6 +99,9 @@ def native(ctx, w, repeats=1):
"""
num_cpus_per_vm = 8
num_vms = 2
# TODO: decide whether hard-coding the "compute" benchmark here is a good
# idea (previously: get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0])
data_file = get_faasm_benchmark("compute")["data"][0]

for workload in w:
if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
@@ -121,7 +124,7 @@

native_cmdline = "-in {}/{}.faasm.native".format(
LAMMPS_MIGRATION_NET_DOCKER_DIR,
get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0],
data_file,
)

for nproc in NPROCS_EXPERIMENT:
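The change above looks the benchmark data file up once, stores it in `data_file`, and reuses it when building `native_cmdline`. A hedged sketch of the shape that lookup assumes, with a hypothetical stub in place of the real `get_faasm_benchmark` (the workload names and file stems are illustrative only):

```python
# Hypothetical stub: the real get_faasm_benchmark lives in the experiment
# utilities and is assumed to return a dict with a "data" list of file stems
def get_faasm_benchmark_stub(workload):
    benchmarks = {
        "compute": {"data": ["compute/in.compute"]},
        "network": {"data": ["network/in.network"]},
    }
    return benchmarks[workload]

# Look the data file up once and reuse it, mirroring the diff above
data_file = get_faasm_benchmark_stub("compute")["data"][0]
native_cmdline = "-in {}/{}.faasm.native".format("/path/to/lammps-data", data_file)
print(native_cmdline)
```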
127 changes: 126 additions & 1 deletion tasks/makespan/plot.py
@@ -1,5 +1,5 @@
from invoke import task
from matplotlib.pyplot import subplots
from matplotlib.pyplot import subplots, subplot_mosaic
from tasks.util.makespan import (
MAKESPAN_PLOTS_DIR,
do_makespan_plot,
@@ -49,3 +49,128 @@ def migration(ctx):
# ----------

save_plot(fig, MAKESPAN_PLOTS_DIR, "mpi_migration")


@task
def conservative(ctx):
"""
Macrobenchmark plot showing the benefits of migrating MPI applications to
improve locality of execution. We show:
- LHS: box plot of idle vCPUs and # of cross-VM links for all VM sizes
- RHS: timeseries of one of the box plots
"""
# NOTE: we probably want a higher number of tasks here to make sure we
# migrate more
# num_vms = [16, 24, 32, 48, 64]
# num_tasks = [50, 75, 100, 150, 200]
num_vms = [8, 16, 24]
num_tasks = [25, 50, 75]
num_cpus_per_vm = 8

# RHS: zoom in on one of the cluster sizes
timeseries_num_vms = 16 # 32
timeseries_num_tasks = 50 # 100

results = {}
for (n_vms, n_tasks) in zip(num_vms, num_tasks):
results[n_vms] = read_makespan_results(n_vms, n_tasks, num_cpus_per_vm)

fig, ax = subplot_mosaic([['upper left', 'upper right'],
['lower left', 'lower right']])

# ----------
# Plot 1: boxplot of idle vCPUs and num xVM links for various cluster sizes
# ----------

do_makespan_plot(
"boxplot_vcpus",
results,
ax["upper left"],
num_vms,
num_tasks
)

do_makespan_plot(
"percentage_xvm",
results,
ax["lower left"],
num_vms,
num_tasks
)

# ----------
# Plot 2: (two) timeseries of one of the cluster sizes
# ----------

do_makespan_plot(
"ts_vcpus",
results,
ax["upper right"],
timeseries_num_vms,
timeseries_num_tasks
)

do_makespan_plot(
"ts_xvm_links",
results,
ax["lower right"],
timeseries_num_vms,
timeseries_num_tasks
)

# ax[0][0].legend()
save_plot(fig, MAKESPAN_PLOTS_DIR, "resource_usage")
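For reference, the four-panel layout that `conservative` builds comes from matplotlib's `subplot_mosaic`, which returns a dict of axes keyed by the labels in the nested list. A self-contained sketch of just that layout (the panel titles are placeholders based on the docstring, not the real plotting code):

```python
from matplotlib.pyplot import subplot_mosaic

# Recreate the 2x2 mosaic used above: each label maps to one axes
fig, ax = subplot_mosaic([["upper left", "upper right"],
                          ["lower left", "lower right"]])

ax["upper left"].set_title("idle vCPUs (boxplot)")
ax["lower left"].set_title("% cross-VM links (boxplot)")
ax["upper right"].set_title("idle vCPUs over time (16 VMs, 50 tasks)")
ax["lower right"].set_title("cross-VM links over time (16 VMs, 50 tasks)")

fig.tight_layout()
fig.savefig("resource_usage_layout.png")
```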


@task
def eviction(ctx):
"""
Macrobenchmark plot showing the benefits of migrating MPI applications to
evict idle VMs.
- LHS: bar plot of the VM-seconds used per execution
- RHS: timeseries of the number of idle VMs over time
"""
# NOTE: we probably want a lower number of tasks here to just show the
# benefits at the tails
# num_vms = [16, 24, 32, 48, 64]
# num_tasks = [50, 75, 100, 150, 200]
num_vms = [8, 16, 24]
num_tasks = [25, 50, 75]
num_cpus_per_vm = 8

# RHS: zoom in on one of the bars
timeseries_num_vms = 16 # 32
timeseries_num_tasks = 50 # 100

results = {}
for (n_vms, n_tasks) in zip(num_vms, num_tasks):
results[n_vms] = read_makespan_results(n_vms, n_tasks, num_cpus_per_vm)

fig, ax = subplot_mosaic([['left', 'right'],
['left', 'right']])

# ----------
# Plot 1: bar plot of the VM-seconds used per execution
# ----------

do_makespan_plot(
"used_vmsecs",
results,
ax["left"],
num_vms,
num_tasks
)

# ----------
# Plot 2: timeseries of one of the cluster sizes
# ----------

do_makespan_plot(
"ts_idle_vms",
results,
ax["right"],
timeseries_num_vms,
timeseries_num_tasks
)

save_plot(fig, MAKESPAN_PLOTS_DIR, "idle_vms")
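Note that `eviction` passes `[['left', 'right'], ['left', 'right']]` to `subplot_mosaic`: repeating a label merges its cells into a single axes, so only two panels are created, each spanning both rows. A minimal sketch of that behaviour:

```python
from matplotlib.pyplot import subplot_mosaic

# Repeated labels are merged, so this yields two tall axes side by side
fig, ax = subplot_mosaic([["left", "right"],
                          ["left", "right"]])
print(sorted(ax.keys()))  # ['left', 'right'] -- only two axes exist
```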
34 changes: 19 additions & 15 deletions tasks/makespan/run.py
@@ -12,6 +12,7 @@
EXEC_TASK_INFO_FILE_PREFIX,
GRANNY_BASELINES,
IDLE_CORES_FILE_PREFIX,
NATIVE_BASELINES,
init_csv_file,
get_idle_core_count_from_task_info,
get_num_cpus_per_vm_from_trace,
@@ -73,7 +74,7 @@ def granny(
@task()
def native_slurm(
ctx,
workload="all",
workload="mpi-migrate",
num_vms=32,
num_cpus_per_vm=8,
num_tasks=100,
@@ -96,7 +97,7 @@ def native_batch(
@task()
def native_batch(
ctx,
workload="all",
workload="mpi-migrate",
num_vms=32,
num_cpus_per_vm=8,
num_tasks=100,
@@ -152,22 +153,25 @@ def _do_run(baseline, num_vms, trace):

executed_task_info = scheduler.run(baseline, task_trace)

# For granny we get the idle cores as we run the experiment, from the
# planner
if baseline in NATIVE_BASELINES:
    num_idle_cores_per_time_step = get_idle_core_count_from_task_info(
        baseline,
        executed_task_info,
        task_trace,
        num_vms,
        num_cpus_per_vm,
    )
    for time_step in num_idle_cores_per_time_step:
        write_line_to_csv(
            baseline,
            IDLE_CORES_FILE_PREFIX,
            num_vms,
            trace,
            time_step,
            num_idle_cores_per_time_step[time_step],
        )

# Finally shutdown the scheduler
scheduler.shutdown()
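The gating above means idle-core accounting is only done post hoc for native baselines; for Granny the planner records it while the experiment runs. A rough sketch of that branch in isolation (the baseline names are assumptions, the real constants live in `tasks.util.makespan`):

```python
# Assumed example values, for illustration only
NATIVE_BASELINES_EXAMPLE = ["slurm", "batch"]
GRANNY_BASELINES_EXAMPLE = ["granny"]

def needs_post_hoc_idle_core_accounting(baseline):
    # Native baselines have no planner, so idle cores are derived from the
    # executed task info after the run; Granny records them at run time
    return baseline in NATIVE_BASELINES_EXAMPLE

print(needs_post_hoc_idle_core_accounting("slurm"))   # True
print(needs_post_hoc_idle_core_accounting("granny"))  # False
```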