Skip to content

Commit a6d859b

Browse files
committed
migration: ubench fixes
1 parent da421c1 commit a6d859b

5 files changed

Lines changed: 54 additions & 35 deletions

File tree

tasks/migration/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ applications to benefit from dynamic changes in the compute environment.
66
First, provision the cluster:
77

88
```bash
9-
(faasm-exp-pase) inv cluster.provision --vm Standard_D8_v5 --nodes 3 --name ${CLUSTER_NAME}
10-
(faasm-exp-base) inv cluster.credentials --name ${CLUSTER_NAME}
9+
inv cluster.provision --vm Standard_D8_v5 --nodes 3 --name ${CLUSTER_NAME}
10+
inv cluster.credentials --name ${CLUSTER_NAME}
1111
```
1212

1313
Second, deploy the cluster:

tasks/migration/oracle.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
get_faasm_benchmark,
2525
get_lammps_migration_params,
2626
)
27+
from tasks.util.plot import save_plot
2728
from time import sleep
2829

2930

@@ -54,14 +55,14 @@ def calculate_cross_vm_links(part):
5455

5556

5657
@task()
57-
def run(ctx, workload="network", nprocs=None):
58+
def run(ctx, workload="very-network", nprocs=None):
5859
"""
5960
Experiment to measure the benefits of migration in isolation
6061
"""
6162
# Work out the number of processes to run with
62-
num_procs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
63+
num_procs = [2, 3, 4, 5, 6, 7, 8] # , 9, 10, 11, 12, 13, 14, 15, 16]
6364
num_cpus_per_vm = 8
64-
num_vms = 16
65+
num_vms = 8 # 16
6566
if nprocs is not None:
6667
num_procs = [int(nprocs)]
6768

@@ -160,20 +161,17 @@ def do_write_csv_line(csv_name, part, xvm_links, actual_time):
160161

161162

162163
@task
163-
def plot(ctx):
164+
def plot(ctx, workload="very-network"):
164165
plots_dir = join(PLOTS_ROOT, "migration")
165166
makedirs(plots_dir, exist_ok=True)
166-
out_file = join(
167-
plots_dir, "migration_oracle_{}.pdf".format(LAMMPS_SIM_WORKLOAD)
168-
)
169167

170168
results_dir = join(PROJ_ROOT, "results", "migration")
171169
result_dict = {}
172170

173171
for csv in glob(
174172
join(
175173
results_dir,
176-
"migration_oracle_{}_*.csv".format(LAMMPS_SIM_WORKLOAD),
174+
"migration_oracle_{}_*.csv".format(workload),
177175
)
178176
):
179177
num_procs = csv.split("_")[-1].split(".")[0]
@@ -193,13 +191,12 @@ def plot(ctx):
193191
float(line.split(",")[-1])
194192
)
195193

196-
print(result_dict)
197194
num_plots = len(result_dict)
198195
num_cols = 4
199196
num_rows = ceil(num_plots / num_cols)
200197
fig, axes = subplots(nrows=num_rows, ncols=num_cols)
201198
fig.suptitle(
202-
"Correlation between execution time (Y) and x-VM links (X)\n(wload: compute)"
199+
"Correlation between execution time (Y) and x-VM links (X)\n(wload: {})".format(workload)
203200
)
204201

205202
def do_plot(ax, results, num_procs):
@@ -213,6 +210,4 @@ def do_plot(ax, results, num_procs):
213210
axes[int(i / 4)][int(i % 4)], result_dict[num_procs], num_procs
214211
)
215212

216-
fig.tight_layout()
217-
savefig(out_file, format="pdf") # , bbox_inches="tight")
218-
print("Plot saved to: {}".format(out_file))
213+
save_plot(fig, join(PLOTS_ROOT, "migration"), "migration_oracle_{}".format(workload))

tasks/migration/plot.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from tasks.util.plot import save_plot
1010

1111

12-
ALL_WORKLOADS = ["compute", "network"]
12+
ALL_WORKLOADS = ["all-to-all", "compute", "network", "og-network", "very-network"]
1313

1414

1515
def _read_results():
@@ -43,8 +43,11 @@ def plot(ctx):
4343
"""
4444
migration_results = _read_results()
4545

46+
do_plot("all-to-all", migration_results)
4647
do_plot("compute", migration_results)
4748
do_plot("network", migration_results)
49+
do_plot("very-network", migration_results)
50+
do_plot("og-network", migration_results)
4851

4952

5053
def do_plot(workload, migration_results):

tasks/migration/run.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44
from os import makedirs
55
from os.path import basename, join
66
from tasks.migration.util import generate_host_list
7-
from tasks.util.env import RESULTS_DIR
7+
from tasks.util.env import (
8+
MPI_MIGRATE_FAASM_FUNC,
9+
MPI_MIGRATE_FAASM_USER,
10+
RESULTS_DIR,
11+
)
812
from tasks.util.faasm import (
913
get_faasm_exec_time_from_json,
1014
post_async_msg_and_get_result_json,
@@ -48,23 +52,24 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):
4852
"""
4953
num_vms = len(get_faasm_worker_ips())
5054
assert num_vms == 2, "Expected 2 VMs got: {}!".format(num_vms)
51-
# data_file = basename(get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0])
52-
# TODO: is this a good idea? FIXME FIXME DELETE ME
53-
data_file = basename(get_faasm_benchmark("compute")["data"][0])
5455

5556
if check_in is None:
5657
check_array = [0, 2, 4, 6, 8, 10]
5758
else:
5859
check_array = [int(check_in)]
5960

6061
for workload in w:
61-
if workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
62+
if workload != "all-to-all" and workload not in LAMMPS_SIM_WORKLOAD_CONFIGS:
6263
print(
6364
"Unrecognised workload config ({}) must be one in: {}".format(
6465
workload, LAMMPS_SIM_WORKLOAD.keys()
6566
)
6667
)
67-
workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
68+
raise RuntimeError("Unrecognised workload: {}".format(workload))
69+
70+
if workload != "all-to-all":
71+
workload_config = LAMMPS_SIM_WORKLOAD_CONFIGS[workload]
72+
data_file = basename(get_faasm_benchmark(workload_config["data_file"])["data"][0])
6873

6974
csv_name = "migration_{}.csv".format(workload)
7075
_init_csv_file(csv_name)
@@ -75,37 +80,41 @@ def run(ctx, w, check_in=None, repeats=1, num_cores_per_vm=8):
7580

7681
# Print progress
7782
print(
78-
"Running migration micro-benchmark (wload:"
83+
"Running migration micro-benchmark (wload: "
7984
+ "{} - check-at: {} - repeat: {}/{})".format(
8085
workload, check, run_num + 1, repeats
8186
)
8287
)
8388

84-
"""
85-
TODO: do we want to keep the all-to-all baseline?
8689
if workload == "all-to-all":
8790
num_loops = 100000
8891
user = MPI_MIGRATE_FAASM_USER
8992
func = MPI_MIGRATE_FAASM_FUNC
9093
cmdline = "{} {}".format(
9194
check if check != 0 else 5, num_loops
9295
)
93-
"""
96+
input_data = None
97+
else:
98+
user = LAMMPS_FAASM_USER
99+
func = LAMMPS_FAASM_MIGRATION_NET_FUNC
100+
cmdline = "-in faasm://lammps-data/{}".format(data_file)
101+
input_data = get_lammps_migration_params(
102+
check_every=check if check != 0 else 5,
103+
num_loops=5,
104+
num_net_loops=workload_config["num_net_loops"],
105+
chunk_size=workload_config["chunk_size"],
106+
)
94107

95-
# Run LAMMPS
96-
cmdline = "-in faasm://lammps-data/{}".format(data_file)
97108
msg = {
98-
"user": LAMMPS_FAASM_USER,
99-
"function": LAMMPS_FAASM_MIGRATION_NET_FUNC,
109+
"user": user,
110+
"function": func,
100111
"cmdline": cmdline,
101112
"mpi_world_size": int(num_cores_per_vm),
102-
"input_data": get_lammps_migration_params(
103-
num_loops=5,
104-
num_net_loops=workload_config["num_net_loops"],
105-
chunk_size=workload_config["chunk_size"],
106-
),
107113
}
108114

115+
if input_data is not None:
116+
msg["input_data"] = input_data
117+
109118
if check == 0:
110119
# Setting a check fraction of 0 means we don't
111120
# under-schedule. We use it as a baseline

tasks/util/lammps.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,25 @@
4949

5050
LAMMPS_SIM_WORKLOAD_CONFIGS = {
5151
"compute": {
52+
"data_file": "compute",
5253
"num_net_loops": 0,
5354
"chunk_size": 0,
5455
},
5556
"network": {
57+
"data_file": "compute",
5658
"num_net_loops": LAMMPS_SIM_NUM_NET_LOOPS,
5759
"chunk_size": LAMMPS_SIM_CHUNK_SIZE,
5860
},
61+
"very-network": {
62+
"data_file": "compute",
63+
"num_net_loops": 1e5,
64+
"chunk_size": 10,
65+
},
66+
"og-network": {
67+
"data_file": "network",
68+
"num_net_loops": 0,
69+
"chunk_size": 0,
70+
},
5971
}
6072

6173
# Different supported LAMMPS benchmarks

0 commit comments

Comments
 (0)