
Commit d980e66

Committed May 9, 2024

makespan(elastic): working on compose

1 parent 264ead1 · commit d980e66

17 files changed: +573 -126 lines
 

‎tasks/elastic/run.py

+1 -1

@@ -87,7 +87,7 @@ def wasm(ctx, num_threads=None, elastic=False, repeats=1):
         "user": user,
         "function": func,
         "cmdline": cmdline,
-        "input_data": get_elastic_input_data(),
+        "input_data": get_elastic_input_data(num_loops=2),
         "isOmp": True,
         "ompNumThreads": nthread,
     }

‎tasks/makespan/elastic.md

+103 (new file)

@@ -0,0 +1,103 @@
+# Makespan Experiment (Elastic Scaling Version)
+
+In this experiment we study the benefits of using Granules to elastically
+scale shared memory applications when there are idle CPU cores.
+
+For each experiment run, we increase the cluster size (in terms of number of
+VMs) and the number of jobs in the trace, proportionally.
+
+Re-run the following instructions once for each of the following value pairs:
+
+```bash
+NUM_VMS=8,16,24,32
+NUM_TASKS=25,50,75,100
+```
+
+## Deploy the cluster
+
+First, to deploy the cluster, run:
+
+```bash
+export NUM_VMS=
+export NUM_TASKS=
+
+inv cluster.provision --vm Standard_D8_v5 --nodes $((${NUM_VMS} + 1))
+inv cluster.credentials
+```
+
+## Native (OpenMPI)
+
+First, deploy the native `k8s` cluster:
+
+```bash
+inv makespan.native.deploy --num-vms ${NUM_VMS}
+```
+
+Now, you can run the different native baselines:
+
+```bash
+# TODO: native batch and native slurm should be the same for OpenMP?
+inv makespan.run.native-batch --workload omp-elastic --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
+inv makespan.run.native-slurm --workload omp-elastic --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
+```
+
+Once you are done, you may remove the native OpenMPI cluster:
+
+```bash
+inv makespan.native.delete
+```
+
+## Granny
+
+To run the Granny baseline, first deploy the cluster:
+
+```bash
+faasmctl deploy.k8s --workers=${NUM_VMS}
+```
+
+Second, upload the corresponding WASM files:
+
+```bash
+inv makespan.wasm.upload
+```
+
+Third, run the experiment:
+
+```bash
+# No elastic scaling
+inv makespan.run.granny --workload omp-elastic --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS}
+
+# Elastic scaling
+inv makespan.run.granny --workload omp-elastic --num-vms ${NUM_VMS} --num-tasks ${NUM_TASKS} --elastic
+```
+
+During an experiment, you may monitor the state of the cluster (in a separate
+shell) by using:
+
+```bash
+faasmctl monitor.planner --policy spot
+```
+
+Once you are done, you may delete the cluster:
+
+```bash
+faasmctl delete
+```
+
+## Delete the AKS cluster
+
+Once you are done with the cluster, run:
+
+```bash
+inv cluster.delete
+```
+
+Then you may move on to the next (cluster size, batch size) pair.
+
+## Plot the results
+
+Finally, you may plot the results with:
+
+```bash
+inv makespan.plot.elastic
+```

‎tasks/makespan/run.py

+34 -35

@@ -32,23 +32,16 @@


 def _get_workload_from_cmdline(workload):
-    base_workloads = ["mpi", "omp", "mix"]
-    exp_workloads = ["mpi-migrate"]
-    all_workloads = ["mix", "mpi", "mpi-migrate", "mpi-evict", "mpi-spot", "omp", "omp-elastic"]
-    if workload == "all":
-        workload = all_workloads
-    elif workload == "base":
-        workload = base_workloads
-    elif workload == "exp":
-        workload = exp_workloads
-    elif workload in all_workloads:
-        workload = [workload]
-    else:
+    # TODO: rename mpi-migrate to something like mpi-locality
+    all_workloads = ["mpi-evict", "mpi-migrate", "mpi-spot", "omp-elastic"]
+
+    if workload not in all_workloads:
         raise RuntimeError(
             "Unrecognised workload: {}. Must be one in: {}".format(
                 workload, all_workloads
             )
         )
+
     return workload

@@ -59,25 +52,33 @@ def granny(
     num_vms=32,
     num_cpus_per_vm=8,
     num_tasks=100,
+    # Optional flag for mpi-migrate workload to migrate to improve locality
     migrate=False,
+    # Optional flag for mpi-spot workload to inject faults
     fault=False,
+    # Optional flag for omp-elastic workload to elastically use idle CPUs
+    elastic=False,
+    # Mandatory flag for the mpi-evict workload (not in the paper)
     num_users=None,
 ):
     """
-    Run: `inv makespan.run.granny --workload [mpi-migrate,mpi-evict,omp]
+    Run: `inv makespan.run.granny --workload [mpi-migrate,mpi-spot,omp-elastic]
     """
     # Work-out the baseline name from the arguments
     baseline = "granny"
     if migrate:
+        assert workload == "mpi-migrate", "--migrate flag should only be used with mpi-migrate workload!"
         baseline = "granny-migrate"
     if fault:
+        assert workload == "mpi-spot", "--fault flag should only be used with mpi-spot workload!"
         baseline = "granny-ft"
+    if elastic:
+        assert workload == "omp-elastic", "--elastic flag should only be used with omp-elastic workload!"
+        baseline = "granny-elastic"

     workload = _get_workload_from_cmdline(workload)
-    for wload in workload:
-        trace = get_trace_from_parameters(wload, num_tasks, num_cpus_per_vm)
-        _do_run(baseline, num_vms, trace, num_users)
-        sleep(5)
+    trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm)
+    _do_run(baseline, num_vms, trace, num_users)


 @task()
@@ -99,15 +100,13 @@ def native_slurm(
         baseline = "slurm-ft"

     workload = _get_workload_from_cmdline(workload)
-    for wload in workload:
-        trace = get_trace_from_parameters(wload, num_tasks, num_cpus_per_vm)
-        _do_run(
-            baseline,
-            num_vms,
-            trace,
-            num_users,
-        )
-        sleep(5)
+    trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm)
+    _do_run(
+        baseline,
+        num_vms,
+        trace,
+        num_users,
+    )


 @task()
@@ -129,15 +128,13 @@ def native_batch(
         baseline = "batch-ft"

     workload = _get_workload_from_cmdline(workload)
-    for wload in workload:
-        trace = get_trace_from_parameters(wload, num_tasks, num_cpus_per_vm)
-        _do_run(
-            baseline,
-            num_vms,
-            trace,
-            num_users,
-        )
-        sleep(5)
+    trace = get_trace_from_parameters(workload, num_tasks, num_cpus_per_vm)
+    _do_run(
+        baseline,
+        num_vms,
+        trace,
+        num_users,
+    )


 @task()
@@ -169,6 +166,8 @@ def _do_run(baseline, num_vms, trace, num_users):
         set_planner_policy("bin-pack")
     elif job_workload == "mpi-spot":
         set_planner_policy("spot")
+    elif job_workload == "omp-elastic":
+        set_planner_policy("bin-pack")

     scheduler = BatchScheduler(
         baseline,
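As a side note on the `_do_run` hunk above: the planner policy is chosen per workload before the batch scheduler starts. A condensed, illustrative restatement of the branches visible in this diff (the branch preceding `mpi-spot` is not shown here, so it is not listed):

```python
# Illustrative only: planner policy set by _do_run for the workloads visible
# in this hunk. "omp-elastic" reuses bin-pack so tasks are packed first and
# can then grow into idle cores.
PLANNER_POLICY_PER_WORKLOAD = {
    "mpi-spot": "spot",         # tolerate evictions of spot VMs
    "omp-elastic": "bin-pack",  # new in this commit
}

# e.g. set_planner_policy(PLANNER_POLICY_PER_WORKLOAD["omp-elastic"])
```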

‎tasks/makespan/scheduler.py

+55 -72

@@ -2,7 +2,6 @@
     get_faasm_worker_ips,
     get_faasm_worker_names,
 )
-from faasmctl.util.flush import flush_workers
 from faasmctl.util.planner import (
     get_in_fligh_apps as planner_get_in_fligh_apps,
     set_next_evicted_host as planner_set_next_evicted_host,
@@ -25,17 +24,19 @@
     TaskObject,
     WorkQueueItem,
 )
-from tasks.makespan.env import (
-    DGEMM_DOCKER_BINARY,
-    DGEMM_FAASM_FUNC,
-    DGEMM_FAASM_USER,
-    get_dgemm_cmdline,
+from tasks.util.elastic import (
+    ELASTIC_KERNEL,
+    OPENMP_ELASTIC_FUNCTION,
+    OPENMP_ELASTIC_NATIVE_BINARY,
+    OPENMP_ELASTIC_USER,
+    get_elastic_input_data,
 )
 from tasks.util.faasm import (
     get_faasm_exec_time_from_json,
     has_app_failed,
     post_async_msg_and_get_result_json,
 )
+from tasks.util.kernels import get_openmp_kernel_cmdline
 from tasks.util.k8s import wait_for_pods as wait_for_native_mpi_pods
 from tasks.util.lammps import (
     LAMMPS_FAASM_USER,
@@ -53,12 +54,14 @@
     ALLOWED_BASELINES,
     EXEC_TASK_INFO_FILE_PREFIX,
     GRANNY_BASELINES,
+    GRANNY_ELASTIC_BASELINES,
     GRANNY_FT_BASELINES,
     GRANNY_MIGRATE_BASELINES,
     MPI_MIGRATE_WORKLOADS,
     MPI_WORKLOADS,
     NATIVE_BASELINES,
     NATIVE_FT_BASELINES,
+    OPENMP_WORKLOADS,
     SCHEDULING_INFO_FILE_PREFIX,
     get_num_cpus_per_vm_from_trace,
     get_user_id_from_task,
@@ -255,10 +258,7 @@ def thread_print(msg):

     # Choose the right data file if running a LAMMPS simulation
     if work_item.task.app in MPI_WORKLOADS:
-        # We always use the same LAMMPS benchmark ("compute-xl")
-        # TODO: FIXME: delete me!
-        data_file = get_faasm_benchmark("compute")["data"][0]
-        # data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]
+        data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]

     # Record the start timestamp
     start_ts = 0
@@ -301,13 +301,11 @@
             "su mpirun -c '{}'".format(mpirun_cmd),
         ]
         exec_cmd = " ".join(exec_cmd)
-    elif work_item.task.app == "omp":
-        # TODO(omp): should we set the parallelism level to be
-        # min(work_item.task.size, num_slots_per_vm) ? I.e. what will
-        # happen when we oversubscribe?
-        openmp_cmd = "bash -c '{} {}'".format(
-            DGEMM_DOCKER_BINARY,
-            get_dgemm_cmdline(work_item.task.size),
+    elif work_item.task.app in OPENMP_WORKLOADS:
+        openmp_cmd = "bash -c '{} {} {}'".format(
+            get_elastic_input_data(native=True),
+            OPENMP_ELASTIC_NATIVE_BINARY,
+            get_openmp_kernel_cmdline(ELASTIC_KERNEL, work_item.task.size),
         )

         exec_cmd = [
@@ -358,43 +356,40 @@
             msg["input_data"] = get_lammps_migration_params(
                 check_every=check_every
             )
-        elif work_item.task.app == "omp":
+        elif work_item.task.app in OPENMP_WORKLOADS:
            if work_item.task.size > num_cpus_per_vm:
                print(
                    "Requested OpenMP execution with more parallelism"
                    "than slots in the current environment:"
                    "{} > {}".format(work_item.task.size, num_cpus_per_vm)
                )
                raise RuntimeError("Error in OpenMP task trace!")
-            user = DGEMM_FAASM_USER
-            func = "{}_{}".format(DGEMM_FAASM_FUNC, work_item.task.task_id)
+            user = OPENMP_ELASTIC_USER
+            func = OPENMP_ELASTIC_FUNCTION
             msg = {
                 "user": user,
                 "function": func,
-                # The input_data is the number of OMP threads
-                "cmdline": get_dgemm_cmdline(work_item.task.size),
+                "input_data": get_elastic_input_data(),
+                "cmdline": get_openmp_kernel_cmdline(ELASTIC_KERNEL, work_item.task.size),
+                "isOmp": True,
+                "ompNumThreads": work_item.task.size,
             }

         req["user"] = user
         req["function"] = func
+        req["singleHostHint"] = True
+        req["elasticScaleHint"] = baseline in GRANNY_ELASTIC_BASELINES

-        start_ts = time()
         # Post asynch request and wait for JSON result
-        try:
-            result_json = post_async_msg_and_get_result_json(msg, req_dict=req)
-            actual_time = int(get_faasm_exec_time_from_json(result_json))
-            has_failed = has_app_failed(result_json)
-            thread_print(
-                "Finished executiong app {} (time: {})".format(
-                    result_json[0]["appId"], actual_time
-                )
-            )
-        except RuntimeError:
-            print("WEE EVER HERE?? DELETE THIS CATCH")
-            actual_time = -1
-            sch_logger.error(
-                "Error executing task {}".format(work_item.task.task_id)
+        start_ts = time()
+        result_json = post_async_msg_and_get_result_json(msg, req_dict=req)
+        actual_time = int(get_faasm_exec_time_from_json(result_json))
+        has_failed = has_app_failed(result_json)
+        thread_print(
+            "Finished executing app {} (time: {})".format(
+                result_json[0]["appId"], actual_time
             )
+        )

         end_ts = time()

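To make the new request fields concrete, here is a hedged sketch of the message and request dictionaries built for an elastic OpenMP task. The field names come from the diff above; the placeholder strings stand in for constants and helpers defined in `tasks/util/elastic.py` and `tasks/util/kernels.py`, and any other fields the real `req` carries are omitted:

```python
# Illustrative shape of the Granny request for an 8-thread omp-elastic task.
task_size = 8

msg = {
    "user": "<OPENMP_ELASTIC_USER>",
    "function": "<OPENMP_ELASTIC_FUNCTION>",
    "input_data": "<get_elastic_input_data()>",
    "cmdline": "<get_openmp_kernel_cmdline(ELASTIC_KERNEL, task_size)>",
    "isOmp": True,
    "ompNumThreads": task_size,
}

req = {
    "user": msg["user"],
    "function": msg["function"],
    # Keep all threads of the task on a single host to start with...
    "singleHostHint": True,
    # ...and only allow elastic scale-up for the granny-elastic baseline
    "elasticScaleHint": True,  # i.e. baseline in GRANNY_ELASTIC_BASELINES
}
```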
@@ -432,7 +427,7 @@ class SchedulerState:
     # number of cpus per vm
     trace_str: str
     # The workload indicates the type of application we are runing. It can
-    # either be `omp` or `mpi-migrate`, or `mpi-evict`
+    # either be `omp-elastic`, `mpi-migrate`, `mpi-evict`, or `mpi-spot`
     workload: str
     num_tasks: int
     num_cpus_per_vm: int
@@ -833,16 +828,25 @@ def num_available_slots_from_vm_list(self, vm_list):
     # Helper method to know if we have enough slots to schedule a task
     def have_enough_slots_for_task(self, task: TaskObject):
         if self.state.baseline in NATIVE_BASELINES:
-            # For `mpi-evict` we run a multi-tenant trace, and prevent apps
-            # from different users from running in the same VM
             if self.state.workload == "mpi-evict":
+                # For `mpi-evict` we run a multi-tenant trace, and prevent apps
+                # from different users from running in the same VM
                 sorted_vms = sorted(
                     self.state.vm_map.items(), key=lambda item: item[1], reverse=True
                 )

                 pruned_vms = self.prune_node_list_from_different_users(sorted_vms, task)

                 return self.num_available_slots_from_vm_list(pruned_vms) >= task.size
+            elif self.state.workload in OPENMP_WORKLOADS:
+                # For OpenMP workloads, we can only allocate them in one VM, so
+                # we compare the requested size with the largest capacity we
+                # have in one VM
+                sorted_vms = sorted(
+                    self.state.vm_map.items(), key=lambda item: item[1], reverse=True
+                )
+
+                return sorted_vms[0][1] >= task.size
             else:
                 return self.state.total_available_slots >= task.size
         else:
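A small, self-contained sketch of the single-VM capacity check added above, using a toy `vm_map` (VM name to free slots); the helper name and the example numbers are illustrative only:

```python
# Toy example of the native-baseline check for OpenMP tasks: a task fits only
# if some single VM has enough free slots, since OpenMP tasks cannot span VMs.
def fits_on_one_vm(vm_map, task_size):
    # vm_map: {vm_name: free_slots}; the VM with the most free slots decides
    largest_free = max(vm_map.values())
    return largest_free >= task_size


vm_map = {"vm0": 3, "vm1": 6, "vm2": 1}
print(fits_on_one_vm(vm_map, 5))  # True: vm1 has 6 free slots
print(fits_on_one_vm(vm_map, 7))  # False: 10 slots free in total, but spread across VMs
```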
@@ -862,6 +866,13 @@
                     num_evicted_vms=self.state.num_faults,
                 ) >= task.size

+            if self.state.workload in OPENMP_WORKLOADS:
+                return get_num_available_slots_from_in_flight_apps(
+                    self.state.num_vms,
+                    self.state.num_cpus_per_vm,
+                    openmp=True,
+                ) >= task.size
+
             return get_num_available_slots_from_in_flight_apps(
                 self.state.num_vms,
                 self.state.num_cpus_per_vm
@@ -898,10 +909,12 @@ def schedule_task_to_vm(
         if self.state.workload == "mpi-evict":
             sorted_vms = self.prune_node_list_from_different_users(sorted_vms, task)

+        if self.state.workload in OPENMP_WORKLOADS:
+            sorted_vms = [sorted_vms[0]]
+
         # For GRANNY baselines we can skip the python-side accounting as the
         # planner has all the scheduling information
-        # TODO(omp): why should it be any different with OpenMP?
-        if self.state.baseline in NATIVE_BASELINES and task.app in MPI_WORKLOADS:
+        if self.state.baseline in NATIVE_BASELINES:
             for vm, num_slots in sorted_vms:
                 # Work out how many slots can we take up in this pod
                 if self.state.baseline == "batch":
@@ -935,36 +948,6 @@
                     raise RuntimeError(
                         "Scheduling error: inconsistent scheduler state"
                     )
-        """
-        elif task.app == "omp":
-            if len(sorted_vms) == 0:
-                # TODO: maybe we should raise an inconsistent state error here
-                return NOT_ENOUGH_SLOTS
-            vm, num_slots = sorted_vms[0]
-            if num_slots == 0:
-                # TODO: maybe we should raise an inconsistent state error here
-                return NOT_ENOUGH_SLOTS
-            if self.state.baseline in NATIVE_BASELINES:
-                if task.size > self.state.num_cpus_per_vm:
-                    print(
-                        "Overcomitting for task {} ({} > {})".format(
-                            task.task_id,
-                            task.size,
-                            self.state.num_cpus_per_vm,
-                        )
-                    )
-                    num_on_this_vm = self.state.num_cpus_per_vm
-                else:
-                    if num_slots < task.size:
-                        return NOT_ENOUGH_SLOTS
-                    num_on_this_vm = task.size
-
-            scheduling_decision.append((vm, num_on_this_vm))
-            self.state.vm_map[vm] -= num_on_this_vm
-            # TODO: when we overcommit, do we substract the number of cores
-            # we occupy, or the ones we agree to run?
-            self.state.total_available_slots -= num_on_this_vm
-        """

         # Before returning, persist the scheduling decision to state
         self.state.in_flight_tasks[task.task_id] = scheduling_decision

‎tasks/makespan/spot.md

+4 -4

@@ -9,8 +9,8 @@ VMs) and the number of jobs in the tasks, proportionally.
 Re-run the following instructions with the following values:

 ```bash
-NUM_VMS=8,16,32
-NUM_TASKS=25,50,100
+NUM_VMS=8,16,24,32
+NUM_TASKS=25,50,75,100
 ```

 ## Deploy the cluster
@@ -21,7 +21,7 @@ First, to deploy the cluster, run:
 export NUM_VMS=
 export NUM_TASKS=

-inv cluster.provision --vm Standard_D8_v5 --nodes ${NUM_VMS} + 1
+inv cluster.provision --vm Standard_D8_v5 --nodes $((${NUM_VMS} + 1))
 inv cluster.credentials
 ```

@@ -100,7 +100,7 @@ then you may move to the next (cluster size, batch size) pair.

 ## Plot the results

-Finally, you may plot the results wiht:
+Finally, you may plot the results with:

 ```bash
 inv makespan.plot.spot

‎tasks/makespan/trace.py

+2 -6

@@ -45,18 +45,14 @@ def generate(ctx, workload, num_tasks, num_cores_per_vm, lmbd="0.1"):
     inter_arrival_times.insert(0, 0)

     # Work out the possible different workloads
-    if workload == "mpi":
-        possible_workloads = ["mpi"]
-    elif workload == "mpi-migrate":
+    if workload == "mpi-migrate":
         possible_workloads = ["mpi-migrate"]
     elif workload == "mpi-evict":
         possible_workloads = ["mpi-migrate"]
     elif workload == "mpi-spot":
         possible_workloads = ["mpi-migrate"]
-    elif workload == "omp":
+    elif workload == "omp-elastic":
         possible_workloads = ["omp"]
-    elif workload == "mix":
-        possible_workloads = ["mpi", "omp"]
     else:
         raise RuntimeError("Unrecognised workload: {}".format(workload))

@@ -0,0 +1,101 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,omp,1,0
+1,omp,6,3
+2,omp,5,19
+3,omp,1,19
+4,omp,5,9
+5,omp,7,3
+6,omp,6,0
+7,omp,1,26
+8,omp,4,17
+9,omp,1,6
+10,omp,2,3
+11,omp,1,17
+12,omp,5,47
+13,omp,3,24
+14,omp,7,1
+15,omp,7,6
+16,omp,1,17
+17,omp,5,21
+18,omp,1,7
+19,omp,5,0
+20,omp,4,3
+21,omp,6,15
+22,omp,2,16
+23,omp,6,10
+24,omp,5,15
+25,omp,5,4
+26,omp,4,1
+27,omp,3,0
+28,omp,6,12
+29,omp,6,12
+30,omp,3,8
+31,omp,1,6
+32,omp,3,6
+33,omp,7,15
+34,omp,1,9
+35,omp,5,1
+36,omp,2,2
+37,omp,4,3
+38,omp,2,39
+39,omp,5,6
+40,omp,4,15
+41,omp,4,7
+42,omp,4,14
+43,omp,2,0
+44,omp,4,0
+45,omp,2,17
+46,omp,4,0
+47,omp,4,38
+48,omp,6,1
+49,omp,1,2
+50,omp,6,0
+51,omp,2,3
+52,omp,7,13
+53,omp,7,2
+54,omp,4,18
+55,omp,1,12
+56,omp,2,1
+57,omp,5,3
+58,omp,7,12
+59,omp,7,15
+60,omp,2,29
+61,omp,1,2
+62,omp,4,32
+63,omp,4,8
+64,omp,3,0
+65,omp,4,12
+66,omp,5,3
+67,omp,1,3
+68,omp,1,2
+69,omp,5,7
+70,omp,5,5
+71,omp,3,36
+72,omp,7,36
+73,omp,3,10
+74,omp,5,2
+75,omp,2,3
+76,omp,5,8
+77,omp,7,3
+78,omp,3,9
+79,omp,4,11
+80,omp,4,1
+81,omp,6,36
+82,omp,5,2
+83,omp,1,1
+84,omp,6,8
+85,omp,3,26
+86,omp,4,0
+87,omp,5,25
+88,omp,3,6
+89,omp,7,9
+90,omp,4,3
+91,omp,1,16
+92,omp,3,0
+93,omp,1,4
+94,omp,3,20
+95,omp,7,2
+96,omp,6,8
+97,omp,7,9
+98,omp,7,7
+99,omp,2,12

@@ -0,0 +1,11 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,omp,5,0
+1,omp,4,5
+2,omp,5,1
+3,omp,2,0
+4,omp,7,13
+5,omp,7,1
+6,omp,3,19
+7,omp,7,9
+8,omp,2,11
+9,omp,3,0

@@ -0,0 +1,26 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,omp,2,0
+1,omp,3,6
+2,omp,5,14
+3,omp,2,19
+4,omp,1,12
+5,omp,5,11
+6,omp,5,10
+7,omp,5,13
+8,omp,3,20
+9,omp,6,29
+10,omp,6,2
+11,omp,3,0
+12,omp,1,15
+13,omp,3,2
+14,omp,3,12
+15,omp,3,2
+16,omp,3,7
+17,omp,2,9
+18,omp,4,5
+19,omp,1,17
+20,omp,6,4
+21,omp,2,7
+22,omp,1,5
+23,omp,6,8
+24,omp,3,0

@@ -0,0 +1,51 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,omp,2,0
+1,omp,2,22
+2,omp,3,22
+3,omp,7,8
+4,omp,1,4
+5,omp,3,4
+6,omp,6,1
+7,omp,6,3
+8,omp,4,3
+9,omp,4,14
+10,omp,1,2
+11,omp,1,0
+12,omp,7,5
+13,omp,4,12
+14,omp,6,0
+15,omp,3,8
+16,omp,3,6
+17,omp,6,4
+18,omp,1,3
+19,omp,4,4
+20,omp,3,5
+21,omp,7,3
+22,omp,3,6
+23,omp,1,41
+24,omp,2,6
+25,omp,7,9
+26,omp,1,10
+27,omp,5,12
+28,omp,4,17
+29,omp,2,27
+30,omp,2,10
+31,omp,7,0
+32,omp,3,0
+33,omp,6,0
+34,omp,1,3
+35,omp,3,1
+36,omp,1,12
+37,omp,1,21
+38,omp,2,20
+39,omp,2,5
+40,omp,4,9
+41,omp,2,0
+42,omp,2,9
+43,omp,1,30
+44,omp,7,32
+45,omp,7,36
+46,omp,2,9
+47,omp,7,10
+48,omp,1,0
+49,omp,6,1

@@ -0,0 +1,75 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,omp,2,0
+1,omp,4,4
+2,omp,4,13
+3,omp,3,3
+4,omp,6,26
+5,omp,5,5
+6,omp,2,22
+7,omp,5,43
+8,omp,1,7
+9,omp,3,9
+10,omp,3,2
+11,omp,1,14
+12,omp,7,1
+13,omp,6,5
+14,omp,4,14
+15,omp,1,30
+16,omp,7,4
+17,omp,5,1
+18,omp,4,4
+19,omp,7,6
+20,omp,6,11
+21,omp,2,2
+22,omp,2,9
+23,omp,6,4
+24,omp,6,3
+25,omp,2,3
+26,omp,3,4
+27,omp,3,17
+28,omp,1,16
+29,omp,2,8
+30,omp,1,6
+31,omp,4,13
+32,omp,6,18
+33,omp,7,3
+34,omp,3,14
+35,omp,3,0
+36,omp,7,9
+37,omp,5,8
+38,omp,1,0
+39,omp,6,10
+40,omp,1,37
+41,omp,7,14
+42,omp,3,10
+43,omp,3,3
+44,omp,2,6
+45,omp,5,4
+46,omp,4,15
+47,omp,6,16
+48,omp,1,3
+49,omp,7,8
+50,omp,7,4
+51,omp,2,8
+52,omp,7,5
+53,omp,4,0
+54,omp,1,14
+55,omp,2,5
+56,omp,7,15
+57,omp,6,11
+58,omp,2,4
+59,omp,1,11
+60,omp,2,16
+61,omp,5,4
+62,omp,4,3
+63,omp,6,12
+64,omp,3,7
+65,omp,7,24
+66,omp,2,1
+67,omp,1,1
+68,omp,7,14
+69,omp,2,27
+70,omp,3,24
+71,omp,2,0
+72,omp,7,8
+73,omp,6,19

@@ -0,0 +1,76 @@
+TaskId,App,Size,InterArrivalTimeSecs
+0,omp,6,0
+1,omp,2,0
+2,omp,4,4
+3,omp,4,27
+4,omp,5,4
+5,omp,6,2
+6,omp,1,6
+7,omp,5,10
+8,omp,7,0
+9,omp,5,0
+10,omp,2,36
+11,omp,7,30
+12,omp,7,3
+13,omp,7,3
+14,omp,6,12
+15,omp,4,7
+16,omp,2,39
+17,omp,3,11
+18,omp,7,11
+19,omp,5,15
+20,omp,2,6
+21,omp,2,3
+22,omp,6,4
+23,omp,3,29
+24,omp,5,1
+25,omp,4,3
+26,omp,6,21
+27,omp,7,25
+28,omp,2,18
+29,omp,4,10
+30,omp,5,45
+31,omp,4,2
+32,omp,2,2
+33,omp,1,8
+34,omp,3,9
+35,omp,6,0
+36,omp,3,12
+37,omp,4,2
+38,omp,7,5
+39,omp,4,0
+40,omp,7,1
+41,omp,1,5
+42,omp,6,3
+43,omp,3,13
+44,omp,2,20
+45,omp,3,4
+46,omp,6,23
+47,omp,7,7
+48,omp,7,13
+49,omp,6,8
+50,omp,3,11
+51,omp,2,1
+52,omp,5,4
+53,omp,7,1
+54,omp,4,15
+55,omp,2,6
+56,omp,3,7
+57,omp,3,8
+58,omp,5,13
+59,omp,2,0
+60,omp,5,22
+61,omp,1,28
+62,omp,6,41
+63,omp,3,11
+64,omp,1,2
+65,omp,1,15
+66,omp,4,2
+67,omp,2,1
+68,omp,2,3
+69,omp,3,7
+70,omp,3,1
+71,omp,7,1
+72,omp,5,13
+73,omp,2,6
+74,omp,1,10

‎tasks/makespan/wasm.py

+11

@@ -1,4 +1,9 @@
 from invoke import task
+from tasks.util.elastic import (
+    OPENMP_ELASTIC_FUNCTION,
+    OPENMP_ELASTIC_USER,
+    OPENMP_ELASTIC_WASM,
+)
 from tasks.util.lammps import (
     LAMMPS_FAASM_USER,
     LAMMPS_MIGRATION_NET_DOCKER_WASM,
@@ -20,6 +25,12 @@ def upload(ctx):
             "wasm_function": LAMMPS_FAASM_MIGRATION_NET_FUNC,
             "copies": 1,
         },
+        {
+            "wasm_file": OPENMP_ELASTIC_WASM,
+            "wasm_user": OPENMP_ELASTIC_USER,
+            "wasm_function": OPENMP_ELASTIC_FUNCTION,
+            "copies": 1,
+        },
     ]

     upload_wasm(wasm_file_details)

‎tasks/util/elastic.py

+6 -2

@@ -16,12 +16,16 @@

 ELASTIC_KERNELS_DOCKER_DIR = join(EXAMPLES_DOCKER_DIR, "Kernels-elastic")
 ELASTIC_KERNELS_WASM_DIR = join(ELASTIC_KERNELS_DOCKER_DIR, "build", "wasm")
-KERNELS_NATIVE_DIR = join(ELASTIC_KERNELS_DOCKER_DIR, "build", "native")
+ELASTIC_KERNELS_NATIVE_DIR = join(ELASTIC_KERNELS_DOCKER_DIR, "build", "native")

 OPENMP_ELASTIC_WASM = join(ELASTIC_KERNELS_WASM_DIR, "omp_{}.wasm".format(ELASTIC_KERNEL))
+OPENMP_ELASTIC_NATIVE_BINARY = join(ELASTIC_KERNELS_NATIVE_DIR, "omp_{}.wasm".format(ELASTIC_KERNEL))

+# Parameters for the macrobenchmark
+OPENMP_ELASTIC_NUM_LOOPS = 5

-def get_elastic_input_data(num_loops=2, native=False):
+
+def get_elastic_input_data(num_loops=OPENMP_ELASTIC_NUM_LOOPS, native=False):
     if native:
         return "-x FAASM_BENCH_PARAMS={}".format(int(num_loops))
‎tasks/util/kernels.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545
def get_openmp_kernel_cmdline(kernel, num_threads):
4646
kernels_cmdline = {
47-
# dgemm: iterations, matrix order, tile size (20 iterations fine, 100 long)
47+
# dgemm: iterations, order, tile size (20 iterations fine, 100 long)
4848
"dgemm": "100 2048 32",
4949
# global: iterations, scramble string length
5050
# string length must be multiple of num_threads
@@ -68,5 +68,3 @@ def get_openmp_kernel_cmdline(kernel, num_threads):
6868
}
6969

7070
return "{} {}".format(num_threads, kernels_cmdline[kernel])
71-
72-

‎tasks/util/makespan.py

+3 -1

@@ -31,14 +31,16 @@
 # - Slurm: native OpenMPI where we schedule jobs at CPU core granularity
 NATIVE_FT_BASELINES = ["batch-ft", "slurm-ft"]
 NATIVE_BASELINES = ["batch", "slurm"] + NATIVE_FT_BASELINES
+GRANNY_ELASTIC_BASELINES = ["granny-elastic"]
 GRANNY_FT_BASELINES = ["granny-ft"]
 GRANNY_MIGRATE_BASELINES = ["granny-migrate"]
-GRANNY_BASELINES = ["granny"] + GRANNY_MIGRATE_BASELINES + GRANNY_FT_BASELINES
+GRANNY_BASELINES = ["granny"] + GRANNY_MIGRATE_BASELINES + GRANNY_FT_BASELINES + GRANNY_ELASTIC_BASELINES
 ALLOWED_BASELINES = NATIVE_BASELINES + GRANNY_BASELINES

 # Workload/Migration related constants
 MPI_MIGRATE_WORKLOADS = ["mpi-migrate", "mpi-evict", "mpi-spot"]
 MPI_WORKLOADS = ["mpi"] + MPI_MIGRATE_WORKLOADS
+OPENMP_WORKLOADS = ["omp", "omp-elastic"]


 def cum_sum(ts, values):
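As a quick cross-reference, a hypothetical sketch (not part of the commit) of how the two new constants are consumed elsewhere in this changeset:

```python
# Hypothetical usage sketch: how the new constants drive the elastic behaviour.
GRANNY_ELASTIC_BASELINES = ["granny-elastic"]
OPENMP_WORKLOADS = ["omp", "omp-elastic"]

baseline = "granny-elastic"
workload = "omp-elastic"

# scheduler.py: only the elastic baseline asks the planner to scale the app up
elastic_scale_hint = baseline in GRANNY_ELASTIC_BASELINES

# scheduler.py: OpenMP workloads must fit on a single VM when checking slots
needs_single_vm = workload in OPENMP_WORKLOADS

print(elastic_scale_hint, needs_single_vm)  # True True
```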

‎tasks/util/planner.py

+13 -2

@@ -29,6 +29,7 @@ def get_num_available_slots_from_in_flight_apps(
     num_cpus_per_vm,
     user_id = None,
     num_evicted_vms = None,
+    openmp = False,
 ):
     """
     For Granny baselines, we cannot use static knowledge of the
@@ -68,8 +69,10 @@

         # Annoyingly, we may query for the in-flight apps as soon as we
         # schedule them, missing the init stage of the mpi app. Thus we
-        # sleep for a bit and ask again
-        if any([len(app.hostIps) != app.size for app in in_flight_apps.apps]):
+        # sleep for a bit and ask again (we allow the size to go over the
+        # specified size in case of an elastic scale-up)
+        if any([len(app.hostIps) < app.size for app in in_flight_apps.apps]):
+            print("App not fully in-flight. We wait...")
             sleep(short_sleep_secs)
             continue

@@ -111,6 +114,14 @@
         if worker_occupation[ip] < int(num_cpus_per_vm):
             worker_occupation[ip] += 1

+    # For OpenMP, we only care if any VM has enough slots to run the full
+    # application. Otherwise we wait.
+    if openmp:
+        if num_vms > len(list(worker_occupation.keys())):
+            return num_cpus_per_vm
+
+        return max([num_cpus_per_vm - worker_occupation[ip] for ip in worker_occupation])
+
     num_available_slots = (num_vms - len(list(worker_occupation.keys()))) * num_cpus_per_vm
     for ip in worker_occupation:
         num_available_slots += num_cpus_per_vm - worker_occupation[ip]
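A standalone sketch of the `openmp=True` branch added above, run over a toy occupation map (the IPs and numbers are illustrative):

```python
# Mimics the openmp=True branch: report how many slots an OpenMP app could get
# on a single VM, given per-VM occupation of the VMs that are currently busy.
def openmp_slots(num_vms, num_cpus_per_vm, worker_occupation):
    # If some VM does not appear in the occupation map at all, it is idle,
    # so a full VM's worth of slots is available
    if num_vms > len(worker_occupation):
        return num_cpus_per_vm

    # Otherwise, the best we can do is the emptiest busy VM
    return max(num_cpus_per_vm - occ for occ in worker_occupation.values())


occupation = {"10.0.0.1": 8, "10.0.0.2": 5}
print(openmp_slots(num_vms=3, num_cpus_per_vm=8, worker_occupation=occupation))  # 8: one VM is idle
print(openmp_slots(num_vms=2, num_cpus_per_vm=8, worker_occupation=occupation))  # 3: best fit on 10.0.0.2
```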
