@@ -2,7 +2,6 @@
     get_faasm_worker_ips,
     get_faasm_worker_names,
 )
-from faasmctl.util.flush import flush_workers
 from faasmctl.util.planner import (
     get_in_fligh_apps as planner_get_in_fligh_apps,
     set_next_evicted_host as planner_set_next_evicted_host,
@@ -25,17 +24,19 @@
     TaskObject,
     WorkQueueItem,
 )
-from tasks.makespan.env import (
-    DGEMM_DOCKER_BINARY,
-    DGEMM_FAASM_FUNC,
-    DGEMM_FAASM_USER,
-    get_dgemm_cmdline,
+from tasks.util.elastic import (
+    ELASTIC_KERNEL,
+    OPENMP_ELASTIC_FUNCTION,
+    OPENMP_ELASTIC_NATIVE_BINARY,
+    OPENMP_ELASTIC_USER,
+    get_elastic_input_data,
 )
 from tasks.util.faasm import (
     get_faasm_exec_time_from_json,
     has_app_failed,
     post_async_msg_and_get_result_json,
 )
+from tasks.util.kernels import get_openmp_kernel_cmdline
 from tasks.util.k8s import wait_for_pods as wait_for_native_mpi_pods
 from tasks.util.lammps import (
     LAMMPS_FAASM_USER,
@@ -53,12 +54,14 @@
     ALLOWED_BASELINES,
     EXEC_TASK_INFO_FILE_PREFIX,
     GRANNY_BASELINES,
+    GRANNY_ELASTIC_BASELINES,
     GRANNY_FT_BASELINES,
     GRANNY_MIGRATE_BASELINES,
     MPI_MIGRATE_WORKLOADS,
     MPI_WORKLOADS,
     NATIVE_BASELINES,
     NATIVE_FT_BASELINES,
+    OPENMP_WORKLOADS,
     SCHEDULING_INFO_FILE_PREFIX,
     get_num_cpus_per_vm_from_trace,
     get_user_id_from_task,
@@ -255,10 +258,7 @@ def thread_print(msg):
 
         # Choose the right data file if running a LAMMPS simulation
         if work_item.task.app in MPI_WORKLOADS:
-            # We always use the same LAMMPS benchmark ("compute-xl")
-            # TODO: FIXME: delete me!
-            data_file = get_faasm_benchmark("compute")["data"][0]
-            # data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]
+            data_file = get_faasm_benchmark(LAMMPS_SIM_WORKLOAD)["data"][0]
 
         # Record the start timestamp
         start_ts = 0
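
As a reading aid, this is the structure the call site assumes `get_faasm_benchmark` returns: a dict whose `"data"` entry is a list of candidate data files, of which the first is used. The file names below are made up for illustration:

```python
# Assumed shape, inferred from the call site get_faasm_benchmark(...)["data"][0]
benchmark = {"data": ["lammps/in.network", "lammps/in.network-alt"]}
data_file = benchmark["data"][0]
```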
@@ -301,13 +301,11 @@ def thread_print(msg):
                 "su mpirun -c '{}'".format(mpirun_cmd),
             ]
             exec_cmd = " ".join(exec_cmd)
-        elif work_item.task.app == "omp":
-            # TODO(omp): should we set the parallelism level to be
-            # min(work_item.task.size, num_slots_per_vm) ? I.e. what will
-            # happen when we oversubscribe?
-            openmp_cmd = "bash -c '{} {}'".format(
-                DGEMM_DOCKER_BINARY,
-                get_dgemm_cmdline(work_item.task.size),
+        elif work_item.task.app in OPENMP_WORKLOADS:
+            openmp_cmd = "bash -c '{} {} {}'".format(
+                get_elastic_input_data(native=True),
+                OPENMP_ELASTIC_NATIVE_BINARY,
+                get_openmp_kernel_cmdline(ELASTIC_KERNEL, work_item.task.size),
            )
 
            exec_cmd = [
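
For reference, a minimal sketch of the native command string this hunk now composes. The values below are assumptions for illustration only; the real outputs of `get_elastic_input_data(native=True)` and `get_openmp_kernel_cmdline` come from `tasks.util.elastic` and `tasks.util.kernels`:

```python
# Hypothetical stand-ins, only to show the shape of the composed command
input_data = "/data/elastic-input"    # assumed get_elastic_input_data(native=True)
binary = "/usr/local/bin/omp_kernel"  # assumed OPENMP_ELASTIC_NATIVE_BINARY
cmdline = "8 10 1024"                 # assumed get_openmp_kernel_cmdline(ELASTIC_KERNEL, 8)

openmp_cmd = "bash -c '{} {} {}'".format(input_data, binary, cmdline)
# -> "bash -c '/data/elastic-input /usr/local/bin/omp_kernel 8 10 1024'"
```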
@@ -358,43 +356,40 @@ def thread_print(msg):
                 msg["input_data"] = get_lammps_migration_params(
                     check_every=check_every
                 )
-            elif work_item.task.app == "omp":
+            elif work_item.task.app in OPENMP_WORKLOADS:
                 if work_item.task.size > num_cpus_per_vm:
                     print(
                         "Requested OpenMP execution with more parallelism "
                         "than slots in the current environment: "
                         "{} > {}".format(work_item.task.size, num_cpus_per_vm)
                     )
                     raise RuntimeError("Error in OpenMP task trace!")
-                user = DGEMM_FAASM_USER
-                func = "{}_{}".format(DGEMM_FAASM_FUNC, work_item.task.task_id)
+                user = OPENMP_ELASTIC_USER
+                func = OPENMP_ELASTIC_FUNCTION
                 msg = {
                     "user": user,
                     "function": func,
-                    # The input_data is the number of OMP threads
-                    "cmdline": get_dgemm_cmdline(work_item.task.size),
+                    "input_data": get_elastic_input_data(),
+                    "cmdline": get_openmp_kernel_cmdline(ELASTIC_KERNEL, work_item.task.size),
+                    "isOmp": True,
+                    "ompNumThreads": work_item.task.size,
                 }
 
         req["user"] = user
         req["function"] = func
+        req["singleHostHint"] = True
+        req["elasticScaleHint"] = baseline in GRANNY_ELASTIC_BASELINES
 
-        start_ts = time()
         # Post async request and wait for JSON result
-        try:
-            result_json = post_async_msg_and_get_result_json(msg, req_dict=req)
-            actual_time = int(get_faasm_exec_time_from_json(result_json))
-            has_failed = has_app_failed(result_json)
-            thread_print(
-                "Finished executiong app {} (time: {})".format(
-                    result_json[0]["appId"], actual_time
-                )
-            )
-        except RuntimeError:
-            print("WEE EVER HERE?? DELETE THIS CATCH")
-            actual_time = -1
-            sch_logger.error(
-                "Error executing task {}".format(work_item.task.task_id)
+        start_ts = time()
+        result_json = post_async_msg_and_get_result_json(msg, req_dict=req)
+        actual_time = int(get_faasm_exec_time_from_json(result_json))
+        has_failed = has_app_failed(result_json)
+        thread_print(
+            "Finished executing app {} (time: {})".format(
+                result_json[0]["appId"], actual_time
            )
+        )
 
         end_ts = time()
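
To summarise the noisy hunk above, this is the request shape now posted for a Granny OpenMP task. Every concrete value below is an assumption shown for illustration; the real ones come from `tasks.util.elastic` and the task trace:

```python
# All values are assumptions, shown only to collect the new fields in one place
msg = {
    "user": "omp",           # assumed value of OPENMP_ELASTIC_USER
    "function": "elastic",   # assumed value of OPENMP_ELASTIC_FUNCTION
    "input_data": "...",     # get_elastic_input_data()
    "cmdline": "8 10 1024",  # assumed get_openmp_kernel_cmdline(ELASTIC_KERNEL, 8)
    "isOmp": True,           # mark the app as OpenMP for the planner
    "ompNumThreads": 8,      # initial parallelism (work_item.task.size)
}

req = {
    "user": msg["user"],
    "function": msg["function"],
    "singleHostHint": True,    # OpenMP apps must stay on a single host
    "elasticScaleHint": True,  # only for baselines in GRANNY_ELASTIC_BASELINES
}
```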
@@ -432,7 +427,7 @@ class SchedulerState:
     # number of cpus per vm
     trace_str: str
     # The workload indicates the type of application we are running. It can
-    # either be `omp` or `mpi-migrate`, or `mpi-evict`
+    # either be `omp-elastic`, `mpi-migrate`, `mpi-evict`, or `mpi-spot`
     workload: str
     num_tasks: int
     num_cpus_per_vm: int
@@ -833,16 +828,25 @@ def num_available_slots_from_vm_list(self, vm_list):
     # Helper method to know if we have enough slots to schedule a task
     def have_enough_slots_for_task(self, task: TaskObject):
         if self.state.baseline in NATIVE_BASELINES:
-            # For `mpi-evict` we run a multi-tenant trace, and prevent apps
-            # from different users from running in the same VM
             if self.state.workload == "mpi-evict":
+                # For `mpi-evict` we run a multi-tenant trace, and prevent apps
+                # from different users from running in the same VM
                 sorted_vms = sorted(
                     self.state.vm_map.items(), key=lambda item: item[1], reverse=True
                 )
 
                 pruned_vms = self.prune_node_list_from_different_users(sorted_vms, task)
 
                 return self.num_available_slots_from_vm_list(pruned_vms) >= task.size
+            elif self.state.workload in OPENMP_WORKLOADS:
+                # For OpenMP workloads, we can only allocate them in one VM, so
+                # we compare the requested size with the largest capacity we
+                # have in one VM
+                sorted_vms = sorted(
+                    self.state.vm_map.items(), key=lambda item: item[1], reverse=True
+                )
+
+                return sorted_vms[0][1] >= task.size
             else:
                 return self.state.total_available_slots >= task.size
         else:
@@ -862,6 +866,13 @@ def have_enough_slots_for_task(self, task: TaskObject):
                 num_evicted_vms=self.state.num_faults,
             ) >= task.size
 
+            if self.state.workload in OPENMP_WORKLOADS:
+                return get_num_available_slots_from_in_flight_apps(
+                    self.state.num_vms,
+                    self.state.num_cpus_per_vm,
+                    openmp=True,
+                ) >= task.size
+
             return get_num_available_slots_from_in_flight_apps(
                 self.state.num_vms,
                 self.state.num_cpus_per_vm
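
The single-VM constraint is the key difference from the MPI path: the native check compares the task against the largest free capacity in any one VM, not the cluster-wide total. A small worked example with a made-up `vm_map` shows why:

```python
# Hypothetical free-slot map: 9 free slots in total across three VMs
vm_map = {"vm0": 3, "vm1": 4, "vm2": 2}
task_size = 5

# The MPI-style check on the cluster total would admit the task...
assert sum(vm_map.values()) >= task_size

# ...but no single VM can host all 5 OpenMP threads, so the new
# OpenMP check correctly rejects it
sorted_vms = sorted(vm_map.items(), key=lambda item: item[1], reverse=True)
assert not sorted_vms[0][1] >= task_size
```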
@@ -898,10 +909,12 @@ def schedule_task_to_vm(
         if self.state.workload == "mpi-evict":
             sorted_vms = self.prune_node_list_from_different_users(sorted_vms, task)
 
+        if self.state.workload in OPENMP_WORKLOADS:
+            sorted_vms = [sorted_vms[0]]
+
         # For GRANNY baselines we can skip the python-side accounting as the
         # planner has all the scheduling information
-        # TODO(omp): why should it be any different with OpenMP?
-        if self.state.baseline in NATIVE_BASELINES and task.app in MPI_WORKLOADS:
+        if self.state.baseline in NATIVE_BASELINES:
             for vm, num_slots in sorted_vms:
                 # Work out how many slots can we take up in this pod
                 if self.state.baseline == "batch":
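
Truncating `sorted_vms` to its head element means the generic native allocation loop can only ever place the task on the VM with the most free slots, which is what pins an OpenMP task to a single VM. A minimal sketch with assumed values:

```python
# Hypothetical candidates, sorted by free slots (descending)
sorted_vms = [("vm1", 4), ("vm0", 3), ("vm2", 2)]
task_size = 4

sorted_vms = [sorted_vms[0]]  # OpenMP: keep only the emptiest VM

scheduling_decision = []
for vm, num_slots in sorted_vms:
    # Take as many slots on this VM as the task needs and the VM has
    num_on_this_vm = min(num_slots, task_size)
    scheduling_decision.append((vm, num_on_this_vm))

assert scheduling_decision == [("vm1", 4)]  # the whole task lands on one VM
```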
@@ -935,36 +948,6 @@ def schedule_task_to_vm(
             raise RuntimeError(
                 "Scheduling error: inconsistent scheduler state"
             )
-        """
-        elif task.app == "omp":
-            if len(sorted_vms) == 0:
-                # TODO: maybe we should raise an inconsistent state error here
-                return NOT_ENOUGH_SLOTS
-            vm, num_slots = sorted_vms[0]
-            if num_slots == 0:
-                # TODO: maybe we should raise an inconsistent state error here
-                return NOT_ENOUGH_SLOTS
-            if self.state.baseline in NATIVE_BASELINES:
-                if task.size > self.state.num_cpus_per_vm:
-                    print(
-                        "Overcomitting for task {} ({} > {})".format(
-                            task.task_id,
-                            task.size,
-                            self.state.num_cpus_per_vm,
-                        )
-                    )
-                    num_on_this_vm = self.state.num_cpus_per_vm
-                else:
-                    if num_slots < task.size:
-                        return NOT_ENOUGH_SLOTS
-                    num_on_this_vm = task.size
-
-                scheduling_decision.append((vm, num_on_this_vm))
-                self.state.vm_map[vm] -= num_on_this_vm
-                # TODO: when we overcommit, do we substract the number of cores
-                # we occupy, or the ones we agree to run?
-                self.state.total_available_slots -= num_on_this_vm
-        """
 
         # Before returning, persist the scheduling decision to state
         self.state.in_flight_tasks[task.task_id] = scheduling_decision