Skip to content

Commit

Permalink
omp: elastic scaling experiment (#41)
Browse files Browse the repository at this point in the history
* elastic: microbenchmark running

* makespan(elastic): working on compose

* makespan(elastic): more fixes in plots

* util: plot fixes

* plots: add legend on top

* plots: more ground work

* docker: bump examples commit after merge

* nits: self-review
  • Loading branch information
csegarragonz authored May 14, 2024
1 parent 4b35b0f commit 2b1ea3a
Show file tree
Hide file tree
Showing 38 changed files with 2,079 additions and 360 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.0
0.6.0
2 changes: 1 addition & 1 deletion bin/workon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export PS1="(faasm-exp-faabric) $PS1"
# Experiment-specific variables
export FAASM_INI_FILE=${PROJ_ROOT}/faasm.ini
export FAASM_WASM_VM=wamr
export FAASM_VERSION=0.26.0
export FAASM_VERSION=0.27.0

popd >> /dev/null

10 changes: 9 additions & 1 deletion docker/faabric-experiments.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ RUN rm -rf /code \
# Checkout to a specific commit, to make sure we do not forget to update it
# when changes occur upstream, and we do not accidentally cache old WASM
# versions
&& git checkout 269557d7244c67d27ec4c98cc72fb04d7af762c8 \
&& git checkout 428a11c80263b82ea8a83157205c4ef0eceab979 \
&& git submodule update --init -f cpp \
&& git submodule update --init -f python \
&& git submodule update --init -f examples/Kernels \
&& git submodule update --init -f examples/Kernels-elastic \
&& git submodule update --init -f examples/lammps \
&& git submodule update --init -f examples/lammps-migration \
&& git submodule update --init -f examples/lammps-migration-net \
Expand All @@ -22,6 +23,13 @@ RUN rm -rf /code \
&& source ./venv/bin/activate \
&& inv kernels --native \
&& inv kernels \
# FIXME: for some reason, build only works if we create these directories
# manually. Annoyingly, the problem can not be reproduced inside the
# container image
&& mkdir -p /code/faasm-examples/examples/Kernels-elastic/build/native \
&& inv kernels --elastic --native --clean \
&& mkdir -p /code/faasm-examples/examples/Kernels-elastic/build/wasm \
&& inv kernels --elastic --clean \
&& inv lammps --native \
&& inv lammps \
&& inv lammps --migration --native \
Expand Down
2 changes: 2 additions & 0 deletions tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging

from tasks.elastic import ns as elastic_ns
from tasks.kernels_mpi import ns as kernels_mpi_ns
from tasks.kernels_omp import ns as kernels_omp_ns
from tasks.lammps import ns as lammps_ns
Expand All @@ -23,6 +24,7 @@
format_code,
)

ns.add_collection(elastic_ns, name="elastic")
ns.add_collection(kernels_mpi_ns, name="kernels-mpi")
ns.add_collection(kernels_omp_ns, name="kernels-omp")
ns.add_collection(lammps_ns, name="lammps")
Expand Down
59 changes: 59 additions & 0 deletions tasks/elastic/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Elastic Scaling Micro-Benchmark

In this experiment we measure the benefit of elastically scaling up OpenMP
applications to exploit idle resources. We run a pipelined algorithm on a
matrix with a varying number of threads and, at 50% of execution, scale up
to the maximum number of available threads. This experiment represents a
best-case scenario for the gains achievable through elastic scale-up.

## Granny

First, provision the AKS cluster by running:

```bash
inv cluster.provision --vm Standard_D8_v5 --nodes 2 cluster.credentials
```

Second, deploy the Granny cluster:

```bash
faasmctl deploy.k8s --workers=1
```

Third, upload the WASM file:

```bash
inv elastic.wasm.upload
```

and run the experiment with:

```bash
# Without elastic scaling
inv elastic.run

# With elastic scaling
inv elastic.run --elastic
```

## Plot

You may now plot the results using:

```bash
inv elastic.plot
```

## Clean-Up

Finally, delete the Granny cluster:

```bash
faasmctl delete
```

and the AKS cluster:

```bash
inv cluster.delete
```
8 changes: 8 additions & 0 deletions tasks/elastic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Invoke task collection for the elastic-scaling experiment."""

from invoke import Collection

from . import native, plot, run, wasm

ns = Collection(native, plot, run, wasm)
61 changes: 61 additions & 0 deletions tasks/elastic/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from os.path import join
from tasks.util.env import PROJ_ROOT

# NOTE(review): the constants below look copied verbatim from the LAMMPS
# experiment utilities — confirm the elastic tasks actually use them
LAMMPS_DIR = join(PROJ_ROOT, "third-party", "lammps")

LAMMPS_IMAGE_NAME = "experiment-lammps"
LAMMPS_DOCKERFILE = join(PROJ_ROOT, "docker", "lammps.dockerfile")

# Paths inside the native experiment container image
DOCKER_PROJ_ROOT = "/code/experiment-mpi"
DOCKER_LAMMPS_DIR = join(DOCKER_PROJ_ROOT, "third-party", "lammps")
DOCKER_NATIVE_INSTALL_DIR = join(DOCKER_PROJ_ROOT, "build", "native-install")
DOCKER_LAMMPS_BINARY = join(DOCKER_NATIVE_INSTALL_DIR, "bin", "lmp")

# Faasm user/function pair for the LAMMPS WASM, and the prefix under which
# its input data files are uploaded — presumably; verify against the upload
# tasks
LAMMPS_FAASM_USER = "lammps"
LAMMPS_FAASM_FUNC = "main"
LAMMPS_FAASM_DATA_PREFIX = "/lammps-data"

# Define the different benchmarks we run in LAMMPS

# Map benchmark name -> input data files and output file prefix
BENCHMARKS = {
    "lj": {"data": ["bench/in.lj"], "out_file": "compute"},
    "compute": {"data": ["bench/in.lj"], "out_file": "compute"},
    "compute-xl": {"data": ["bench/in.lj-xl"], "out_file": "compute"},
    "compute-xxl": {"data": ["bench/in.lj-xxl"], "out_file": "compute"},
    "controller": {
        "data": ["examples/controller/in.controller.wall"],
        "out_file": "network",
    },
    "network": {
        "data": ["examples/controller/in.controller.wall"],
        "out_file": "network",
    },
    "eam": {"data": ["bench/in.eam", "bench/Cu_u3.eam"], "out_file": "eam"},
    "chute": {
        "data": ["bench/in.chute", "bench/data.chute"],
        "out_file": "chute",
    },
    "rhodo": {
        "data": ["bench/in.rhodo", "bench/data.rhodo"],
        "out_file": "rhodo",
    },
    "chain": {
        "data": ["bench/in.chain", "bench/data.chain"],
        "out_file": "chain",
    },
    "short": {
        "data": ["examples/controller/in.controller.wall"],
        "out_file": "short",
    },
}


def get_faasm_benchmark(bench):
    """
    Return the data files and output file for a supported LAMMPS benchmark.

    :param bench: name of the benchmark (a key in BENCHMARKS)
    :returns: the corresponding BENCHMARKS entry (dict with "data"/"out_file")
    :raises RuntimeError: if the benchmark name is not recognized
    """
    if bench not in BENCHMARKS:
        print("Unrecognized benchmark: {}".format(bench))
        # Join the keys explicitly: formatting dict_keys(...) directly prints
        # the unhelpful "dict_keys([...])" repr
        print(
            "The supported LAMMPS benchmarks are: {}".format(
                ", ".join(sorted(BENCHMARKS))
            )
        )
        raise RuntimeError("Unrecognized LAMMPS benchmark: {}".format(bench))

    return BENCHMARKS[bench]
25 changes: 25 additions & 0 deletions tasks/elastic/native.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from invoke import task
from tasks.util.env import FAABRIC_EXP_IMAGE_NAME
from tasks.util.openmpi import deploy_native_mpi, delete_native_mpi


@task
def deploy(ctx, backend="k8s", num_vms=1):
    """
    Deploy the native OpenMP k8s cluster
    """
    # Guard clause: k8s is the only supported backend
    if backend != "k8s":
        raise RuntimeError("Backend not supported: {}!".format(backend))

    deploy_native_mpi("openmp", FAABRIC_EXP_IMAGE_NAME, num_vms)


@task
def delete(ctx, backend="k8s", num_vms=1):
    """
    Delete the native OpenMP k8s cluster
    """
    if backend == "k8s":
        delete_native_mpi("openmp", FAABRIC_EXP_IMAGE_NAME, num_vms)
    else:
        raise RuntimeError("Backend not supported: {}!".format(backend))
72 changes: 72 additions & 0 deletions tasks/elastic/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from glob import glob
from invoke import task
from matplotlib.pyplot import subplots
from os import makedirs
from os.path import basename, join
from pandas import read_csv
from tasks.util.elastic import ELASTIC_PLOTS_DIR, ELASTIC_RESULTS_DIR
from tasks.util.env import SYSTEM_NAME
from tasks.util.plot import SINGLE_COL_FIGSIZE, save_plot


def _read_results():
    """
    Load the OpenMP experiment results from CSV files.

    Reads every openmp_*.csv file in the results directory and aggregates the
    execution times per thread count.

    :returns: a dict of the shape
              {baseline: {num_threads: {"mean": float, "sem": float}}}
    """
    result_dict = {}

    for csv in glob(join(ELASTIC_RESULTS_DIR, "openmp_*.csv")):
        results = read_csv(csv)

        # Parse the baseline from the file name (openmp_<baseline>_granny.csv)
        # rather than from the full path, which may itself contain underscores
        baseline = basename(csv).split("_")[1]

        # Aggregate once, outside the loop, instead of re-running the groupby
        # aggregation for every thread count
        grouped = results.groupby("NumThreads", as_index=False)
        means = grouped.mean()
        sems = grouped.sem()

        if baseline not in result_dict:
            result_dict[baseline] = {}

        exec_means = means["ExecTimeSecs"].to_list()
        exec_sems = sems["ExecTimeSecs"].to_list()
        for index, nt in enumerate(means["NumThreads"].to_list()):
            result_dict[baseline][nt] = {
                "mean": exec_means[index],
                "sem": exec_sems[index],
            }

    return result_dict


@task(default=True)
def plot(ctx):
    """
    Plot the speed-up achieved by elastically scaling OpenMP applications

    Plots, for each thread count, the ratio of non-elastic to elastic
    execution time (i.e. values above 1 mean elastic scaling is faster).
    """
    results = _read_results()
    makedirs(ELASTIC_PLOTS_DIR, exist_ok=True)
    fig, ax = subplots(figsize=SINGLE_COL_FIGSIZE)

    # Both baselines must have been run for the same set of thread counts.
    # Raise (rather than assert, which is stripped under -O) on mismatch
    if len(results["elastic"]) != len(results["no-elastic"]):
        raise RuntimeError(
            "Results mismatch! (elastic: {} - no-elastic: {})".format(
                len(results["elastic"]), len(results["no-elastic"])
            )
        )

    xs = list(results["elastic"].keys())
    ys = [
        float(results["no-elastic"][x]["mean"] / results["elastic"][x]["mean"])
        for x in xs
    ]

    ax.bar(
        xs,
        ys,
        edgecolor="black",
    )

    # Labels
    ax.set_xticks(xs)

    # Horizontal line at a speed-up of 1 (i.e. no elastic-scaling benefit)
    xlim_left = 0.5
    xlim_right = len(xs) + 0.5
    ax.hlines(1, xlim_left, xlim_right, linestyle="dashed", colors="red")

    ax.set_xlim(left=xlim_left, right=xlim_right)
    ax.set_ylim(bottom=0)
    ax.set_xlabel("Number of OpenMP threads")
    ax.set_ylabel("Speed-Up \n [No-Elastic / Elastic]")

    save_plot(fig, ELASTIC_PLOTS_DIR, "elastic_speedup")
110 changes: 110 additions & 0 deletions tasks/elastic/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from faasmctl.util.planner import get_available_hosts
from faasmctl.util.planner import reset as reset_planner, set_planner_policy
from invoke import task
from os import makedirs
from os.path import join
from tasks.util.faasm import (
get_faasm_exec_time_from_json,
post_async_msg_and_get_result_json,
)
from tasks.util.elastic import (
ELASTIC_KERNEL,
ELASTIC_RESULTS_DIR,
OPENMP_ELASTIC_FUNCTION,
OPENMP_ELASTIC_USER,
get_elastic_input_data,
)
from tasks.util.kernels import get_openmp_kernel_cmdline

# Number of VMs the experiment expects (used by the disabled sanity check in
# the `wasm` task)
EXPECTED_NUM_VMS = 1
# Thread counts to sweep when --num-threads is not given
TOTAL_NUM_THREADS = [1, 2, 3, 4, 5, 6, 7, 8]


def _init_csv_file(csv_name):
    """Create the results directory and CSV file, writing the header row."""
    makedirs(ELASTIC_RESULTS_DIR, exist_ok=True)

    csv_path = join(ELASTIC_RESULTS_DIR, csv_name)
    with open(csv_path, "w") as csv_file:
        csv_file.write("NumThreads,Run,ExecTimeSecs\n")


def _write_csv_line(csv_name, num_threads, run, exec_time):
    """Append one measurement row to the results CSV."""
    csv_path = join(ELASTIC_RESULTS_DIR, csv_name)
    with open(csv_path, "a") as csv_file:
        csv_file.write("{},{},{}\n".format(num_threads, run, exec_time))


def has_execution_failed(results_json):
    """
    Check a list of message results for any sign of failure.

    A result counts as failed if it reports a non-zero return value, or if
    its output data contains a known error marker.
    """
    error_markers = ["ERROR", "Call failed"]

    for entry in results_json:
        if entry.get("returnValue", 0) != 0:
            return True

        if "output_data" in entry:
            output = entry["output_data"]
            if any(marker in output for marker in error_markers):
                return True

    return False


@task(default=True)
def wasm(ctx, num_threads=None, elastic=False, repeats=1):
    """
    Run the OpenMP elastic-scaling experiment on a Granny cluster

    Executes the elastic OpenMP kernel with a varying number of threads,
    optionally allowing the planner to elastically scale the application up,
    and records each run's execution time in a CSV file.
    """
    set_planner_policy("bin-pack")

    avail_hosts = get_available_hosts().hosts
    num_vms = len(avail_hosts)

    # NOTE(review): this used to be a hard assertion, left disabled as a dead
    # string literal. Keep it non-fatal, but warn instead of staying silent
    if num_vms != EXPECTED_NUM_VMS:
        print(
            "WARNING: expected {} VMs got: {}!".format(
                EXPECTED_NUM_VMS, num_vms
            )
        )

    if num_threads is not None:
        num_threads = [num_threads]
    else:
        num_threads = TOTAL_NUM_THREADS

    reset_planner(num_vms)

    csv_name = "openmp_{}_granny.csv".format(
        "elastic" if elastic else "no-elastic"
    )
    _init_csv_file(csv_name)

    # Invoke passes CLI arguments as strings, so convert once up-front rather
    # than on every loop iteration
    repeats = int(repeats)

    for nthread in num_threads:
        for r in range(repeats):
            print(
                "Running OpenMP elastic experiment with {} threads "
                "(elastic: {} - repeat: {}/{})".format(
                    nthread, elastic, r + 1, repeats
                )
            )
            user = OPENMP_ELASTIC_USER
            func = OPENMP_ELASTIC_FUNCTION
            cmdline = get_openmp_kernel_cmdline(ELASTIC_KERNEL, nthread)
            msg = {
                "user": user,
                "function": func,
                "cmdline": cmdline,
                "input_data": get_elastic_input_data(num_loops=2),
                "isOmp": True,
                "ompNumThreads": nthread,
            }
            req = {
                "user": user,
                "function": func,
                "singleHostHint": True,
                "elasticScaleHint": elastic,
            }

            # Note that when executing with just two iterations, the first one
            # will always be pre-loaded by the planner (so not elastically
            # scaled) thus naturally fitting the goal of our plot
            result_json = post_async_msg_and_get_result_json(msg, req_dict=req)
            actual_time = get_faasm_exec_time_from_json(
                result_json, check=True
            )
            _write_csv_line(csv_name, nthread, r, actual_time)
Loading

0 comments on commit 2b1ea3a

Please sign in to comment.