diff --git a/.gitignore b/.gitignore index 3f796ef..fa9cd16 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ workflow/src/legendsimflow/_version.py .snakemake /inputs +/simflow-config.yaml generated tests/dummyprod/inputs/hardware tests/dummyprod/inputs/datasets diff --git a/docs/source/manual/prod.md b/docs/source/manual/prod.md index 95174e3..33b13e0 100644 --- a/docs/source/manual/prod.md +++ b/docs/source/manual/prod.md @@ -74,18 +74,21 @@ in the configuration file) for each simulation. Once the production is over, the results can be summarized via the `print_benchmark_stats` rule: ```console -> snakemake -q all print_benchmark_stats -simid CPU time [ms/ev] evts / 1h jobs (1h) / 10^8 evts ------ ---------------- --------- --------------------- -stp.birds-nest-K40 (13s) 2.79 1288475 77 -stp.birds-nest-Ra224-to-Pb208 (191s) 38.33 93916 1064 -stp.fiber-support-copper-Co60 (223s) 44.69 80558 1241 -... ... ... ... +> snakemake print_benchmark_stats +simid runtime [sec] speed (hot loop) [ev/sec] evts / 1h jobs (1h) / 10^8 evts +----- ------------- ------------------------- --------- --------------------- +stp.sis1_z8430_slot2_Bi212_to_Pb208 139.0 717.70 2583720 38 +stp.sis1_z8580_slot2_Pb214_to_Po214 167.0 596.99 2149164 46 +stp.sis1_z8630_slot2_Bi212_to_Pb208 135.0 740.46 2665656 37 +... ... ... ... ... ``` +This rule computes statistics by inspecting the `stp`-tier (_remage_) logs. + :::{note} -The CPU time is a good measure of the actual simulation time, since other tasks -(e.g. application loading) are typically not CPU intensive. +The benchmarking statistics refer exclusively to the hot Geant4 simulation loop. +Overheads such as application initialization or remage built-in post processing +are not taken into account. 
::: diff --git a/docs/source/manual/setup.md b/docs/source/manual/setup.md index 988c99f..568358b 100644 --- a/docs/source/manual/setup.md +++ b/docs/source/manual/setup.md @@ -1,9 +1,23 @@ # Installation and configuration +Clone [legend-simflow](https://github.com/legend-exp/legend-simflow) and give it +a custom name: + +```console +> git clone git@github.com:legend-exp/legend-simflow +``` + +We recommend tagging the production cycle with a version number to be used as +folder name (e.g. `path/to/productions/v1.0.0`). + +Before a simulation production can be run, the user must configure the run with +a dedicated file and install the required software dependencies. + ## The configuration file -The `simflow-config.yaml` file in the production directory allows to customize -the workflow in great detail. Here's a basic description of its fields: +The `simflow-config.yaml` file resides in the production directory (the root of +the GitHub repository) and allows customizing the workflow in great detail. +Here's a basic description of its fields: - `experiment`: labels the experiment to be simulated. The same name is used in the metadata to label the corresponding configuration files. @@ -34,6 +48,15 @@ Snakemake's `--config` option. ::: +For a quick start, just copy over the default configuration file from the +templates: + +```console +cp templates/default.yaml simflow-config.yaml +``` + +and customize it. + ## Software dependencies The first step is obtaining the software, which is fully specified by the diff --git a/pyproject.toml b/pyproject.toml index b634f47..f85c628 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -171,29 +171,29 @@ legend-simflow = { path = ".", editable = true } [tool.pixi.dependencies] # legend-simflow core dependencies awkward = "*" -dbetto = ">=1.3.2" +dbetto = ">=1.3.2,<1.4" # legend-dataflow-scripts = "..." 
# not on conda-forge legend-pydataobj = "*" -legend-pygeom-l200 = ">=0.8" -legend-pygeom-tools = ">=0.1" +legend-pygeom-l200 = ">=0.8,<0.9" +legend-pygeom-tools = ">=0.1,<0.2" numpy = "*" -pylegendmeta = ">=1.3.3" +pylegendmeta = ">=1.3.3,<1.4" # execution -snakemake = ">=8.16" +snakemake = ">=8.16,<9" snakemake-storage-plugin-fs = "*" # tier hit -legend-pygeom-hpges = ">=0.9" +legend-pygeom-hpges = ">=0.9,<0.10" pyg4ometry = "*" -pygama = ">=2.2.3" -reboost = ">=0.8.3" +pygama = ">=2.2.3,<2.3" +reboost = ">=0.8.3,<0.9" # tier stp remage = ">=0.16,<0.17" # drift-time maps and other SSD.jl jobs -julia = ">=1.12" +julia = ">=1.12,<1.13" h5py = "*" hdf5 = "*" -revertex = ">=0.1.2" +revertex = ">=0.1.2,<0.2" diff --git a/simflow-config.yaml b/templates/default.yaml similarity index 100% rename from simflow-config.yaml rename to templates/default.yaml diff --git a/workflow/src/legendsimflow/metadata.py b/workflow/src/legendsimflow/metadata.py index 9a6a80d..eef1ac2 100644 --- a/workflow/src/legendsimflow/metadata.py +++ b/workflow/src/legendsimflow/metadata.py @@ -15,7 +15,6 @@ from __future__ import annotations -import hashlib import json import logging import re @@ -80,8 +79,10 @@ def hash_dict(d: dict | AttrsDict) -> str: if isinstance(d, AttrsDict): d = d.to_dict() - s = json.dumps(d, sort_keys=True) - return hashlib.sha256(s.encode()).hexdigest() + return json.dumps(d, sort_keys=True) + + # NOTE: alternatively, return sha256 (shorter string but bad for diffs) + # return hashlib.sha256(s.encode()).hexdigest() def smk_hash_simconfig( diff --git a/workflow/src/legendsimflow/scripts/print_benchmark_stats.py b/workflow/src/legendsimflow/scripts/print_benchmark_stats.py index c4baca4..693ae95 100644 --- a/workflow/src/legendsimflow/scripts/print_benchmark_stats.py +++ b/workflow/src/legendsimflow/scripts/print_benchmark_stats.py @@ -1,3 +1,5 @@ +# ruff: noqa: I002, T201 + # Copyright (C) 2023 Luigi Pertoldi # # This program is free software: you can redistribute it 
and/or modify it under @@ -13,40 +15,99 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -# ruff: noqa: F821, T201 -from __future__ import annotations - -import csv -from pathlib import Path +import re +from datetime import timedelta +from statistics import mean -import legenddataflowscripts as ldfs +from legendsimflow import nersc def printline(*line): - print("{:<52}{:>16}{:>11}{:>23}".format(*line)) + print("{:<52}{:>16}{:>27}{:>11}{:>23}".format(*line)) + + +args = nersc.dvs_ro_snakemake(snakemake) # noqa: F821 + +speed_pattern = re.compile( + r"^.*Stats: average event processing time was\s+" + r"([0-9]+(?:\.[0-9]+)?)\s+seconds/event\s+=\s+" + r"([0-9]+(?:\.[0-9]+)?)\s+events/second\s*$", + re.MULTILINE, +) + +nev_pattern = re.compile( + r"^.*Run nr\. \d+ completed\. (\d+) events simulated\.", re.MULTILINE +) +time_pattern = re.compile( + r"^.*Stats: run time was (\d+) days, (\d+) hours, (\d+) minutes and (\d+) seconds$", + re.MULTILINE, +) -printline("simid", "CPU time [ms/ev]", "evts / 1h", "jobs (1h) / 10^8 evts") -printline("-----", "----------------", "---------", "---------------------") +# have a look at the latest run +logdir = (nersc.dvs_ro(args.config, args.config.paths.log) / "benchmark").resolve() -bdir = Path(ldfs.as_ro(snakemake.config, snakemake.config.paths.benchmarks)) +if not logdir.is_dir(): + msg = "no benchmark run available!" 
+ raise RuntimeError(msg) -for simd in sorted(bdir.glob("*/*")): - if simd.parent.name not in ("ver", "stp"): +printline( + "simid", + "runtime [sec]", + "speed (hot loop) [ev/sec]", + "evts / 1h", + "jobs (1h) / 10^8 evts", +) +printline( + "-----", + "-------------", + "-------------------------", + "---------", + "---------------------", +) + +for simd in sorted(logdir.glob("*/*")): + # this code works only for remage output + if simd.parent.name != "stp": continue - data = {"cpu_time": 0} - for jobd in simd.glob("*.tsv"): - with jobd.open(newline="") as f: - this_data = next(iter(csv.DictReader(f, delimiter="\t"))) - data["cpu_time"] += float(this_data["cpu_time"]) + speed = 0 + runtime = 0 + for jobd in simd.glob("*.log"): + with jobd.open("r", encoding="utf-8") as f: + # read the full file in memory (assuming it can't be huge) + data = f.read() + + # extract events/sec for each thread + time = [ + float(m.group(2)) for m in speed_pattern.finditer(data) if m is not None + ] + + # simulations might have crashed or still running + if time == []: + runtime = "..." + speed = "..." + + # get the number of simulated events for each thread (it's always the same) + nev = int(nev_pattern.search(data).group(1)) + + # get the runtime of each thread + runtimes = [ + timedelta( + days=int(d), hours=int(h), minutes=int(mi), seconds=int(s) + ).total_seconds() + for d, h, mi, s in time_pattern.findall(data) + ] + + runtime = mean(runtimes) + speed += mean(time) - speed = data["cpu_time"] / snakemake.config.benchmark.n_primaries[simd.parent.name] - evts_1h = int(60 * 60 / speed) if speed > 0 else "..." + evts_1h = int(speed * 60 * 60) if speed > 0 else "..." njobs = int(1e8 / evts_1h) if not isinstance(evts_1h, str) else 0 printline( simd.parent.name + "." + simd.name, - "({:}s) {:.2f}".format(int(data["cpu_time"]), 1000 * speed), + ("!!! 
" if runtime < 10 else "") + f"{runtime:.1f}", + f"{speed:.2f}", evts_1h, njobs, ) diff --git a/workflow/src/legendsimflow/utils.py b/workflow/src/legendsimflow/utils.py index d4805eb..31a67f6 100644 --- a/workflow/src/legendsimflow/utils.py +++ b/workflow/src/legendsimflow/utils.py @@ -91,11 +91,17 @@ def _make_path(d): # I have verified only that this variable is visible in scripts (not shell directives) os.environ["MPLCONFIGDIR"] = f"{workflow.basedir}/src/legendsimflow" + proctime = ( + "benchmark" + if config.benchmark.enabled + else datetime.now().strftime("%Y%m%dT%H%M%SZ") + ) + return AttrsDict( { "config": config, "basedir": workflow.basedir, - "proctime": datetime.now().strftime("%Y%m%dT%H%M%SZ"), + "proctime": proctime, } )