diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 930fe48e..a2137dae 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -9,32 +9,15 @@ on: - main jobs: - python_format: + code_formatting: runs-on: ubuntu-24.04 container: - image: rootproject/root:6.32.00-ubuntu24.04 - options: --user 0 # run as root - + image: kingmakerimages/kingmaker_standalone:V1 + options: --user 0 steps: - - uses: actions/checkout@v3 - - - name: apt update - run: apt-get -y update - - - name: Install missing software - run: apt-get install -y git python3-pip python3-venv - - - name: Create venv - shell: bash - run: | - cd $GITHUB_WORKSPACE - python3 -m venv venv - source venv/bin/activate - pip install black==24.4.2 - - - name: Check Python formatting - shell: bash + - name: Clone project + uses: actions/checkout@v4 + - name: Run all formatting checks run: | - cd $GITHUB_WORKSPACE - source venv/bin/activate - bash scripts/python-formatting.sh + git config --global --add safe.directory '*' + bash ./checks/python-formatting.sh diff --git a/.gitignore b/.gitignore index 3f59ee5f..059bb190 100644 --- a/.gitignore +++ b/.gitignore @@ -8,13 +8,12 @@ tarballs/ workdir/ logs/ *.tar.gz -*.txt miniconda/ miniforge/ quantities_map/ output/ -CROWN/ .nfs* +*.txt #luigid pickle luigid_state.pickle @@ -34,6 +33,7 @@ dist/ downloads/ eggs/ .eggs/ +.conda/ lib/ lib64/ parts/ diff --git a/.gitmodules b/.gitmodules index c5036b36..ff16f99f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,6 +4,6 @@ [submodule "sample_database"] path = sample_database url = git@github.com:KIT-CMS/KingMaker_sample_database.git -[submodule "kingmaker-images"] - path = kingmaker-images - url = git@github.com:KIT-CMS/kingmaker-images.git +[submodule "CROWN"] + path = CROWN + url = git@github.com:KIT-CMS/CROWN.git diff --git a/CROWN b/CROWN new file mode 160000 index 00000000..b47d1b86 --- /dev/null +++ b/CROWN @@ -0,0 +1 @@ +Subproject commit 
b47d1b8649d7a859e34f7d09678f070f572ee9b9 diff --git a/README.md b/README.md index ac1cbd32..56291205 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,13 @@ # KingMaker - -[![Analysis Containers](https://github.com/KIT-CMS/kingmaker-images/actions/workflows/deploy-base-images.yml/badge.svg?branch=main)](https://github.com/KIT-CMS/kingmaker-images/actions/workflows/deploy-base-images.yml) - KingMaker is the workflow management for producing ntuples with the [CROWN](https://github.com/KIT-CMS/CROWN) framework. The workflow management is based on [law](https://github.com/riga/law), which is using [luigi](https://github.com/spotify/luigi) as backend. **⚠ Important: A detailed description of the KingMaker workflow to produce NTuples can be found in the [CROWN documentation](https://crown.readthedocs.io/en/latest/kingmaker.html#).** + + +## 🛠 Infrastructure & Containers + +KingMaker can run within container environments to ensure reproducibility. + +* **Container Images and Environments**: Dockerfiles and Conda environment specifications are located in the [`/containers`](./containers) directory. +* **Usage**: For instructions on building custom images or using existing ones from CVMFS, see the [Container Documentation](./containers/README.md). diff --git a/checks/format_ignore.txt b/checks/format_ignore.txt new file mode 100644 index 00000000..e69de29b diff --git a/checks/python-formatting.sh b/checks/python-formatting.sh new file mode 100644 index 00000000..4826db2b --- /dev/null +++ b/checks/python-formatting.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# Configuration +APPLY_FIXES=false + +# Parse CLI arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --apply) APPLY_FIXES=true ;; + *) echo "Unknown parameter: $1"; exit 1 ;; + esac + shift +done + +echo "🔍 Finding Python files ..." 
+ +# Find tracked python files +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +FILES=$(git ls-files | grep -E '\.py$' | grep -vEf "$SCRIPT_DIR/format_ignore.txt") + +if [[ -z "$FILES" ]]; then + echo "No Python files found." + exit 0 +fi + +if [ "$APPLY_FIXES" = true ]; then + echo "🛠 Running black to fix formatting..." + # Black formats in-place by default + echo "$FILES" | xargs black + echo "✅ Python formatting complete." +else + echo "🧪 Checking Python formatting (Dry Run)..." + + # --check: return exit code 1 if files need formatting + # --diff: show what would change + # --quiet: reduce noise, we only want the diffs/errors + if echo "$FILES" | xargs black --check --diff; then + echo "✨ Python code is looking good!" + exit 0 + else + echo "------------------------------------------------------" + echo "❌ ERROR: Python formatting violations found." + echo "Run this script with --apply to fix them." + echo "------------------------------------------------------" + exit 1 + fi +fi diff --git a/containers/Base_env.yml b/containers/Base_env.yml new file mode 100644 index 00000000..2e4dc58f --- /dev/null +++ b/containers/Base_env.yml @@ -0,0 +1,302 @@ +channels: + - conda-forge +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - _sysroot_linux-64_curr_repodata_hack=3=h69a702a_14 + - afterimage=1.21=h28ea39c_1005 + - anyio=4.4.0=pyhd8ed1ab_0 + - argon2-cffi=23.1.0=pyhd8ed1ab_0 + - argon2-cffi-bindings=21.2.0=py39hd1e30aa_4 + - arrow=1.3.0=pyhd8ed1ab_0 + - asttokens=2.4.1=pyhd8ed1ab_0 + - async-lru=2.0.4=pyhd8ed1ab_0 + - atk-1.0=2.38.0=h04ea711_2 + - attrs=23.2.0=pyh71513ae_0 + - babel=2.14.0=pyhd8ed1ab_0 + - beautifulsoup4=4.12.3=pyha770c72_0 + - binutils=2.40=h4852527_6 + - binutils_impl_linux-64=2.40=ha1999f0_6 + - binutils_linux-64=2.40=hb3c18ed_7 + - bleach=6.1.0=pyhd8ed1ab_0 + - brotli-python=1.1.0=py39h3d6467e_1 + - bzip2=1.0.8=hd590300_5 + - c-ares=1.28.1=hd590300_0 + - c-compiler=1.7.0=hd590300_1 + - 
ca-certificates=2024.6.2=hbcca054_0 + - cached-property=1.5.2=hd8ed1ab_1 + - cached_property=1.5.2=pyha770c72_1 + - cairo=1.18.0=h3faef2a_0 + - certifi=2024.6.2=pyhd8ed1ab_0 + - cffi=1.16.0=py39h7a31438_0 + - cfitsio=4.3.1=hbdc6101_0 + - cgsi-gsoap=1.3.11=h410d046_8 + - charset-normalizer=3.3.2=pyhd8ed1ab_0 + - colorama=0.4.6=pyhd8ed1ab_0 + - comm=0.2.2=pyhd8ed1ab_0 + - compilers=1.7.0=ha770c72_1 + - cxx-compiler=1.7.0=h00ab1b0_1 + - cyrus-sasl=2.1.27=h54b06d7_7 + - davix=0.8.6=h5f3b820_0 + - dcap=2.47.14=h481617c_2 + - debugpy=1.8.1=py39h3d6467e_0 + - decorator=5.1.1=pyhd8ed1ab_0 + - defusedxml=0.7.1=pyhd8ed1ab_0 + - entrypoints=0.4=pyhd8ed1ab_0 + - exceptiongroup=1.2.0=pyhd8ed1ab_2 + - executing=2.0.1=pyhd8ed1ab_0 + - expat=2.6.2=h59595ed_0 + - fftw=3.3.10=nompi_hf1063bd_110 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=h77eed37_2 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - fortran-compiler=1.7.0=heb67821_1 + - fqdn=1.5.1=pyhd8ed1ab_0 + - freetype=2.12.1=h267a509_2 + - fribidi=1.0.10=h36c2ea0_0 + - ftgl=2.4.0=hbcb1f35_0 + - gcc=12.3.0=h915e2ae_10 + - gcc_impl_linux-64=12.3.0=h58ffeeb_10 + - gcc_linux-64=12.3.0=h6b3dd4b_7 + - gct=6.2.1705709074=h6bbaf85_0 + - gdk-pixbuf=2.42.12=hb9ae30d_0 + - gettext=0.22.5=h59595ed_2 + - gettext-tools=0.22.5=h59595ed_2 + - gfal2=2.22.2=h40185fc_0 + - gfal2-util=1.8.1=pyhd8ed1ab_0 + - gfortran=12.3.0=h915e2ae_10 + - gfortran_impl_linux-64=12.3.0=h8f2110c_10 + - gfortran_linux-64=12.3.0=h5877db1_7 + - giflib=5.2.2=hd590300_0 + - gl2ps=1.4.2=hae5d5c5_1 + - glew=2.1.0=h9c3ff4c_2 + - graphite2=1.3.13=h59595ed_1003 + - graphviz=9.0.0=h78e8752_1 + - gsl=2.7=he838d99_0 + - gsoap=2.8.123=h8dc497d_0 + - gtest=1.14.0=h00ab1b0_1 + - gtk2=2.24.33=h280cfa0_4 + - gts=0.7.6=h977cf35_4 + - gxx=12.3.0=h915e2ae_10 + - gxx_impl_linux-64=12.3.0=h2a574ab_10 + - gxx_linux-64=12.3.0=ha28b414_7 
+ - h11=0.14.0=pyhd8ed1ab_0 + - h2=4.1.0=pyhd8ed1ab_0 + - harfbuzz=8.5.0=hfac3d4d_0 + - hpack=4.0.0=pyh9f0ad1d_0 + - httpcore=1.0.5=pyhd8ed1ab_0 + - httpx=0.27.0=pyhd8ed1ab_0 + - hyperframe=6.0.1=pyhd8ed1ab_0 + - icu=73.2=h59595ed_0 + - idna=3.7=pyhd8ed1ab_0 + - importlib-metadata=7.1.0=pyha770c72_0 + - importlib_metadata=7.1.0=hd8ed1ab_0 + - importlib_resources=6.4.0=pyhd8ed1ab_0 + - ipykernel=6.29.4=pyh3099207_0 + - ipyparallel=8.8.0=pyhd8ed1ab_0 + - ipython=8.18.1=pyh707e725_3 + - isoduration=20.11.0=pyhd8ed1ab_0 + - jedi=0.19.1=pyhd8ed1ab_0 + - jinja2=3.1.4=pyhd8ed1ab_0 + - json-c=0.17=h7ab15ed_0 + - json5=0.9.25=pyhd8ed1ab_0 + - jsonpointer=3.0.0=py39hf3d152e_0 + - jsonschema=4.22.0=pyhd8ed1ab_0 + - jsonschema-specifications=2023.12.1=pyhd8ed1ab_0 + - jsonschema-with-format-nongpl=4.22.0=pyhd8ed1ab_0 + - jupyter-lsp=2.2.5=pyhd8ed1ab_0 + - jupyter_client=8.6.2=pyhd8ed1ab_0 + - jupyter_core=5.7.2=py39hf3d152e_0 + - jupyter_events=0.10.0=pyhd8ed1ab_0 + - jupyter_server=2.14.1=pyhd8ed1ab_0 + - jupyter_server_terminals=0.5.3=pyhd8ed1ab_0 + - jupyterlab=4.2.2=pyhd8ed1ab_0 + - jupyterlab_pygments=0.3.0=pyhd8ed1ab_1 + - jupyterlab_server=2.27.2=pyhd8ed1ab_0 + - kernel-headers_linux-64=3.10.0=h4a8ded7_14 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.21.2=h659d440_0 + - ld_impl_linux-64=2.40=hf3520f5_6 + - lerc=4.0.0=h27087fc_0 + - libasprintf=0.22.5=h661eb56_2 + - libasprintf-devel=0.22.5=h661eb56_2 + - libblas=3.9.0=22_linux64_openblas + - libboost-python=1.84.0=py39h85c637f_3 + - libcblas=3.9.0=22_linux64_openblas + - libcurl=8.8.0=hca28451_0 + - libcxx=16.0.6=h00ab1b0_0 + - libcxxabi=16.0.6=ha770c72_0 + - libdeflate=1.20=hd590300_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=hd590300_2 + - libexpat=2.6.2=h59595ed_0 + - libffi=3.4.2=h7f98852_5 + - libgcc-devel_linux-64=12.3.0=h6b66f73_110 + - libgcc-ng=13.2.0=h77fa898_10 + - libgd=2.3.3=h119a65a_9 + - libgettextpo=0.22.5=h59595ed_2 + - libgettextpo-devel=0.22.5=h59595ed_2 + - libgfortran-ng=13.2.0=h69a702a_10 + - 
libgfortran5=13.2.0=h3d2ce59_10 + - libglib=2.80.2=h8a4344b_1 + - libglu=9.0.0=hac7e632_1003 + - libgomp=13.2.0=h77fa898_10 + - libhwloc=2.10.0=default_h5622ce7_1001 + - libiconv=1.17=hd590300_2 + - libjpeg-turbo=3.0.0=hd590300_1 + - liblapack=3.9.0=22_linux64_openblas + - libllvm13=13.0.1=hf817b99_2 + - libllvm14=14.0.6=hcd5def8_4 + - libnghttp2=1.58.0=h47da74e_1 + - libnsl=2.0.1=hd590300_0 + - libntlm=1.4=h7f98852_1002 + - libopenblas=0.3.27=pthreads_h413a1c8_0 + - libpng=1.6.43=h2797004_0 + - librsvg=2.58.0=hadf69e7_1 + - libsanitizer=12.3.0=hb8811af_10 + - libsodium=1.0.18=h36c2ea0_1 + - libsqlite=3.46.0=hde9e2c9_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-devel_linux-64=12.3.0=h6b66f73_110 + - libstdcxx-ng=13.2.0=hc0a3c3a_10 + - libtiff=4.6.0=h1dd3fc0_3 + - libtool=2.4.7=h27087fc_0 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp=1.4.0=h2c329e2_0 + - libwebp-base=1.4.0=hd590300_0 + - libxcb=1.15=h0b41bf4_0 + - libxcrypt=4.4.36=hd590300_1 + - libxml2=2.12.7=hc051c1a_1 + - libzlib=1.3.1=h4ab18f5_1 + - llvmlite=0.42.0=py39h174d805_1 + - lz4-c=1.9.4=hcb278e6_0 + - markupsafe=2.1.5=py39hd1e30aa_0 + - matplotlib-inline=0.1.7=pyhd8ed1ab_0 + - metakernel=0.30.2=pyhd8ed1ab_0 + - mistune=3.0.2=pyhd8ed1ab_0 + - nbclient=0.10.0=pyhd8ed1ab_0 + - nbconvert-core=7.16.4=pyhd8ed1ab_1 + - nbformat=5.10.4=pyhd8ed1ab_0 + - ncurses=6.5=h59595ed_0 + - nest-asyncio=1.6.0=pyhd8ed1ab_0 + - nlohmann_json=3.11.2=h27087fc_0 + - notebook=7.2.1=pyhd8ed1ab_0 + - notebook-shim=0.2.4=pyhd8ed1ab_0 + - numba=0.59.1=py39h615d6bd_0 + - numpy=1.26.4=py39h474f0d3_0 + - openldap=2.6.8=hedd0468_0 + - openssl=3.3.1=h4ab18f5_0 + - overrides=7.7.0=pyhd8ed1ab_0 + - packaging=24.1=pyhd8ed1ab_0 + - pandocfilters=1.5.0=pyhd8ed1ab_0 + - pango=1.54.0=h84a9a3c_0 + - parso=0.8.4=pyhd8ed1ab_0 + - pcre=8.45=h9c3ff4c_0 + - pcre2=10.44=h0f59acf_0 + - perl=5.32.1=7_hd590300_perl5 + - pexpect=4.9.0=pyhd8ed1ab_0 + - pickleshare=0.7.5=py_1003 + - pip=24.0=pyhd8ed1ab_0 + - pixman=0.43.2=h59595ed_0 + - 
pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1 + - platformdirs=4.2.2=pyhd8ed1ab_0 + - portalocker=2.8.2=py39hf3d152e_1 + - prometheus_client=0.20.0=pyhd8ed1ab_0 + - prompt-toolkit=3.0.47=pyha770c72_0 + - psutil=5.9.8=py39hd1e30aa_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - ptyprocess=0.7.0=pyhd3deb0d_0 + - pugixml=1.14=h59595ed_0 + - pure_eval=0.2.2=pyhd8ed1ab_0 + - pycparser=2.22=pyhd8ed1ab_0 + - pygments=2.18.0=pyhd8ed1ab_0 + - pysocks=1.7.1=pyha2e5f31_6 + - pythia8=8.310=py39h3d6467e_0 + - python=3.9.19=h0755675_0_cpython + - python-fastjsonschema=2.20.0=pyhd8ed1ab_0 + - python-gfal2=1.12.2=py39hf65e428_2 + - python-json-logger=2.0.7=pyhd8ed1ab_0 + - python_abi=3.9=4_cp39 + - pytz=2024.1=pyhd8ed1ab_0 + - pyyaml=6.0.1=py39hd1e30aa_1 + - pyzmq=26.0.3=py39ha1047a2_0 + - readline=8.2=h8228510_1 + - referencing=0.35.1=pyhd8ed1ab_0 + - requests=2.32.3=pyhd8ed1ab_0 + - rfc3339-validator=0.1.4=pyhd8ed1ab_0 + - rfc3986-validator=0.1.1=pyh9f0ad1d_0 + - root=6.30.4=py39hddac248_0 + - root_base=6.30.4=py39h479b7f5_0 + - rpds-py=0.18.1=py39ha68c5e3_0 + - scitokens-cpp=1.0.2=haea88ab_0 + - send2trash=1.8.3=pyh0d859eb_0 + - setuptools=70.0.0=pyhd8ed1ab_0 + - six=1.16.0=pyh6c4a22f_0 + - sniffio=1.3.1=pyhd8ed1ab_0 + - soupsieve=2.5=pyhd8ed1ab_1 + - srm-ifce=1.24.6=h3b26d37_2 + - stack_data=0.6.2=pyhd8ed1ab_0 + - sysroot_linux-64=2.17=h4a8ded7_14 + - tbb=2021.12.0=h297d8ca_1 + - terminado=0.18.1=pyh0d859eb_0 + - tinycss2=1.3.0=pyhd8ed1ab_0 + - tk=8.6.13=noxft_h4845f30_101 + - tomli=2.0.1=pyhd8ed1ab_0 + - tornado=6.4.1=py39hd3abc70_0 + - tqdm=4.66.4=pyhd8ed1ab_0 + - traitlets=5.14.3=pyhd8ed1ab_0 + - types-python-dateutil=2.9.0.20240316=pyhd8ed1ab_0 + - typing-extensions=4.12.2=hd8ed1ab_0 + - typing_extensions=4.12.2=pyha770c72_0 + - typing_utils=0.1.0=pyhd8ed1ab_0 + - tzdata=2024a=h0c530f3_0 + - uri-template=1.3.0=pyhd8ed1ab_0 + - urllib3=2.2.1=pyhd8ed1ab_0 + - vdt=0.4.4=h59595ed_0 + - vector-classes=1.4.4=h00ab1b0_0 + - vim=9.1.0356=py39pl5321ha23271e_0 + - voms=2.1.0rc3=h25bd2b9_0 + - 
wcwidth=0.2.13=pyhd8ed1ab_0 + - webcolors=24.6.0=pyhd8ed1ab_0 + - webencodings=0.5.1=pyhd8ed1ab_2 + - websocket-client=1.8.0=pyhd8ed1ab_0 + - wheel=0.43.0=pyhd8ed1ab_1 + - xorg-fixesproto=5.0=h7f98852_1002 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.1.1=hd590300_0 + - xorg-libsm=1.2.4=h7391055_0 + - xorg-libx11=1.8.9=h8ee46fc_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxcursor=1.2.0=h0b41bf4_1 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxfixes=5.0.3=h7f98852_1004 + - xorg-libxft=2.3.8=hf69aa0a_0 + - xorg-libxpm=3.5.17=hd590300_0 + - xorg-libxrender=0.9.11=hd590300_0 + - xorg-libxt=1.3.0=hd590300_1 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xrootd=5.6.8=py39h2326971_0 + - xxhash=0.8.2=hd590300_0 + - xz=5.2.6=h166bdaf_0 + - yaml=0.2.5=h7f98852_2 + - zeromq=4.3.5=h75354e8_4 + - zipp=3.19.2=pyhd8ed1ab_0 + - zlib=1.3.1=h4ab18f5_1 + - zstd=1.5.6=ha6fb4c9_0 + - pip: + - docutils==0.21.2 + - lockfile==0.12.2 + - luigi==3.5.1 + - markdown-it-py==3.0.0 + - mdurl==0.1.2 + - python-daemon==3.0.1 + - python-dateutil==2.9.0.post0 + - rich==13.7.1 + - tenacity==8.3.0 diff --git a/containers/Dockerfile b/containers/Dockerfile new file mode 100644 index 00000000..b92b4526 --- /dev/null +++ b/containers/Dockerfile @@ -0,0 +1,93 @@ +FROM cern/alma9-base:latest + +LABEL author="Tim Voigtländer " + +# ------------------------------------------------------------------------------ +# Environment +# ------------------------------------------------------------------------------ +ENV CONDA_DIR=/opt/conda + +# ------------------------------------------------------------------------------ +# Prepare filesystem +# ------------------------------------------------------------------------------ +RUN mkdir -p /srv + +# ------------------------------------------------------------------------------ +# System packages +# 
------------------------------------------------------------------------------ +RUN dnf -y update && \ + dnf -y install epel-release && \ + dnf -y install \ + wget \ + python3 \ + python3-pip \ + tini \ + which \ + zlib-devel \ + gcc \ + glibc-devel \ + kernel-headers \ + pcre2-utf16 \ + libSM \ + strace \ + perf \ + openssh \ + git \ + crypto-policies-scripts && \ + dnf clean all + +# ------------------------------------------------------------------------------ +# Re-enable SHA1 signature support in the system crypto policy (ROOT / grid certificate compatibility) +# ------------------------------------------------------------------------------ +RUN update-crypto-policies --set DEFAULT:SHA1 + +# ------------------------------------------------------------------------------ +# Python tooling +# ------------------------------------------------------------------------------ +RUN python3 -m pip install --upgrade pip + +# ------------------------------------------------------------------------------ +# Install Miniforge (Conda) +# ------------------------------------------------------------------------------ +ARG MINIFORGE_NAME=Miniforge3 +ARG MINIFORGE_VERSION=23.11.0-0 +RUN wget -q https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \ + bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \ + rm -f /tmp/miniforge.sh + +# ------------------------------------------------------------------------------ +# Conda environment specification +# The build fails if ENV_FILE_NAME is left at its default value +# ------------------------------------------------------------------------------ +ARG ENV_NAME=env +ARG ENV_FILE_NAME=None +RUN if [ "$ENV_FILE_NAME" = "None" ]; then \ + echo "ERROR: You must pass --build-arg ENV_FILE_NAME=<env_file.yml>"; \ + exit 1; \ + fi +COPY ${ENV_FILE_NAME} /srv/conda_env.yml + +# ------------------------------------------------------------------------------ +# Create conda environment (build-time only) +# 
------------------------------------------------------------------------------ + +RUN (source ${CONDA_DIR}/bin/activate "" && conda env create -f /srv/conda_env.yml -n ${ENV_NAME}) + +# ------------------------------------------------------------------------------ +# Runtime Configuration +# ------------------------------------------------------------------------------ +# Create the entrypoint script inline +RUN mkdir -p /.singularity.d/env && \ + printf "#!/bin/bash\nsource /opt/conda/bin/activate ${ENV_NAME}\n" > /.singularity.d/env/99-conda.sh && \ + chmod +x /.singularity.d/env/99-conda.sh + +# Create the Docker Entrypoint +RUN printf "#!/bin/bash\nsource /opt/conda/bin/activate ${ENV_NAME}\nexec \"\$@\"\n" > /usr/local/bin/entrypoint.sh && \ + chmod +x /usr/local/bin/entrypoint.sh + +# Make the Conda environment binaries globally accessible for github +ENV CONDA_PREFIX=${CONDA_DIR}/envs/${ENV_NAME} +ENV PATH="${CONDA_DIR}/envs/${ENV_NAME}/bin:$PATH" + +ENTRYPOINT ["tini", "--", "/usr/local/bin/entrypoint.sh"] +CMD ["/bin/bash"] diff --git a/containers/KingMakerMinimal_env.yml b/containers/KingMakerMinimal_env.yml new file mode 100644 index 00000000..29c2207b --- /dev/null +++ b/containers/KingMakerMinimal_env.yml @@ -0,0 +1,18 @@ +channels: + - conda-forge +dependencies: + - gfal2-util + - git + - kernel-headers_linux-64 + - python=3.12 + - rich + - setuptools<81 + - vim + - apptainer + - pip + # formatting + - black + - pip: + - luigi + - questionary + - uproot diff --git a/containers/KingMakerStandaloneMinimal_env.yml b/containers/KingMakerStandaloneMinimal_env.yml new file mode 100644 index 00000000..fc1357e1 --- /dev/null +++ b/containers/KingMakerStandaloneMinimal_env.yml @@ -0,0 +1,37 @@ +channels: + - conda-forge +dependencies: + - boost-cpp + - ccache + - cmake + - make + - correctionlib + - gcc + - gfal2-util + - git + - kernel-headers_linux-64 + - onnxruntime-cpp + - openmpi-mpicxx + - python=3.12 + - rich + - root=6.36 + - setuptools<81 + - vim + - 
xrootd + - pip + - openssh + # documentation + - doxygen + # formatting + - clang-format + - black + - pip: + - luigi + - questionary + - uproot + # documentation + - breathe + - sphinx + - sphinx_rtd_theme + # formatting + - cmakelang diff --git a/containers/KingMakerStandalone_env.yml b/containers/KingMakerStandalone_env.yml new file mode 100644 index 00000000..ea3e141c --- /dev/null +++ b/containers/KingMakerStandalone_env.yml @@ -0,0 +1,405 @@ +channels: + - conda-forge +dependencies: + - _openmp_mutex=4.5 + - _python_abi3_support=1.0 + - _x86_64-microarch-level=2 + - adwaita-icon-theme=49.0 + - annotated-types=0.7.0 + - anyio=4.12.1 + - argon2-cffi=25.1.0 + - argon2-cffi-bindings=25.1.0 + - arrow=1.4.0 + - asttokens=3.0.1 + - async-lru=2.2.0 + - at-spi2-atk=2.38.0 + - at-spi2-core=2.40.3 + - atk-1.0=2.38.0 + - attr=2.5.2 + - attrs=25.4.0 + - babel=2.18.0 + - backports.zstd=1.3.0 + - beautifulsoup4=4.14.3 + - binutils=2.45.1 + - binutils_impl_linux-64=2.45.1 + - binutils_linux-64=2.45.1 + - black=26.1.0 + - bleach=6.3.0 + - bleach-with-css=6.3.0 + - boost-cpp=1.85.0 + - brotli-python=1.2.0 + - bzip2=1.0.8 + - c-ares=1.34.6 + - c-compiler=1.11.0 + - ca-certificates=2026.1.4 + - cached-property=1.5.2 + - cached_property=1.5.2 + - cairo=1.18.4 + - ccache=4.12.3 + - certifi=2026.1.4 + - cffi=2.0.0 + - cfitsio=4.6.3 + - cgsi-gsoap=1.3.12 + - charset-normalizer=3.4.4 + - clang-format=21.1.8 + - clang-format-21=21.1.8 + - click=8.3.1 + - cmake=4.2.3 + - comm=0.2.3 + - compilers=1.11.0 + - conda-gcc-specs=14.3.0 + - correctionlib=2.7.0 + - cpython=3.12.12 + - cxx-compiler=1.11.0 + - cyrus-sasl=2.1.28 + - davix=0.8.10 + - dbus=1.16.2 + - dcap=2.47.14 + - debugpy=1.8.20 + - decorator=5.2.1 + - defusedxml=0.7.1 + - doxygen=1.13.2 + - epoxy=1.5.10 + - exceptiongroup=1.3.1 + - executing=2.2.1 + - expat=2.7.4 + - fastjet-cxx=3.5.1 + - fftw=3.3.10 + - font-ttf-dejavu-sans-mono=2.37 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 + - font-ttf-ubuntu=0.83 + - 
fontconfig=2.17.1 + - fonts-conda-ecosystem=1 + - fonts-conda-forge=1 + - fortran-compiler=1.11.0 + - fqdn=1.5.1 + - freetype=2.14.1 + - fribidi=1.0.16 + - ftgl=2.4.0 + - gcc=14.3.0 + - gcc_impl_linux-64=14.3.0 + - gcc_linux-64=14.3.0 + - gct=6.2.1705709074 + - gdk-pixbuf=2.44.5 + - gettext=0.25.1 + - gettext-tools=0.25.1 + - gfal2=2.23.5 + - gfal2-util=1.9.0 + - gfortran=14.3.0 + - gfortran_impl_linux-64=14.3.0 + - gfortran_linux-64=14.3.0 + - giflib=5.2.2 + - git=2.53.0 + - gl2ps=1.4.2 + - glew=2.3.0 + - glib-tools=2.86.4 + - graphite2=1.3.14 + - graphviz=14.1.2 + - gsl=2.7 + - gsoap=2.8.123 + - gtest=1.17.0 + - gtk3=3.24.43 + - gts=0.7.6 + - gxx=14.3.0 + - gxx_impl_linux-64=14.3.0 + - gxx_linux-64=14.3.0 + - h11=0.16.0 + - h2=4.3.0 + - harfbuzz=12.2.0 + - hicolor-icon-theme=0.17 + - hpack=4.1.0 + - httpcore=1.0.9 + - httpx=0.28.1 + - hyperframe=6.1.0 + - icu=75.1 + - idna=3.11 + - importlib-metadata=8.7.0 + - importlib_resources=6.5.2 + - ipykernel=7.2.0 + - ipyparallel=9.0.2 + - ipython=9.10.0 + - ipython_pygments_lexers=1.1.1 + - isoduration=20.11.0 + - jedi=0.19.2 + - jinja2=3.1.6 + - json-c=0.18 + - json5=0.13.0 + - jsonpointer=3.0.0 + - jsonschema=4.26.0 + - jsonschema-specifications=2025.9.1 + - jsonschema-with-format-nongpl=4.26.0 + - jupyter-lsp=2.3.0 + - jupyter_client=8.8.0 + - jupyter_core=5.9.1 + - jupyter_events=0.12.0 + - jupyter_server=2.17.0 + - jupyter_server_terminals=0.5.4 + - jupyterlab=4.5.5 + - jupyterlab_pygments=0.3.0 + - jupyterlab_server=2.28.0 + - kernel-headers_linux-64=3.10.0 + - keyutils=1.6.3 + - krb5=1.21.3 + - lark=1.3.1 + - ld_impl_linux-64=2.45.1 + - lerc=4.0.0 + - libasprintf=0.25.1 + - libasprintf-devel=0.25.1 + - libblas=3.11.0 + - libboost=1.85.0 + - libboost-devel=1.85.0 + - libboost-headers=1.85.0 + - libboost-python=1.86.0 + - libcap=2.77 + - libcblas=3.11.0 + - libcbor=0.10.2 + - libclang-cpp21.1=21.1.8 + - libcups=2.3.3 + - libcurl=8.18.0 + - libdeflate=1.25 + - libdrm=2.4.125 + - libedit=3.1.20250104 + - libegl=1.7.0 
+ - libegl-devel=1.7.0 + - libev=4.33 + - libevent=2.1.12 + - libexpat=2.7.4 + - libfabric=2.4.0 + - libfabric1=2.4.0 + - libffi=3.5.2 + - libfido2=1.16.0 + - libfreetype=2.14.1 + - libfreetype6=2.14.1 + - libgcc=15.2.0 + - libgcc-devel_linux-64=14.3.0 + - libgcc-ng=15.2.0 + - libgd=2.3.3 + - libgettextpo=0.25.1 + - libgettextpo-devel=0.25.1 + - libgfortran=15.2.0 + - libgfortran5=15.2.0 + - libgl=1.7.0 + - libgl-devel=1.7.0 + - libglib=2.86.4 + - libglu=9.0.3 + - libglvnd=1.7.0 + - libglx=1.7.0 + - libglx-devel=1.7.0 + - libgomp=15.2.0 + - libhiredis=1.3.0 + - libhwloc=2.12.2 + - libiconv=1.18 + - libjpeg-turbo=3.1.2 + - liblapack=3.11.0 + - libllvm18=18.1.8 + - libllvm21=21.1.8 + - libltdl=2.4.3a + - liblzma=5.8.2 + - liblzma-devel=5.8.2 + - libnghttp2=1.67.0 + - libnl=3.11.0 + - libnsl=2.0.1 + - libntlm=1.8 + - libopenblas=0.3.30 + - libopengl=1.7.0 + - libpciaccess=0.18 + - libpmix=5.0.8 + - libpng=1.6.55 + - librsvg=2.60.2 + - libsanitizer=14.3.0 + - libsodium=1.0.20 + - libsqlite=3.51.2 + - libssh2=1.11.1 + - libstdcxx=15.2.0 + - libstdcxx-devel_linux-64=14.3.0 + - libstdcxx-ng=15.2.0 + - libsystemd0=259.1 + - libtiff=4.7.1 + - libtool=2.5.4 + - libudev1=259.1 + - libuuid=2.41.3 + - libuv=1.51.0 + - libwebp-base=1.6.0 + - libxcb=1.17.0 + - libxcrypt=4.4.36 + - libxkbcommon=1.13.1 + - libxml2=2.15.1 + - libxml2-16=2.15.1 + - libxml2-devel=2.15.1 + - libzlib=1.3.1 + - llvm-openmp=21.1.8 + - llvmlite=0.46.0 + - lz4-c=1.10.0 + - make=4.4.1 + - markdown-it-py=4.0.0 + - markupsafe=3.0.3 + - matplotlib-inline=0.2.1 + - mdurl=0.1.2 + - metakernel=0.30.4 + - mistune=3.2.0 + - mpi=1.0.1 + - mypy_extensions=1.1.0 + - nbclient=0.10.4 + - nbconvert-core=7.17.0 + - nbformat=5.10.4 + - ncurses=6.5 + - nest-asyncio=1.6.0 + - nlohmann_json=3.12.0 + - notebook=7.5.4 + - notebook-shim=0.2.4 + - numba=0.63.1 + - numpy=2.3.5 + - onnxruntime-cpp=1.22.2 + - openldap=2.6.10 + - openmpi=5.0.8 + - openmpi-mpicxx=5.0.8 + - openssh=10.2p1 + - openssl=3.6.1 + - overrides=7.7.0 + - 
packaging=26.0 + - pandocfilters=1.5.0 + - pango=1.56.4 + - parso=0.8.6 + - pathspec=1.0.4 + - pcre=8.45 + - pcre2=10.47 + - perl=5.32.1 + - pexpect=4.9.0 + - pip=26.0.1 + - pixman=0.46.4 + - platformdirs=4.9.2 + - portalocker=3.2.0 + - prometheus_client=0.24.1 + - prompt-toolkit=3.0.52 + - psutil=7.2.2 + - pthread-stubs=0.4 + - ptyprocess=0.7.0 + - pugixml=1.15 + - pure_eval=0.2.3 + - pycparser=2.22 + - pydantic=2.12.5 + - pydantic-core=2.41.5 + - pygments=2.19.2 + - pysocks=1.7.1 + - pythia8=8.312 + - python=3.12.12 + - python-dateutil=2.9.0.post0 + - python-fastjsonschema=2.21.2 + - python-gfal2=1.13.0 + - python-gil=3.12.12 + - python-json-logger=2.0.7 + - python-tzdata=2025.3 + - python_abi=3.12 + - pytokens=0.4.1 + - pytz=2025.2 + - pyyaml=6.0.3 + - pyzmq=27.1.0 + - rdma-core=61.0 + - readline=8.3 + - referencing=0.37.0 + - requests=2.32.5 + - rfc3339-validator=0.1.4 + - rfc3986-validator=0.1.1 + - rfc3987-syntax=1.1.0 + - rhash=1.4.6 + - rich=14.3.3 + - root=6.36.08 + - root_base=6.36.08 + - root_cxx_standard=20 + - rpds-py=0.30.0 + - scitokens-cpp=1.4.0 + - send2trash=2.1.0 + - setuptools=80.10.2 + - siscone=3.1.2 + - six=1.17.0 + - sniffio=1.3.1 + - soupsieve=2.8.3 + - srm-ifce=1.24.6 + - stack_data=0.6.3 + - sysroot_linux-64=2.17 + - tbb=2022.3.0 + - terminado=0.18.1 + - tinycss2=1.4.0 + - tk=8.6.13 + - tomli=2.4.0 + - tornado=6.5.4 + - tqdm=4.67.3 + - traitlets=5.14.3 + - typing-extensions=4.15.0 + - typing-inspection=0.4.2 + - typing_extensions=4.15.0 + - typing_utils=0.1.0 + - tzdata=2025c + - ucc=1.6.0 + - ucx=1.20.0 + - uri-template=1.3.0 + - urllib3=2.6.3 + - vdt=0.4.4 + - vector-classes=1.4.5 + - vim=9.2.0045 + - voms=2.1.0rc3 + - wayland=1.24.0 + - wcwidth=0.6.0 + - webcolors=25.10.0 + - webencodings=0.5.1 + - websocket-client=1.9.0 + - wheel=0.46.3 + - xkeyboard-config=2.46 + - xorg-libice=1.1.2 + - xorg-libsm=1.2.6 + - xorg-libx11=1.8.13 + - xorg-libxau=1.0.12 + - xorg-libxcomposite=0.4.7 + - xorg-libxcursor=1.2.3 + - xorg-libxdamage=1.1.6 + - 
xorg-libxdmcp=1.1.5 + - xorg-libxext=1.3.7 + - xorg-libxfixes=6.0.2 + - xorg-libxft=2.3.9 + - xorg-libxi=1.8.2 + - xorg-libxinerama=1.1.6 + - xorg-libxpm=3.5.18 + - xorg-libxrandr=1.5.5 + - xorg-libxrender=0.9.12 + - xorg-libxt=1.3.1 + - xorg-libxtst=1.2.5 + - xorg-libxxf86vm=1.1.7 + - xorg-xorgproto=2025.1 + - xrootd=5.9.1 + - xz=5.8.2 + - xz-gpl-tools=5.8.2 + - xz-tools=5.8.2 + - yaml=0.2.5 + - zeromq=4.3.5 + - zipp=3.23.0 + - zlib=1.3.1 + - zstd=1.5.7 + - pip: + - alabaster==1.0.0 + - awkward==2.9.0 + - awkward-cpp==52 + - breathe==4.36.0 + - cmakelang==0.6.13 + - cramjam==2.11.0 + - docutils==0.22.4 + - fsspec==2026.2.0 + - imagesize==1.4.1 + - lockfile==0.12.2 + - luigi==3.7.3 + - python-daemon==3.1.2 + - questionary==2.1.1 + - roman-numerals==4.1.0 + - snowballstemmer==3.0.1 + - sphinx==9.1.0 + - sphinx-rtd-theme==3.1.0 + - sphinxcontrib-applehelp==2.0.0 + - sphinxcontrib-devhelp==2.0.0 + - sphinxcontrib-htmlhelp==2.1.0 + - sphinxcontrib-jquery==4.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==2.0.0 + - sphinxcontrib-serializinghtml==2.0.0 + - tenacity==8.5.0 + - uproot==5.7.1 + - xxhash==3.6.0 diff --git a/containers/KingMaker_env.yml b/containers/KingMaker_env.yml new file mode 100644 index 00000000..c5d7c188 --- /dev/null +++ b/containers/KingMaker_env.yml @@ -0,0 +1,132 @@ +channels: + - conda-forge +dependencies: + - _openmp_mutex=4.5 + - apptainer=1.4.5 + - black=26.1.0 + - bzip2=1.0.8 + - c-ares=1.34.6 + - ca-certificates=2026.2.25 + - cgsi-gsoap=1.3.12 + - click=8.3.1 + - cni=1.0.1 + - cni-plugins=1.3.0 + - cyrus-sasl=2.1.28 + - davix=0.8.10 + - dcap=2.47.14 + - expat=2.7.4 + - fuse-overlayfs=1.16 + - gct=6.2.1705709074 + - gfal2=2.23.5 + - gfal2-util=1.9.0 + - git=2.53.0 + - gocryptfs=2.6.1 + - gsoap=2.8.123 + - gtest=1.17.0 + - icu=78.2 + - jq=1.8.1 + - json-c=0.18 + - kernel-headers_linux-64=6.12.0 + - keyutils=1.6.3 + - krb5=1.22.2 + - ld_impl_linux-64=2.45.1 + - libarchive=3.8.5 + - libblas=3.11.0 + - libboost=1.88.0 + - 
libboost-python=1.88.0 + - libcblas=3.11.0 + - libcurl=8.18.0 + - libedit=3.1.20250104 + - libev=4.33 + - libexpat=2.7.4 + - libffi=3.5.2 + - libfuse3=3.18.1 + - libgcc=15.2.0 + - libgcc-ng=15.2.0 + - libgfortran=15.2.0 + - libgfortran5=15.2.0 + - libglib=2.86.4 + - libgomp=15.2.0 + - libiconv=1.18 + - liblapack=3.11.0 + - libltdl=2.4.3a + - liblzma=5.8.2 + - libnghttp2=1.67.0 + - libnsl=2.0.1 + - libntlm=1.8 + - libopenblas=0.3.30 + - libseccomp=2.6.0 + - libsqlite=3.51.2 + - libssh2=1.11.1 + - libstdcxx=15.2.0 + - libstdcxx-ng=15.2.0 + - libtool=2.5.4 + - libuuid=2.41.3 + - libxcb=1.17.0 + - libxcrypt=4.4.36 + - libxml2=2.15.1 + - libxml2-16=2.15.1 + - libxml2-devel=2.15.1 + - libzlib=1.3.1 + - lz4-c=1.10.0 + - lzo=2.10 + - markdown-it-py=4.0.0 + - mdurl=0.1.2 + - mypy_extensions=1.1.0 + - ncurses=6.5 + - numpy=2.4.2 + - oniguruma=6.9.10 + - openldap=2.6.10 + - openssl=3.6.1 + - packaging=26.0 + - pathspec=1.0.4 + - pcre2=10.47 + - perl=5.32.1 + - pip=26.0.1 + - platformdirs=4.9.2 + - pthread-stubs=0.4 + - pugixml=1.15 + - pygments=2.19.2 + - python=3.12.12 + - python-gfal2=1.13.1 + - python_abi=3.12 + - pytokens=0.4.1 + - readline=8.3 + - rich=14.3.3 + - scitokens-cpp=1.4.0 + - setuptools=80.10.2 + - squashfs-tools=4.7.5 + - squashfuse=0.6.1 + - srm-ifce=1.24.6 + - tk=8.6.13 + - typing_extensions=4.15.0 + - tzdata=2025c + - vim=9.2.0045 + - voms=2.1.0rc3 + - wheel=0.46.3 + - xorg-libice=1.1.2 + - xorg-libsm=1.2.6 + - xorg-libx11=1.8.13 + - xorg-libxau=1.0.12 + - xorg-libxdmcp=1.1.5 + - xorg-libxt=1.3.1 + - xorg-xorgproto=2025.1 + - xrootd=5.9.1 + - zlib=1.3.1 + - zstd=1.5.7 + - pip: + - awkward==2.9.0 + - awkward-cpp==52 + - cramjam==2.11.0 + - fsspec==2026.2.0 + - lockfile==0.12.2 + - luigi==3.7.3 + - prompt-toolkit==3.0.52 + - python-daemon==3.1.2 + - python-dateutil==2.9.0.post0 + - questionary==2.1.1 + - tenacity==8.5.0 + - tornado==6.5.4 + - uproot==5.7.1 + - wcwidth==0.6.0 + - xxhash==3.6.0 diff --git a/containers/README.md b/containers/README.md new file 
mode 100644 index 00000000..95e15d55 --- /dev/null +++ b/containers/README.md @@ -0,0 +1,39 @@ +# KingMaker Containers +[← Back to KingMaker Main Project](../README.md) + +This directory contains the Dockerfile and Conda environment YAML files used to build container images for running KingMaker and CROWN workflows. + +Contents + +- `Dockerfile`: primary Dockerfile for the default image, based on AlmaLinux 9 (`cern/alma9-base`). +- `*_env.yml`: Conda environment specs. + +Docker Build + +This repository includes a number of Conda env files that can be utilized via build arguments. + +To build a single image: + +```bash +cd containers +docker build --build-arg ENV_FILE_NAME=<env_file.yml> -t <image_tag> . +``` + +The build will fail if no `ENV_FILE_NAME` build argument is provided. + +Example for build, tag and push: + +```bash +docker build --build-arg ENV_FILE_NAME=KingMaker_env.yml -t testing_abc . +docker tag testing_abc kingmakerimages/kingmaker_standalone:V0.1 +docker push kingmakerimages/kingmaker_standalone:V0.1 +``` + +Usage with KingMaker + +Container images built this way can be utilized for both local sandboxing (`sandbox`) and for use in the batch system (`htcondor_container_image`). +Both can be set in the `*_luigi.cfg` files in the `lawluigi_configs` directory. + +KingMaker relies on apptainer-style addresses (e.g. ``kingmakerimages/kingmaker_standalone:V0.1``). +The built container can also be added to [CERN CVMFS unpacked](https://gitlab.cern.ch/unpacked/sync/) once it is considered stable. +The default container (``/cvmfs/unpacked.cern.ch/registry.hub.docker.com/kingmakerimages/kingmaker_standalone:V1/``) is one such example. 
diff --git a/kingmaker-images b/kingmaker-images deleted file mode 160000 index 38f889de..00000000 --- a/kingmaker-images +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 38f889de9c98da042c3e2c4e636fbb2d9308fd7b diff --git a/lawluigi_configs/GPU_example_luigi.cfg b/lawluigi_configs/GPU_example_luigi.cfg index 5e75eb50..d567e4ca 100644 --- a/lawluigi_configs/GPU_example_luigi.cfg +++ b/lawluigi_configs/GPU_example_luigi.cfg @@ -21,9 +21,9 @@ htcondor_remote_job = True htcondor_request_cpus = 1 htcondor_request_gpus = 1 ; for all cores in total -htcondor_universe = docker +htcondor_universe = container ;image without GPU libraries -# htcondor_docker_image = mschnepf/slc7-condocker:latest +# htcondor_container_image = docker://mschnepf/slc7-condocker:latest ; create log files in htcondor jobs transfer_logs = True ; set local scheduler diff --git a/lawluigi_configs/KingMaker_law.cfg b/lawluigi_configs/KingMaker_law.cfg index 786fe8a3..16bb9480 100644 --- a/lawluigi_configs/KingMaker_law.cfg +++ b/lawluigi_configs/KingMaker_law.cfg @@ -38,3 +38,6 @@ base: root://cmsdcache-kit-disk.gridka.de//store/user/${USER}/CROWN/ntuples/ use_cache: True cache_root: /tmp/${USER}/ cache_max_size: 20000 + +[singularity_sandbox] +forward_law = False diff --git a/lawluigi_configs/KingMaker_luigi.cfg b/lawluigi_configs/KingMaker_luigi.cfg index 38edaac1..cf185470 100644 --- a/lawluigi_configs/KingMaker_luigi.cfg +++ b/lawluigi_configs/KingMaker_luigi.cfg @@ -32,16 +32,12 @@ htcondor_remote_job = True htcondor_request_cpus = 4 ; htcondor_request_gpus = 1 ; for all cores in total -htcondor_universe = docker +htcondor_universe = container ; create log files in htcondor jobs transfer_logs = True ; set local scheduler -############################################################################## -#### CENTRAL SCHEDULER IS UNAVAILABLE FOR LXPLUS. 
USE THE LOCAL SCHEDULER #### local_scheduler = False scheduler_port = ${LUIGIPORT} -; local_scheduler = True -############################################################################## ; set tolerance for workflow success with failed branches tolerance = 0.00 acceptance = 1.00 @@ -56,6 +52,9 @@ files_per_task = 10 scopes = mt,et shifts = None +htcondor_container_image = /cvmfs/unpacked.cern.ch/registry.hub.docker.com/kingmakerimages/kingmaker_standalone:V1 +sandbox = singularity::/cvmfs/unpacked.cern.ch/registry.hub.docker.com/kingmakerimages/kingmaker_standalone:V1 + ; Only set this parameter to True if the tarball with the law tasks needs to ; be repacked. This might be needed if task code significantly changed between ; different runs of the workflow, and the tarball is already present in the cache. In this case, setting this parameter to True will force the repacking of the tarball, even if it is already present in the cache. diff --git a/lawluigi_configs/KingMaker_lxplus_law.cfg b/lawluigi_configs/KingMaker_lxplus_law.cfg new file mode 100644 index 00000000..75b1e886 --- /dev/null +++ b/lawluigi_configs/KingMaker_lxplus_law.cfg @@ -0,0 +1,44 @@ +[modules] +BuildCROWNLib +ConfigureDatasets +CROWNBase +CROWNBuild +CROWNBuildFriend +CROWNBuildMultiFriend +CROWNFriends +CROWNMultiFriends +CROWNRun +FriendQuantitiesMap +ProduceFriends +ProduceMultiFriends +ProduceSamples +QuantitiesMap + +# [logging] +# law: DEBUG +# luigi-interface: DEBUG + +[luigi_worker] +keep_alive: True +ping_interval: 20 +wait_interval: 20 +max_reschedules: 3 + +[target] +default_wlcg_fs = wlcg_fs + +[wlcg_fs] +# Remote storage location, also change this in the luigi config file +# NRG storage +# base: root://cmsdcache-kit-disk.gridka.de//store/user/${USER}/CROWN/ntuples/ +# EOS storage +base: root://eosuser.cern.ch//eos/user/${USER_FIRST_LETTER}/${USER}/CROWN/ntuples/ +use_cache: True +cache_root: /tmp/${USER}/ +cache_max_size: 20000 + +[singularity_sandbox] +forward_law = False + 
+[core] +local-scheduler = True diff --git a/lawluigi_configs/KingMaker_lxplus_luigi.cfg b/lawluigi_configs/KingMaker_lxplus_luigi.cfg new file mode 100644 index 00000000..e8efe6c8 --- /dev/null +++ b/lawluigi_configs/KingMaker_lxplus_luigi.cfg @@ -0,0 +1,113 @@ +[core] +no_lock = True +log_level = WARNING + +[worker] +keep_alive = False +ping_interval = 20 +wait_interval = 20 +max_reschedules = 10 + +[DEFAULT] +name = KingMaker +ENV_NAME = KingMaker + +; storing the output locally (local) or on grid (wlcg) +; when using local, make sure to ajust the htcondor requirements so all local paths are accessible +; for ETP, that is TARGET.ProvidesEtpResources +is_local_output = False + +; if the local path is set, the output will be copied to the local path after the job is finished +local_output_path = /ceph/${USER}/CROWN/ntuples/ +; grid storage protocol and path usable from submitting machine and worker nodes of cluster +; job in- and output will be stored in $wlcg_path under subdirectory of analysis $name +; Remote storage location, also change this in the law config file +; NRG storage +# wlcg_path = root://cmsdcache-kit-disk.gridka.de//store/user/${USER}/CROWN/ntuples/ +; EOS storage +wlcg_path = root://eosuser.cern.ch//eos/user/${USER_FIRST_LETTER}/${USER}/CROWN/ntuples/ +; default htcondor job submission configuration (modifiable for each task) +htcondor_accounting_group = cms.higgs +htcondor_remote_job = True +htcondor_request_cpus = 4 +; htcondor_request_gpus = 1 +; for all cores in total +htcondor_universe = container +; create log files in htcondor jobs +transfer_logs = True +; set local scheduler +local_scheduler = True +; set tolerance for workflow success with failed branches +tolerance = 0.00 +acceptance = 1.00 +; submit only missing htcondor workflow branches (should always be true) +only_missing = True + +; bootstrap file to be sourced at beginning of htcondor jobs (relative PATH to framework.py) +bootstrap_file = setup_law_remote.sh +files_per_task = 10 
+; scopes and shifts are to be provided in the config, or as command line arguments via --scope and --shift +; in both cases, the values are expected to be comma-separated lists without spaces or quotes +scopes = mt,et +shifts = None + +htcondor_container_image = /cvmfs/unpacked.cern.ch/registry.hub.docker.com/kingmakerimages/kingmaker_standalone:V1 +sandbox = singularity::/cvmfs/unpacked.cern.ch/registry.hub.docker.com/kingmakerimages/kingmaker_standalone:V1 + +################################################### NOTE ##################################################### +# Parameters of tasks that were not explicitly called in the cli will be set through the 'requires' functions. # +# Only parameters that are listed in 'exclude_params_req' are excluded from this. # +# Parameters listed in 'prefer_params_cli' will prioritise the parameters set in the command line # +# over the ones set in the 'requires' functions or the config files. # +################################################################################################################ + +[CROWNBuild] + +[CROWNBuildCombined] + +[CROWNBuildFriend] + +[CROWNBuildMultiFriend] + +[BuildCROWNLib] + +[CROWNRun] +; HTCondor +htcondor_walltime = 10800 +htcondor_request_memory = 16000 +htcondor_request_disk = 20000000 +; for these eras, only one file per task is processed +problematic_eras = ["2018B", "2017C", "2016B-ver2"] + +[CROWNFriends] +; HTCondor +htcondor_walltime = 10800 +htcondor_request_memory = 16000 +htcondor_request_disk = 20000000 +; friends have to be run in single core mode to ensure a correct order of the tree entries +htcondor_request_cpus = 1 + +[CROWNMultiFriends] +; HTCondor +htcondor_walltime = 10800 +htcondor_request_memory = 16000 +htcondor_request_disk = 20000000 +; friends have to be run in single core mode to ensure a correct order of the tree entries +htcondor_request_cpus = 1 + +[ProduceFriends] + +[ProduceMultiFriends] +; the mapping is "config_name": "friend_name" +friend_mapping 
= {} +; friend_mapping = { +; "unittest_friends": "id_iso_weights", +; "unittest_friends_2": "svfit"} + +[ProduceSamples] + +[ConfigureDatasets] +silent = True +; set to False to print out the datasets + +[QuantitiesMap] diff --git a/processor/framework.py b/processor/framework.py index d320661a..830a739e 100644 --- a/processor/framework.py +++ b/processor/framework.py @@ -20,8 +20,7 @@ except: pass -law.contrib.load("wlcg") -law.contrib.load("htcondor") +law.contrib.load("wlcg", "htcondor", "singularity") # try to get the terminal width, if this fails, we are probably in a remote job, set it to 140 try: current_width = os.get_terminal_size().columns @@ -309,9 +308,8 @@ class HTCondorWorkflow(Task, law.htcondor.HTCondorWorkflow): description="Universe to be set in HTCondor job submission.", significant=False, ) - htcondor_docker_image = luigi.Parameter( - description="Docker image to be used in HTCondor job submission.", - default="Automatic", + htcondor_container_image = luigi.Parameter( + description="Container image to be used in HTCondor job submission.", ) bootstrap_file = luigi.Parameter( description="Bootstrap script to be used in HTCondor job to set up law.", @@ -323,8 +321,8 @@ class HTCondorWorkflow(Task, law.htcondor.HTCondorWorkflow): significant=False, ) remote_source_script = luigi.Parameter( - description="Script to source environment in remote jobs. Leave empty if not needed. Defaults to use with docker images", - default="source /opt/conda/bin/activate env", + description="Script to source environment in remote jobs. Leave empty if not needed. 
Defaults to use with container images", + default="", significant=False, ) force_repack_tarball = luigi.BoolParameter( @@ -347,7 +345,7 @@ class HTCondorWorkflow(Task, law.htcondor.HTCondorWorkflow): "htcondor_request_memory", "htcondor_request_disk", "htcondor_universe", - "htcondor_docker_image", + "htcondor_container_image", "additional_files", "force_repack_tarball", "workflow", @@ -358,71 +356,6 @@ class HTCondorWorkflow(Task, law.htcondor.HTCondorWorkflow): | exclude_set ) - def get_submission_os(self): - # function to check, if running on centos7, rhel9 or Ubuntu22 - # Other OS are not permitted - # based on this, the correct docker image is chosen, overwriting the htcondor_docker_image parameter - # check if lsb_release is installed, if not, use the information from /etc/os-release - # Please note that this selection can be somewhat unstable. Modify if neccessary. - try: - distro = ( - subprocess.check_output( - "lsb_release -i | cut -f2", stderr=subprocess.STDOUT - ) - .decode() - .replace("Linux", "") - .replace("linux", "") - .replace(" ", "") - .strip() - ) - os_version = ( - subprocess.check_output( - "lsb_release -r | cut -f2", stderr=subprocess.STDOUT - ) - .decode() - .strip() - ) - except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): - distro = ( - subprocess.check_output( - "cat /etc/os-release | grep '^NAME=' | cut -f2 -d='' | tr -d '\"'", - shell=True, - ) - .decode() - .replace("Linux", "") - .replace("linux", "") - .replace(" ", "") - .strip() - ) - os_version = ( - subprocess.check_output( - "cat /etc/os-release | grep '^VERSION_ID=' | cut -f2 -d='' | tr -d '\"'", - shell=True, - ) - .decode() - .strip() - ) - - image_name = None - - if distro == "CentOS": - if os_version[0] == "7": - image_name = "centos7" - elif distro in ("RedHatEnterprise", "Alma"): - if os_version[0] == "9": - image_name = "rhel9" - elif distro == "Ubuntu": - if os_version[0:2] == "22": - image_name = "ubuntu2204" - else: - raise Exception( - 
f"Unknown OS {distro} {os_version}, KingMaker will not run without changes" - ) - image_hash = os.getenv("IMAGE_HASH") - image = f"ghcr.io/kit-cms/kingmaker-images-{image_name}-{str(self.ENV_NAME).lower()}:main_{image_hash}" - # print(f"Running on {distro} {os_version}, using image {image}") - return image - def htcondor_output_directory(self): return law.LocalDirectoryTarget(self.local_path("job_files")) @@ -453,7 +386,7 @@ def htcondor_job_config(self, config, job_num, branches): print("Unknown domain, default to CERN lxplus settings.") domain = "CERN" - analysis_name = os.getenv("ANA_NAME") + workflow_name = os.getenv("WF_NAME") task_name = self.__class__.__name__ # Write job config file @@ -467,10 +400,7 @@ def htcondor_job_config(self, config, job_num, branches): if self.htcondor_requirements: config.custom_content.append(("Requirements", self.htcondor_requirements)) config.custom_content.append(("universe", self.htcondor_universe)) - if self.htcondor_docker_image != "Automatic": - config.custom_content.append(("docker_image", self.htcondor_docker_image)) - else: - config.custom_content.append(("docker_image", self.get_submission_os())) + config.custom_content.append(("container_image", self.htcondor_container_image)) if domain == "ETP": config.custom_content.append( ("accounting_group", self.htcondor_accounting_group) @@ -541,8 +471,8 @@ def htcondor_job_config(self, config, job_num, branches): "-czf", tarball_local.path, "processor", - f"lawluigi_configs/{analysis_name}_luigi.cfg", - f"lawluigi_configs/{analysis_name}_law.cfg", + f"lawluigi_configs/{workflow_name}_luigi.cfg", + f"lawluigi_configs/{workflow_name}_law.cfg", "law", ] + list(self.additional_files) code, out, error = interruptable_popen( @@ -565,7 +495,7 @@ def htcondor_job_config(self, config, job_num, branches): tarball.copy_from_local(src=tarball_local.path) console.rule("Framework tarball uploaded!") config.render_variables["USER"] = self.local_user - config.render_variables["ANA_NAME"] = 
os.getenv("ANA_NAME") + config.render_variables["WF_NAME"] = os.getenv("WF_NAME") config.render_variables["ENV_NAME"] = self.ENV_NAME config.render_variables["TAG"] = self.production_tag config.render_variables["NTHREADS"] = self.htcondor_request_cpus @@ -583,9 +513,35 @@ def htcondor_job_config(self, config, job_num, branches): ) config.render_variables["LOCAL_TIMESTAMP"] = startup_time config.render_variables["LOCAL_PWD"] = startup_dir - # only needed for $ANA_NAME=ML_train see setup.sh line 207 - if os.getenv("MODULE_PYTHONPATH"): - config.render_variables["MODULE_PYTHONPATH"] = os.getenv( - "MODULE_PYTHONPATH" - ) return config + + +# Helper function to generate sandbox_pre_setup_cmds functions +# Adds a list of env variables before the setup_sandbox.sh call +def sandbox_pre_setup_cmds_factory(*env_vars): + # Generate dynamic exports + cmds = [f"export {name}={os.getenv(name)}" for name in env_vars] + # Add the static source command + analysis_path = os.getenv("ANALYSIS_PATH") + cmds.append(f"source {analysis_path}/processor/setup_sandbox.sh") + return lambda x: cmds + + +class KingmakerSandbox(law.SandboxTask): + + # Needed to allow for sandbox deactivation via law.NO_STR Parameter + allow_empty_sandbox = True + sandbox = luigi.Parameter( + default=law.NO_STR, + description="path to a sandbox file to be used for the job. 
Default 'law.NO_STR' deactivates sandboxing.", + ) + # Mount certificate dir to enable voms proxy + singularity_args = lambda x: [ + "-B", + "/etc/grid-security/certificates", + ] + + # Default sandbox init + sandbox_pre_setup_cmds = sandbox_pre_setup_cmds_factory( + "X509_USER_PROXY", "LUIGIPORT", "WF_NAME" + ) diff --git a/processor/setup_law_remote.sh b/processor/setup_law_remote.sh index c84126a3..bf3965fb 100644 --- a/processor/setup_law_remote.sh +++ b/processor/setup_law_remote.sh @@ -32,7 +32,7 @@ action() { echo "------------------------------------------" echo " | USER = ${USER}" echo " | HOSTNAME = $(hostname)" - echo " | ANA_NAME = {{ANA_NAME}}" + echo " | WF_NAME = {{WF_NAME}}" echo " | ENV_NAME = {{ENV_NAME}}" echo " | TAG = {{TAG}}" echo " | XRD_WORKERTHREADS = ${XRD_WORKERTHREADS}" @@ -40,8 +40,10 @@ action() { echo " | XRD_LOGLEVEL = ${XRD_LOGLEVEL}" echo "------------------------------------------" - echo "Setting up environment via {{SOURCE_SCRIPT}}." - {{SOURCE_SCRIPT}} + if [[ ! -z "{{SOURCE_SCRIPT}}" ]]; then + echo "Setting up environment via {{SOURCE_SCRIPT}}." 
+ {{SOURCE_SCRIPT}} + fi if [ "{{IS_LOCAL_OUTPUT}}" = "True" ] then @@ -65,8 +67,8 @@ action() { # setup law variables export LAW_HOME="${SPAWNPOINT}/.law" - export LAW_CONFIG_FILE="${SPAWNPOINT}/lawluigi_configs/{{ANA_NAME}}_law.cfg" - export LUIGI_CONFIG_PATH="${SPAWNPOINT}/lawluigi_configs/{{ANA_NAME}}_luigi.cfg" + export LAW_CONFIG_FILE="${SPAWNPOINT}/lawluigi_configs/{{WF_NAME}}_law.cfg" + export LUIGI_CONFIG_PATH="${SPAWNPOINT}/lawluigi_configs/{{WF_NAME}}_luigi.cfg" # Variables set by local LAW instance and used by batch job LAW instance export LOCAL_TIMESTAMP="{{LOCAL_TIMESTAMP}}" diff --git a/processor/setup_sandbox.sh b/processor/setup_sandbox.sh new file mode 100755 index 00000000..2e8393a4 --- /dev/null +++ b/processor/setup_sandbox.sh @@ -0,0 +1,50 @@ +############################################################################################ +# This script sets up all dependencies necessary for running KingMaker sandboxing # +############################################################################################ + +_addpy() { + [ ! -z "${1}" ] && export PYTHONPATH="${1}:${PYTHONPATH}" +} + +_addbin() { + [ ! -z "${1}" ] && export PATH="${1}:${PATH}" +} + +action() { + # Determine the directory of this file + if [ ! -z "${ZSH_VERSION}" ]; then + local THIS_FILE="${(%):-%x}" + else + local THIS_FILE="${BASH_SOURCE[0]}" + fi + export USER_FIRST_LETTER=${USER:0:1} + + BASE_DIR="$(dirname $( cd "$( dirname "${THIS_FILE}" )" && pwd ))" + + # Check for voms proxy + voms-proxy-info -exists &>/dev/null + if [[ "$?" -eq "1" ]]; then + echo "No valid voms proxy found, remote storage might be inaccessible." + echo "Please ensure that it exists and that 'X509_USER_PROXY' is properly set." + else + echo "Voms proxy found at ${X509_USER_PROXY}" + fi + + echo "Setting up Luigi/Law ..." 
+ export LAW_HOME="${BASE_DIR}/.law/${WF_NAME}" + export LAW_CONFIG_FILE="${BASE_DIR}/lawluigi_configs/${WF_NAME}_law.cfg" + export LUIGI_CONFIG_PATH="${BASE_DIR}/lawluigi_configs/${WF_NAME}_luigi.cfg" + export ANALYSIS_PATH="${BASE_DIR}" + export ANALYSIS_DATA_PATH="${ANALYSIS_PATH}/data" + + # law + _addpy "${BASE_DIR}/law" + _addbin "${BASE_DIR}/law/bin" + + # tasks + _addpy "${BASE_DIR}/processor" + _addpy "${BASE_DIR}/processor/tasks" + echo "KingMaker setup was successful" + +} +action "$@" diff --git a/processor/tasks/BuildCROWNLib.py b/processor/tasks/BuildCROWNLib.py index 123a1ac5..2fa82b63 100644 --- a/processor/tasks/BuildCROWNLib.py +++ b/processor/tasks/BuildCROWNLib.py @@ -1,10 +1,9 @@ import luigi import os -from framework import Task -from framework import console +from framework import Task, console, KingmakerSandbox, sandbox_pre_setup_cmds_factory -class BuildCROWNLib(Task): +class BuildCROWNLib(KingmakerSandbox, Task): """ Compile the CROWN shared libary to be used for all executables with the given configuration """ @@ -15,6 +14,11 @@ class BuildCROWNLib(Task): friend_name = luigi.Parameter(default="ntuples") analysis = luigi.Parameter() + # Copy over X509_USER_PROXY, LUIGIPORT, and CCACHE_DIR env values and run sandbox setup + sandbox_pre_setup_cmds = sandbox_pre_setup_cmds_factory( + "X509_USER_PROXY", "LUIGIPORT", "CCACHE_DIR", "WF_NAME" + ) + def output(self): target = self.remote_target(f"{self.friend_name}/libCROWNLIB.so") return target diff --git a/processor/tasks/CROWNBase.py b/processor/tasks/CROWNBase.py index 4461160c..c907b3a9 100644 --- a/processor/tasks/CROWNBase.py +++ b/processor/tasks/CROWNBase.py @@ -3,8 +3,13 @@ import os import json import shutil -from framework import console -from framework import HTCondorWorkflow, Task +from framework import ( + console, + HTCondorWorkflow, + Task, + KingmakerSandbox, + sandbox_pre_setup_cmds_factory, +) from law.task.base import WrapperTask from rich.table import Table from 
helpers.helpers import convert_to_comma_seperated @@ -209,7 +214,7 @@ def modify_polling_status_line(self, status_line): return f"{status_line} - {law.util.colored(status_line_pattern, color='light_cyan')}" -class CROWNBuildBase(Task): +class CROWNBuildBase(KingmakerSandbox, Task): # configuration variables scopes = luigi.ListParameter() shifts = luigi.Parameter() @@ -228,6 +233,11 @@ class CROWNBuildBase(Task): # Needed to propagate thread count to build tasks htcondor_request_cpus = luigi.IntParameter(default=1) + # Copy over X509_USER_PROXY, LUIGIPORT, and CCACHE_DIR env values and run sandbox setup + sandbox_pre_setup_cmds = sandbox_pre_setup_cmds_factory( + "X509_USER_PROXY", "LUIGIPORT", "CCACHE_DIR", "WF_NAME" + ) + def get_tarball_hash(self): """ The function `get_tarball_hash` generates a SHA-256 hash based on concatenated and sorted lists of diff --git a/processor/tasks/CROWNFriends.py b/processor/tasks/CROWNFriends.py index b94a3efd..1822c4ea 100644 --- a/processor/tasks/CROWNFriends.py +++ b/processor/tasks/CROWNFriends.py @@ -139,8 +139,6 @@ def run(self): tar = tarfile.open(_tarballpath, "r:gz") tar.extractall(_workdir) os.remove(tempfile) - # set environment using env script - my_env = self.set_environment("{}/init.sh".format(_workdir)) _crown_args = [_outputfile] + [_inputfile] _executable = "./{}_{}_{}_{}".format( self.friend_config, sample_type, era, scope @@ -157,7 +155,6 @@ def run(self): stderr=subprocess.PIPE, bufsize=1, universal_newlines=True, - env=my_env, cwd=_workdir, ) as p: for line in p.stdout: @@ -204,9 +201,7 @@ def run(self): "--scope {}".format(scope), "--sample_type {}".format(self.branch_data["sample_type"]), "--output {}".format(local_outputfile), - ], - sourcescript=[ - "{}/init.sh".format(_workdir), + "--libdir {}".format(os.path.join(_workdir, "lib")), ], silent=True, ) diff --git a/processor/tasks/CROWNMultiFriends.py b/processor/tasks/CROWNMultiFriends.py index 608a932d..f9b0db2a 100644 --- 
a/processor/tasks/CROWNMultiFriends.py +++ b/processor/tasks/CROWNMultiFriends.py @@ -177,8 +177,6 @@ def run(self): tar = tarfile.open(_tarballpath, "r:gz") tar.extractall(_workdir) os.remove(tempfile) - # set environment using env script - my_env = self.set_environment("{}/init.sh".format(_workdir)) _crown_args = [_outputfile] + [_inputfile] + _friend_inputs _executable = "./{}_{}_{}_{}".format( self.friend_config, sample_type, era, scope @@ -195,7 +193,6 @@ def run(self): stderr=subprocess.PIPE, bufsize=1, universal_newlines=True, - env=my_env, cwd=_workdir, ) as p: for line in p.stdout: @@ -236,9 +233,7 @@ def run(self): "--scope {}".format(scope), "--sample_type {}".format(self.branch_data["sample_type"]), "--output {}".format(local_outputfile), - ], - sourcescript=[ - "{}/init.sh".format(_workdir), + "--libdir {}".format(os.path.join(_workdir, "lib")), ], silent=True, ) diff --git a/processor/tasks/CROWNRun.py b/processor/tasks/CROWNRun.py index 89560e5d..0a2a866b 100644 --- a/processor/tasks/CROWNRun.py +++ b/processor/tasks/CROWNRun.py @@ -148,15 +148,6 @@ def run(self): tar = tarfile.open(_tarballpath, "r:gz") tar.extractall(_workdir) os.remove(_tempfile) - # test running the source command - console.rule("Testing Source command for CROWN") - self.run_command( - command=["source", "{}/init.sh".format(_workdir)], - silent=False, - ) - console.rule("Finished testing Source command for CROWN") - # set environment using env script - my_env = self.set_environment("{}/init.sh".format(_workdir)) _crown_args = [_outputfile] + _inputfiles _executable = "./{}_{}_{}".format( self.config, branch_data["sample_type"], branch_data["era"] @@ -175,7 +166,6 @@ def run(self): stderr=subprocess.PIPE, bufsize=1, universal_newlines=True, - env=my_env, cwd=_workdir, ) as p: for line in p.stdout: @@ -210,9 +200,6 @@ def run(self): "processor/tasks/helpers/ResetROOTStatusBit.py", "--input {}".format(local_filename), ], - sourcescript=[ - "{}/init.sh".format(_workdir), - ], 
silent=True, ) # for each outputfile, add the scope suffix @@ -236,9 +223,7 @@ def run(self): "--scope {}".format(self.scopes[i]), "--sample_type {}".format(self.branch_data["sample_type"]), "--output {}".format(local_outputfile), - ], - sourcescript=[ - "{}/init.sh".format(_workdir), + "--libdir {}".format(os.path.join(_workdir, "lib")), ], silent=True, ) diff --git a/processor/tasks/helpers/GetQuantitiesMap.py b/processor/tasks/helpers/GetQuantitiesMap.py index 5a9d6b97..5f8552ae 100644 --- a/processor/tasks/helpers/GetQuantitiesMap.py +++ b/processor/tasks/helpers/GetQuantitiesMap.py @@ -11,17 +11,31 @@ def parse_args(): parser.add_argument("--sample_type", help="sample_type") parser.add_argument("--scope", help="scope") parser.add_argument("--output", help="output file") + parser.add_argument("--libdir", help="dict parsing library") args = parser.parse_args() return args -def read_quantities_map(input_file, era, sample_type, scope, outputfile): +def read_quantities_map(input_file, era, sample_type, scope, outputfile, libdir): print(f"Reading quantities Map from {input_file}") - data = {} - ROOT.gSystem.Load(os.path.abspath(__file__), "/maplib.so") + + # Load dict parsing lib + lib_path = os.path.abspath(os.path.join(libdir, "libMyDicts.so")) + # Physical file check + if not os.path.exists(lib_path): + raise FileNotFoundError(f"Missing library: {lib_path}") + # Evaluate ROOT-specific return codes + result = ROOT.gSystem.Load(lib_path) + if result < 0: + err_type = ( + "Version mismatch" if result == -2 else "Linker error/Missing dependency" + ) + raise ImportError(f"Load failed ({result}): {err_type} for {lib_path}") + f = ROOT.TFile.Open(input_file) name = "shift_quantities_map" m = f.Get(name) + data = {} for shift, quantities in m: data[str(shift)] = sorted([str(quantity) for quantity in quantities]) f.Close() @@ -37,6 +51,8 @@ def read_quantities_map(input_file, era, sample_type, scope, outputfile): # call the function with the input file if __name__ == 
"__main__": args = parse_args() - read_quantities_map(args.input, args.era, args.sample_type, args.scope, args.output) + read_quantities_map( + args.input, args.era, args.sample_type, args.scope, args.output, args.libdir + ) print("Done") exit(0) diff --git a/processor/tasks/helpers/helpers.py b/processor/tasks/helpers/helpers.py index 0f2ab8ef..2230e63d 100644 --- a/processor/tasks/helpers/helpers.py +++ b/processor/tasks/helpers/helpers.py @@ -7,7 +7,6 @@ from XRootD.client import FileSystem from XRootD.client.flags import StatInfoFlags - # Get law loggers for this module logger = get_logger("xrootd.stat") @@ -102,7 +101,7 @@ def get_alternate_file_uri( expression is used: ```python - re.match(r"^root:\/\/[^\/]\/+(.*)$", file) + re.match(r"^root:\\/\\/[^\\/]\\/+(.*)$", file) ``` If `file` is not consistent with a XRootD URI or the file is not found on diff --git a/processor/tasks/scripts/compile_crown.sh b/processor/tasks/scripts/compile_crown.sh index 41d0cb41..ea98e3fc 100644 --- a/processor/tasks/scripts/compile_crown.sh +++ b/processor/tasks/scripts/compile_crown.sh @@ -1,4 +1,5 @@ #! /bin/bash + CROWNFOLDER=${1} ANALYSIS=${2} CONFIG=${3} @@ -10,44 +11,57 @@ INSTALLDIR=${8} BUILDDIR=${9} TARBALLNAME=${10} EXECUTABLE_THREADS=${11} -# setup with analysis clone if needed + +echo "--- CROWN Production Compilation ---" +echo "Crown folder: ${CROWNFOLDER}" +echo "Install dir: ${INSTALLDIR}" +echo "Build dir: ${BUILDDIR}" +echo "Analysis: ${ANALYSIS}" + +# Exit on any error or pipe failure set -o pipefail set -e -source ${ANALYSIS_PATH}/CROWN/init.sh ${ANALYSIS} -# remove conda prefix from $PATH so cmakes uses the LCG stack python and not the conda one -if [[ ! 
-z "${CONDA_PREFIX}" ]]; then - PATH=$(echo ${PATH} | sed "s@${CONDA_PREFIX}@@g") - # PATH=$(echo ${PATH} | sed 's%/cvmfs/etp.kit.edu/[^:]*:%%g') - CONDA_PYTHON_EXE="" - CONDA_EXE="" - CONDA_PREFIX="" -fi -# use a fourth of the machine for compiling + +# Use a fourth of the machine for compiling THREADS_AVAILABLE=$(grep -c ^processor /proc/cpuinfo) THREADS=$((THREADS_AVAILABLE / 4)) -echo "Using ${THREADS} threads for the compilation" -which cmake - -if cmake ${CROWNFOLDER} \ - -DANALYSIS=${ANALYSIS} \ - -DCONFIG=${CONFIG} \ - -DSAMPLES=${SAMPLES} \ - -DERAS=${ERAS} \ - -DSCOPES=${SCOPE} \ - -DSHIFTS=${SHIFTS} \ - -DTHREADS=${EXECUTABLE_THREADS} \ - -DINSTALLDIR=${INSTALLDIR} \ - -DPRODUCTION=True \ - -B${BUILDDIR} 2>&1 | tee ${BUILDDIR}/cmake.log; then - echo "CMake finished successfully" +[ "$THREADS" -lt 1 ] && THREADS=1 + +echo "Using ${THREADS} threads for compilation" +echo "Active Python: $(which python)" +echo "Active CMake: $(which cmake)" + +# Ensure Build Directory exists +mkdir -p "${BUILDDIR}" + +# --- CMake Configuration --- +# We use the compilers and libraries provided by the container's Conda 'env' +if cmake "${CROWNFOLDER}" \ + -DANALYSIS="${ANALYSIS}" \ + -DCONFIG="${CONFIG}" \ + -DSAMPLES="${SAMPLES}" \ + -DERAS="${ERAS}" \ + -DSCOPES="${SCOPE}" \ + -DSHIFTS="${SHIFTS}" \ + -DTHREADS="${EXECUTABLE_THREADS}" \ + -DINSTALLDIR="${INSTALLDIR}" \ + -DPRODUCTION=True \ + -B"${BUILDDIR}" 2>&1 | tee "${BUILDDIR}/cmake.log"; then + echo "CMake finished successfully" +else + echo "-------------------------------------------------------------------------" + echo "CMake failed, check the log file ${BUILDDIR}/cmake.log" + echo "-------------------------------------------------------------------------" + sleep 0.1 # wait for the log file to be written + exit 1 +fi + +cd "${BUILDDIR}" +echo "Starting compilation..." + +if make install -j "${THREADS}" 2>&1 | tee "${BUILDDIR}/build.log"; then + echo "CROWN library build and installation successful." 
else - echo "-------------------------------------------------------------------------" - echo "CMake failed, check the log file ${BUILDDIR}/cmake.log for more information" - echo "-------------------------------------------------------------------------" - sleep 0.1 # wait for the log file to be written - exit 1 + echo "ERROR: Build failed. See ${BUILDDIR}/build.log" + exit 1 fi -cd ${BUILDDIR} -echo "Finished preparing the compilation and starting to compile" -make install -j ${THREADS} 2>&1 | tee ${BUILDDIR}/build.log -echo "Finished the compilation" diff --git a/processor/tasks/scripts/compile_crown_friends.sh b/processor/tasks/scripts/compile_crown_friends.sh index eb143f3b..0799ee74 100644 --- a/processor/tasks/scripts/compile_crown_friends.sh +++ b/processor/tasks/scripts/compile_crown_friends.sh @@ -1,4 +1,5 @@ #! /bin/bash + CROWNFOLDER=${1} ANALYSIS=${2} CONFIG=${3} @@ -10,50 +11,63 @@ INSTALLDIR=${8} BUILDDIR=${9} TARBALLNAME=${10} QUANTITIESMAP=${11} -# setup with analysis clone if needed + +echo "--- CROWN Friends Compilation ---" +echo "Crown folder: ${CROWNFOLDER}" +echo "Install dir: ${INSTALLDIR}" +echo "Build dir: ${BUILDDIR}" +echo "Analysis: ${ANALYSIS}" + +# Exit on any error or pipe failure set -o pipefail set -e -source ${ANALYSIS_PATH}/CROWN/init.sh ${ANALYSIS} -# remove conda prefix from ${PATH} so cmakes uses the LCG stack python and not the conda one -if [[ ! 
-z "${CONDA_PREFIX}" ]]; then - PATH=$(echo ${PATH} | sed "s@${CONDA_PREFIX}@@g") - # PATH=$(echo ${PATH} | sed 's%/cvmfs/etp.kit.edu/[^:]*:%%g') - CONDA_PYTHON_EXE="" - CONDA_EXE="" - CONDA_PREFIX="" -fi -# use a fourth of the machine for compiling -THREADS_AVAILABLE=$(grep -c ^processor /proc/cpuinfo) -# THREADS=$(( THREADS_AVAILABLE / 4 )) + +# Use 2 threads for Friends compilation to avoid OOM during linking THREADS=2 -echo "Using ${THREADS} threads for the compilation" -which cmake - -if cmake ${CROWNFOLDER} \ - -DANALYSIS=${ANALYSIS} \ - -DCONFIG=${CONFIG} \ - -DSAMPLES=${SAMPLES} \ - -DERAS=${ERAS} \ - -DSCOPES=${SCOPE} \ - -DSHIFTS=${SHIFTS} \ - -DINSTALLDIR=${INSTALLDIR} \ - -DPRODUCTION=True \ - -DFRIENDS=true \ - -DQUANTITIESMAP=${QUANTITIESMAP} \ - -B${BUILDDIR} 2>&1 | tee ${BUILDDIR}/cmake.log; then - echo "CMake finished successfully" + +echo "Using ${THREADS} threads for compilation" +echo "Active Python: $(which python)" +echo "Active CMake: $(which cmake)" + +# Ensure Build Directory exists +mkdir -p "${BUILDDIR}" + +# --- CMake Configuration --- +# We use the compilers and libraries provided by the container's Conda 'env' +if cmake "${CROWNFOLDER}" \ + -DANALYSIS="${ANALYSIS}" \ + -DCONFIG="${CONFIG}" \ + -DSAMPLES="${SAMPLES}" \ + -DERAS="${ERAS}" \ + -DSCOPES="${SCOPE}" \ + -DSHIFTS="${SHIFTS}" \ + -DINSTALLDIR="${INSTALLDIR}" \ + -DPRODUCTION=True \ + -DFRIENDS=true \ + -DQUANTITIESMAP="${QUANTITIESMAP}" \ + -B"${BUILDDIR}" 2>&1 | tee "${BUILDDIR}/cmake.log"; then + echo "CMake finished successfully" +else + echo "-------------------------------------------------------------------------" + echo "CMake failed, check the log file ${BUILDDIR}/cmake.log" + echo "-------------------------------------------------------------------------" + sleep 0.1 # wait for the log file to be written + exit 1 +fi + +cd "${BUILDDIR}" +echo "Starting compilation..." 
+ +if make install -j "${THREADS}" 2>&1 | tee "${BUILDDIR}/build.log"; then + echo "CROWN Friends build and installation successful." else - echo "-------------------------------------------------------------------------" - echo "CMake failed, check the log file ${BUILDDIR}/cmake.log for more information" - echo "-------------------------------------------------------------------------" - sleep 0.1 # wait for the log file to be written - exit 1 + echo "ERROR: Build failed. See ${BUILDDIR}/build.log" + exit 1 fi -cd ${BUILDDIR} -echo "Finished preparing the compilation and starting to compile" -make install -j ${THREADS} 2>&1 | tee ${BUILDDIR}/build.log +# --- Post-Processing (Tarball) --- echo "Finished the compilation and starting to make the *.tar.gz archive" -cd ${INSTALLDIR} -touch ${TARBALLNAME} -tar -czvf ${TARBALLNAME} --exclude=${TARBALLNAME} . +cd "${INSTALLDIR}" +# Ensure the tarball isn't trying to archive itself if it already exists +touch "${TARBALLNAME}" +tar -czvf "${TARBALLNAME}" --exclude="${TARBALLNAME}" . diff --git a/processor/tasks/scripts/compile_crown_lib.sh b/processor/tasks/scripts/compile_crown_lib.sh index 98c802ec..8b8e619d 100644 --- a/processor/tasks/scripts/compile_crown_lib.sh +++ b/processor/tasks/scripts/compile_crown_lib.sh @@ -1,44 +1,57 @@ #! /bin/bash + +# Arguments passed by Law/KingMaker CROWNFOLDER=${1} INSTALLDIR=${2} BUILDDIR=${3} ANALYSIS=${4} +echo "--- CROWN Library Compilation ---" echo "Crown folder: ${CROWNFOLDER}" -echo "Install dir: ${INSTALLDIR}" -echo "Build dir: ${BUILDDIR}" -# setup with analysis clone if needed +echo "Install dir: ${INSTALLDIR}" +echo "Build dir: ${BUILDDIR}" +echo "Analysis: ${ANALYSIS}" + +# Exit on any error or pipe failure set -o pipefail set -e -source ${ANALYSIS_PATH}/CROWN/init.sh ${ANALYSIS} -# remove conda prefix from ${PATH} so cmakes uses the LCG stack python and not the conda one -if [[ ! 
-z "${CONDA_PREFIX}" ]]; then - PATH=$(echo ${PATH} | sed "s@${CONDA_PREFIX}@@g") - # PATH=$(echo ${PATH} | sed 's%/cvmfs/etp.kit.edu/[^:]*:%%g') - CONDA_PYTHON_EXE="" - CONDA_EXE="" - CONDA_PREFIX="" -fi -# use a fourth of the machine for compiling + +# --- Resource Calculation --- +# Use 1/4 of available cores for compilation to avoid overloading the node (min=1) THREADS_AVAILABLE=$(grep -c ^processor /proc/cpuinfo) THREADS=$((THREADS_AVAILABLE / 4)) -echo "Using ${THREADS} threads for the compilation" -which cmake - -if cmake ${CROWNFOLDER} \ - -DBUILD_CROWNLIB_ONLY=ON \ - -DINSTALLDIR=${INSTALLDIR} \ - -DANALYSIS=${ANALYSIS} \ - -B${BUILDDIR} 2>&1 | tee ${BUILDDIR}/cmake.log; then - echo "CMake finished successfully" +[ "$THREADS" -lt 1 ] && THREADS=1 + +echo "Using ${THREADS} threads for compilation." +echo "Active Python: $(which python)" +echo "Active CMake: $(which cmake)" + +# Ensure Build Directory exists +mkdir -p "${BUILDDIR}" + +# --- CMake Configuration --- +# We use the compilers and libraries provided by the container's Conda 'env' +if cmake "${CROWNFOLDER}" \ + -DBUILD_CROWNLIB_ONLY=ON \ + -DINSTALLDIR="${INSTALLDIR}" \ + -DANALYSIS="${ANALYSIS}" \ + -B"${BUILDDIR}" 2>&1 | tee "${BUILDDIR}/cmake.log"; then + echo "CMake finished successful." else echo "-------------------------------------------------------------------------" echo "CMake failed, check the log file ${BUILDDIR}/cmake.log for more information" echo "-------------------------------------------------------------------------" sleep 0.1 # wait for the log file to be written - exit 1 + exit 1 +fi + +# --- Build and Install --- +cd "${BUILDDIR}" +echo "Starting 'make install'..." + +if make install -j "${THREADS}" 2>&1 | tee "${BUILDDIR}/build.log"; then + echo "CROWN library build and installation successful." +else + echo "ERROR: Build failed. 
See ${BUILDDIR}/build.log" + exit 1 fi -cd ${BUILDDIR} -echo "Finished preparing the compilation and starting to compile" -make install -j ${THREADS} 2>&1 | tee ${BUILDDIR}/build.log -echo "Finished the compilation crownlib build successfully" diff --git a/sample_database b/sample_database index 48aa0e1a..c115fc05 160000 --- a/sample_database +++ b/sample_database @@ -1 +1 @@ -Subproject commit 48aa0e1a6d9248f1ec894ba605a68a53b2796357 +Subproject commit c115fc05d38f58020ead7629e40eaf6d0f12b619 diff --git a/scripts/ProductionStatus.py b/scripts/ProductionStatus.py index 3862c8ac..1920e0a5 100644 --- a/scripts/ProductionStatus.py +++ b/scripts/ProductionStatus.py @@ -21,13 +21,11 @@ def parse_args_from_law(): """ arguments = sys.argv if len(arguments) < 4 or arguments[1] != "law" or arguments[2] != "run": - rprint( - """ + rprint(""" Wrong usage of script, to run it, just add in front of your law command. Usage: monitor_production Example: monitor_production law run ProduceSamples --analysis tau --config config --sample-list samples_18.txt --production-tag best_samples_eu --workers 100 --scopes mt --shifts None - """ - ) + """) raise ValueError("Wrong script usage.") args_dict = {} for i in range(1, len(arguments)): diff --git a/scripts/os-version.sh b/scripts/os-version.sh deleted file mode 100644 index 3ab081ba..00000000 --- a/scripts/os-version.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -if ! 
command -v lsb_release &>/dev/null; then - source /etc/os-release - distro=${NAME} - os_version=${VERSION_ID} -else - distro=$(lsb_release -i | cut -f2) - os_version=$(lsb_release -r | cut -f2) -fi -distro=${distro//[[:space:]]/} -distro="${distro//Linux/}" -distro="${distro//linux/}" -#echo "Trying to run Kingmaker on ${distro} Version ${os_version}" diff --git a/scripts/python-formatting.sh b/scripts/python-formatting.sh deleted file mode 100644 index 286f35d7..00000000 --- a/scripts/python-formatting.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -FOUND_ISSUE=0 - -for FILENAME in $(find . \( -path ./venv -o -path ./law \) -prune -o -name "*.py"); do - # only run the check if the filename ends with .py - if [[ ${FILENAME} == *.py ]]; then - echo "Checking ${FILENAME}" - black --check ${FILENAME} - RETURN_VALUE=$? - if [ ${RETURN_VALUE} -ne 0 ]; then - black --diff ${FILENAME} 2>/dev/null - FOUND_ISSUE=1 - fi - fi -done - -exit ${FOUND_ISSUE} diff --git a/setup.sh b/setup.sh index 2ee6c733..2dd33dab 100755 --- a/setup.sh +++ b/setup.sh @@ -14,7 +14,7 @@ # List of available workflows -ANA_LIST=("KingMaker" "GPU_example") +WF_LIST=("KingMaker" "KingMaker_lxplus" "GPU_example") _addpy() { [ ! 
-z "${1}" ] && export PYTHONPATH="${1}:${PYTHONPATH}" @@ -26,33 +26,33 @@ _addbin() { parse_arguments() { # Default values - DEFAULT_ANALYSIS="KingMaker" + DEFAULT_WORKFLOW="KingMaker" DEFAULT_ENV_PATH="" DEFAULT_CROWN_ANALYSIS="" - ANALYSIS=${DEFAULT_ANALYSIS} + WORKFLOW=${DEFAULT_WORKFLOW} ENV_PATH=${DEFAULT_ENV_PATH} CROWN_ANALYSIS=${DEFAULT_CROWN_ANALYSIS} # Parse arguments while [[ $# -gt 0 ]]; do case $1 in - -a|--analysis) - ANALYSIS="$2" + -w|--workflow) + WORKFLOW="$2" shift 2 ;; -e|--env-path) ENV_PATH="$2" shift 2 ;; - -c|--crown-analysis) + -a|--analysis) CROWN_ANALYSIS="$2" shift 2 ;; -l|--list) echo "Available workflows:" echo "-------------------" - for workflow in "${ANA_LIST[@]}"; do - if [[ "${workflow}" == "${DEFAULT_ANALYSIS}" ]]; then + for workflow in "${WF_LIST[@]}"; do + if [[ "${workflow}" == "${DEFAULT_WORKFLOW}" ]]; then echo "* ${workflow} (default)" else echo "* ${workflow}" @@ -64,11 +64,11 @@ parse_arguments() { echo "Usage: source setup.sh [options]" echo "" echo "Options:" - echo " -a, --analysis ANALYSIS Specify the analysis workflow to use" - echo " [default: ${DEFAULT_ANALYSIS}]" + echo " -w, --workflow WORKFLOW Specify the workflow to use" + echo " [default: ${DEFAULT_WORKFLOW}]" echo " -e, --env-path PATH Specify custom environment path" echo " [default: auto-detected]" - echo " -c, --crown-analysis NAME Specify CROWN analysis to check out" + echo " -a, --analysis NAME Specify CROWN analysis to check out" echo " (only with KingMaker workflow)" echo " Available: https://crown.readthedocs.io/en/latest/introduction.html#id1" echo " -l, --list List available workflows" @@ -90,7 +90,7 @@ parse_arguments() { done # Export for use in main script - export PARSED_ANALYSIS="${ANALYSIS}" + export PARSED_WORKFLOW="${WORKFLOW}" export PARSED_ENV_PATH="${ENV_PATH}" export CROWN_ANALYSIS="${CROWN_ANALYSIS}" } @@ -103,12 +103,6 @@ action() { return 1 fi - # Check if law was already set up in this shell - if [[ ! 
-z ${LAW_IS_SET_UP} ]]; then - echo "KingMaker was already set up in this shell. Please, use a new one." - return 1 - fi - # Check if law already tried to set up in this shell if [[ ! -z ${LAW_TRIED_TO_SET_UP} ]]; then echo "Kingmaker already tried to set up in this shell. This might lead to unintended behaviour." @@ -125,58 +119,28 @@ action() { BASE_DIR="$( cd "$( dirname "${THIS_FILE}" )" && pwd )" - # Check if current OS is supported - source ${BASE_DIR}/scripts/os-version.sh - local VALID_OS="False" - if [[ "${distro}" == "CentOS" ]]; then - if [[ ${os_version:0:1} == "7" ]]; then - VALID_OS="True" - fi - elif [[ "${distro}" == "RedHatEnterprise" || "${distro}" == "Alma" || "${distro}" == "Rocky" ]]; then - if [[ ${os_version:0:1} == "9" ]]; then - VALID_OS="True" - fi - elif [[ "${distro}" == "Ubuntu" ]]; then - if [[ ${os_version:0:2} == "22" ]]; then - VALID_OS="True" - fi - fi - if [[ "${VALID_OS}" == "False" ]]; then - echo "Kingmaker not support on ${distro} ${os_version}" - return 1 - else - echo "Running Kingmaker on ${distro} Version ${os_version} on $(hostname) from dir ${BASE_DIR}" - fi - # Handle analysis selection - if [[ -z "${PARSED_ANALYSIS}" ]]; then + if [[ -z "${PARSED_WORKFLOW}" ]]; then echo "No workflow chosen. Please choose from:" - printf '%s\n' "${ANA_LIST[@]}" + printf '%s\n' "${WF_LIST[@]}" return 1 else # Check if given workflow is in list - if [[ ! " ${ANA_LIST[*]} " =~ " ${PARSED_ANALYSIS} " ]] ; then + if [[ ! " ${WF_LIST[*]} " =~ " ${PARSED_WORKFLOW} " ]] ; then echo "Not a valid name. Allowed choices are:" - printf '%s\n' "${ANA_LIST[@]}" + printf '%s\n' "${WF_LIST[@]}" return 1 else - echo "Using ${PARSED_ANALYSIS} workflow." - export ANA_NAME="${PARSED_ANALYSIS}" + echo "Using ${PARSED_WORKFLOW} workflow." 
+ export WF_NAME="${PARSED_WORKFLOW}" fi fi # Needed for EOS directory parsing export USER_FIRST_LETTER=${USER:0:1} - # Ensure that submodule with KingMaker env files is present - if [ -z "$(ls -A ${BASE_DIR}/kingmaker-images)" ]; then - git submodule update --init --recursive -- kingmaker-images - fi - # Get kingmaker-images submodule hash to find the correct image during job submission - export IMAGE_HASH=$(cd ${BASE_DIR}/kingmaker-images/; git rev-parse --short HEAD) - # Parse the necessary environments from the luigi config files. - PARSED_ENVS=$(python3 ${BASE_DIR}/scripts/ParseNeededVar.py ${BASE_DIR}/lawluigi_configs/${ANA_NAME}_luigi.cfg "ENV_NAME") + PARSED_ENVS=$(python3 ${BASE_DIR}/scripts/ParseNeededVar.py ${BASE_DIR}/lawluigi_configs/${WF_NAME}_luigi.cfg "ENV_NAME") PARSED_ENVS_STATUS=$? if [[ "${PARSED_ENVS_STATUS}" -eq "1" ]]; then IFS='@' read -ra ADDR <<< "${PARSED_ENVS}" @@ -187,9 +151,9 @@ action() { return 1 fi # First listed is env of DEFAULT and will be used as the starting env - # Remaining envs should be sourced via provided docker images + # Remaining envs should be sourced via provided container images export STARTING_ENV=$(echo ${PARSED_ENVS} | head -n1 | awk '{print $1;}') - echo "${STARTING_ENV}_${IMAGE_HASH} will be sourced as the starting env." + echo "${STARTING_ENV} will be sourced as the starting env." # Order of environment locations # 1. 
Use realpath of provided directory in second argument @@ -200,7 +164,7 @@ action() { ENV_PATH="$(realpath ${PARSED_ENV_PATH})" elif [[ -f "${BASE_DIR}/environment.location" ]]; then ENV_PATH="$(tail -n 1 ${BASE_DIR}/environment.location)" - elif [[ -d "/cvmfs/etp.kit.edu/LAW_envs/miniforge/envs/${STARTING_ENV}_${IMAGE_HASH}" ]]; then + elif [[ -d "/cvmfs/etp.kit.edu/LAW_envs/miniforge/envs/${STARTING_ENV}" ]]; then ENV_PATH="/cvmfs/etp.kit.edu/LAW_envs" else ENV_PATH="${BASE_DIR}" @@ -236,46 +200,45 @@ action() { source ${ENV_PATH}/miniforge/bin/activate '' # Check if correct miniforge env is running - if [ -d "${ENV_PATH}/miniforge/envs/${STARTING_ENV}_${IMAGE_HASH}" ]; then - echo "${STARTING_ENV}_${IMAGE_HASH} env found using miniforge." + if [ -d "${ENV_PATH}/miniforge/envs/${STARTING_ENV}" ]; then + echo "${STARTING_ENV} env found using miniforge." else # Create miniforge env from yaml file if necessary - echo "Creating ${STARTING_ENV}_${IMAGE_HASH} env from kingmaker-images/KingMaker_envs/${STARTING_ENV}_env.yml..." - if [[ ! -f "${BASE_DIR}/kingmaker-images/KingMaker_envs/${STARTING_ENV}_env.yml" ]]; then - echo "${BASE_DIR}/kingmaker-images/KingMaker_envs/${STARTING_ENV}_env.yml not found. Unable to create environment." + echo "Creating ${STARTING_ENV} env from containers/${STARTING_ENV}_env.yml..." + if [[ ! -f "${BASE_DIR}/containers/${STARTING_ENV}_env.yml" ]]; then + echo "${BASE_DIR}/containers/${STARTING_ENV}_env.yml not found. Unable to create environment." return 1 fi - conda env create -f ${BASE_DIR}/kingmaker-images/KingMaker_envs/${STARTING_ENV}_env.yml -n ${STARTING_ENV}_${IMAGE_HASH} - echo "${STARTING_ENV}_${IMAGE_HASH} env built using miniforge." + conda env create -f ${BASE_DIR}/containers/${STARTING_ENV}_env.yml -n ${STARTING_ENV} + echo "${STARTING_ENV} env built using miniforge." fi - echo "Activating starting-env ${STARTING_ENV}_${IMAGE_HASH} from miniforge." 
- conda activate ${STARTING_ENV}_${IMAGE_HASH} + echo "Activating starting-env ${STARTING_ENV} from miniforge." + conda activate ${STARTING_ENV} # Set up other dependencies based on workflow ############################################ - case ${ANA_NAME} in - KingMaker) + case ${WF_NAME} in + KingMaker|KingMaker_lxplus) echo "Setting up CROWN ..." # Due to frequent updates CROWN is not set up as a submodule - if [ ! -d "${BASE_DIR}/CROWN" ]; then - git clone --recurse-submodules git@github.com:KIT-CMS/CROWN ${BASE_DIR}/CROWN + if [ -z "$(ls -A ${BASE_DIR}/CROWN)" ]; then + git -C "${BASE_DIR}" submodule update --init --recursive -- CROWN fi # Add CROWN analysis checkout option using init.sh if [ ! -z "${CROWN_ANALYSIS}" ]; then ( # Run in subprocess to prevent environment changes - cd "${BASE_DIR}/CROWN" - if [ -f "init.sh" ]; then + if [ -f "${BASE_DIR}/CROWN/init.sh" ]; then echo "Checking out CROWN analysis: ${CROWN_ANALYSIS}" - source init.sh "${CROWN_ANALYSIS}" + source ${BASE_DIR}/CROWN/init.sh -a "${CROWN_ANALYSIS}" --dry-run else - echo "Error: CROWN init.sh not found" + echo "Error: CROWN init.sh not found at ${BASE_DIR}/CROWN/init.sh" return 1 fi ) fi if [ -z "$(ls -A ${BASE_DIR}/sample_database)" ]; then - git submodule update --init --recursive -- sample_database + git -C "${BASE_DIR}" submodule update --init --recursive -- sample_database fi # Set the alias sample_manager () { @@ -290,6 +253,10 @@ action() { # Parse all user arguments and pass them to the python script python3 ${BASE_DIR}/scripts/ProductionStatus.py $@ } + + # Set up ccache + export CCACHE_DIR="${BASE_DIR}/CROWN/.cache/ccache"; + ;; *) ;; @@ -298,21 +265,22 @@ action() { # Check is law was set up, and do so if not if [ -z "$(ls -A ${BASE_DIR}/law)" ]; then - git submodule update --init --recursive -- law + git -C "${BASE_DIR}" submodule update --init --recursive -- law fi - # Remember the previous value of VOMS_USERCONF to overwrite after conda source - export 
VOMS_USERCONF=${INITIAL_VOMS_USERCONF} - # Check for voms proxy voms-proxy-info -exists &>/dev/null if [[ "$?" -eq "1" ]]; then echo "No valid voms proxy found, remote storage might be inaccessible." echo "Please ensure that it exists and that 'X509_USER_PROXY' is properly set." + else + # Export the path of the detected voms proxy so remote-storage tools pick it up + export X509_USER_PROXY=$(voms-proxy-info -path) + echo "Voms proxy found at ${X509_USER_PROXY}" fi # Parse the necessary environments from the luigi config files. - LOCAL_SCHEDULER=$(python3 ${BASE_DIR}/scripts/ParseNeededVar.py ${BASE_DIR}/lawluigi_configs/${ANA_NAME}_luigi.cfg "local_scheduler") + LOCAL_SCHEDULER=$(python3 ${BASE_DIR}/scripts/ParseNeededVar.py ${BASE_DIR}/lawluigi_configs/${WF_NAME}_luigi.cfg "local_scheduler") LOCAL_SCHEDULER_STATUS=$? if [[ "${LOCAL_SCHEDULER_STATUS}" -eq "1" ]]; then IFS='@' read -ra ADDR <<< "${LOCAL_SCHEDULER}" @@ -359,9 +327,9 @@ action() { fi echo "Setting up Luigi/Law ..." - export LAW_HOME="${BASE_DIR}/.law/${ANA_NAME}" - export LAW_CONFIG_FILE="${BASE_DIR}/lawluigi_configs/${ANA_NAME}_law.cfg" - export LUIGI_CONFIG_PATH="${BASE_DIR}/lawluigi_configs/${ANA_NAME}_luigi.cfg" + export LAW_HOME="${BASE_DIR}/.law/${WF_NAME}" + export LAW_CONFIG_FILE="${BASE_DIR}/lawluigi_configs/${WF_NAME}_law.cfg" + export LUIGI_CONFIG_PATH="${BASE_DIR}/lawluigi_configs/${WF_NAME}_luigi.cfg" export ANALYSIS_PATH="${BASE_DIR}" export ANALYSIS_DATA_PATH="${ANALYSIS_PATH}/data" @@ -386,8 +354,5 @@ action() { return 1 fi fi - - export LAW_IS_SET_UP="True" - echo "KingMaker setup was successful" } action "$@"