From 66f608fc702b4ce840b8f176c35589f9a5ca5717 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 30 Apr 2025 10:23:49 +0200 Subject: [PATCH 1/9] Update RDMA Core Userspace Libraries to version 57.0 --- rdma-core.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdma-core.spec b/rdma-core.spec index 1b9ab95d01f..b4c93d66615 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,4 +1,4 @@ -### RPM external rdma-core 50.0 +### RPM external rdma-core 57.0 ## INITENV +PATH LD_LIBRARY_PATH %{i}/lib64 Source: https://github.com/linux-rdma/%{n}/releases/download/v%{realversion}/rdma-core-%{realversion}.tar.gz From 9292dd21899d8ac9ea3d34520ef53bc8f6130a0c Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 30 Apr 2025 10:24:42 +0200 Subject: [PATCH 2/9] RDMA Core: make libibverbs plugins relocatable Allow overriding the libibverbs configuration directory setting the environment variable VERBS_CONFIG_DIR. Set VERBS_CONFIG_DIR in the scram tool to point to the libibverbs configuration directory. Include the libibverbs configuration directory in the package. Update the libibverbs configuration to include the full path to the plugins, and relocate them in the post-install section. Include the user binaries in the package. 
--- rdma-core-VERBS_CONFIG_DIR.patch | 42 +++++++++++++++++++ rdma-core.spec | 19 +++++++-- .../tools/rdma-core/rdma-core.xml | 2 + 3 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 rdma-core-VERBS_CONFIG_DIR.patch diff --git a/rdma-core-VERBS_CONFIG_DIR.patch b/rdma-core-VERBS_CONFIG_DIR.patch new file mode 100644 index 00000000000..13bc3294780 --- /dev/null +++ b/rdma-core-VERBS_CONFIG_DIR.patch @@ -0,0 +1,42 @@ +diff --git a/libibverbs/dynamic_driver.c b/libibverbs/dynamic_driver.c +index 7fa4233..c2ff2bb 100644 +--- a/libibverbs/dynamic_driver.c ++++ b/libibverbs/dynamic_driver.c +@@ -115,27 +115,33 @@ static void read_config_file(const char *path) + + static void read_config(void) + { ++ const char *verbs_config_dir; + DIR *conf_dir; + struct dirent *dent; + char *path; + +- conf_dir = opendir(IBV_CONFIG_DIR); ++ verbs_config_dir = getenv("VERBS_CONFIG_DIR"); ++ if (!verbs_config_dir) { ++ verbs_config_dir = IBV_CONFIG_DIR; ++ } ++ ++ conf_dir = opendir(verbs_config_dir); + if (!conf_dir) { + fprintf(stderr, + PFX "Warning: couldn't open config directory '%s'.\n", +- IBV_CONFIG_DIR); ++ verbs_config_dir); + return; + } + + while ((dent = readdir(conf_dir))) { + struct stat buf; + +- if (asprintf(&path, "%s/%s", IBV_CONFIG_DIR, dent->d_name) < ++ if (asprintf(&path, "%s/%s", verbs_config_dir, dent->d_name) < + 0) { + fprintf(stderr, + PFX + "Warning: couldn't read config file %s/%s.\n", +- IBV_CONFIG_DIR, dent->d_name); ++ verbs_config_dir, dent->d_name); + goto out; + } + diff --git a/rdma-core.spec b/rdma-core.spec index b4c93d66615..0e7443d5dbe 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -2,10 +2,12 @@ ## INITENV +PATH LD_LIBRARY_PATH %{i}/lib64 Source: https://github.com/linux-rdma/%{n}/releases/download/v%{realversion}/rdma-core-%{realversion}.tar.gz +Patch0: rdma-core-VERBS_CONFIG_DIR BuildRequires: cmake ninja %prep %setup -q -n %{n}-%{realversion} +%patch0 -p1 %build rm -rf build @@ -33,10 +35,19 @@ ninja -v %{makeprocesses} install 
# remove pkg-config to avoid rpm-generated dependency on /usr/bin/pkg-config rm -rf %{i}/lib64/pkgconfig -# keep only the libraries and include files -rm -rf %{i}/bin -rm -rf %{i}/etc +# keep only the user binaries, libibverbs configuration, libraries and include files +rm -rf %{i}/etc/infiniband-diags +rm -rf %{i}/etc/init.d +rm -rf %{i}/etc/modprobe.d +rm -rf %{i}/etc/rdma rm -rf %{i}/lib rm -rf %{i}/libexec rm -rf %{i}/sbin -rm -rf %{i}/share +rm -rf %{i}/share/perl5 + +# update the libibverbs plugins with the full path +sed -e's#driver \(\w\+\)#driver %{i}/lib64/libibverbs/lib\1#' -i %{i}/etc/libibverbs.d/* + +%post +# relocate the libibverbs plugins path +%{relocateConfig}etc/libibverbs.d/* diff --git a/scram-tools.file/tools/rdma-core/rdma-core.xml b/scram-tools.file/tools/rdma-core/rdma-core.xml index 1eb305ffddc..8f2167c08b1 100644 --- a/scram-tools.file/tools/rdma-core/rdma-core.xml +++ b/scram-tools.file/tools/rdma-core/rdma-core.xml @@ -5,5 +5,7 @@ + + From 78abc37bb8a7b84b7c21e24f3a4256b586e13cac Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 30 Apr 2025 07:50:59 +0200 Subject: [PATCH 3/9] Add Libfabric OpenFabrics version 2.1.0 Libfabric, also known as Open Fabrics Interfaces (OFI), defines a communication API for high-performance parallel and distributed applications. It is a low-level communication library that abstracts diverse networking technologies. See https://ofiwg.github.io/libfabric/ for more information. 
--- cmssw-tools.spec | 1 + libfabric.spec | 79 +++++++++++++++++++ .../tools/libfabric/libfabric.xml | 8 ++ 3 files changed, 88 insertions(+) create mode 100644 libfabric.spec create mode 100644 scram-tools.file/tools/libfabric/libfabric.xml diff --git a/cmssw-tools.spec b/cmssw-tools.spec index 1a81c187f3c..992932e4c6e 100644 --- a/cmssw-tools.spec +++ b/cmssw-tools.spec @@ -77,6 +77,7 @@ Requires: numactl Requires: hwloc Requires: rdma-core Requires: ucx +Requires: libfabric Requires: openmpi Requires: sigcpp Requires: sqlite diff --git a/libfabric.spec b/libfabric.spec new file mode 100644 index 00000000000..83cf31a6c09 --- /dev/null +++ b/libfabric.spec @@ -0,0 +1,79 @@ +### RPM external libfabric 2.1.0 +Source: https://github.com/ofiwg/%{n}/releases/download/v%{realversion}/%{n}-%{realversion}.tar.bz2 +%{!?without_cuda:Requires: cuda gdrcopy} +%{!?without_rocm:Requires: rocm} +Requires: curl +Requires: numactl +Requires: rdma-core +Requires: xpmem + +%prep +%setup -q -n %{n}-%{realversion} + +# regenerate the configure files and Makefiles +./autogen.sh + +./configure \ + --prefix=%i \ + --disable-dependency-tracking \ + --disable-debug \ + --disable-profile \ + --disable-asan \ + --disable-lsan \ + --disable-tsan \ + --disable-ubsan \ + --enable-shared \ + --disable-static \ + --enable-shm \ + --enable-sm2 \ + --enable-xpmem=$XPMEM_ROOT \ + --disable-sockets \ + --enable-tcp \ + --enable-udp \ + --enable-verbs=$RDMA_CORE_ROOT \ + --disable-opx \ + --disable-psm2 \ + --disable-psm3 \ + --disable-usnic \ + --disable-efa \ + --disable-cxi \ + --disable-mrail \ + --disable-lpp \ + --disable-ucx \ + --enable-rxm \ + --enable-lnx \ +%if 0%{!?without_cuda:1} + --enable-cuda-dlopen \ + --enable-gdrcopy-dlopen \ + --with-cuda=$CUDA_ROOT \ + --with-gdrcopy=$GDRCOPY_ROOT \ +%else + --disable-cuda-dlopen \ + --disable-gdrcopy-dlopen \ + --without-cuda \ + --without-gdrcopy \ +%endif +%if 0%{!?without_rocm:1} + --enable-rocr-dlopen \ + --with-rocr=$ROCM_ROOT \ +%else + 
--disable-rocr-dlopen \ + --without-rocr \ +%endif + --disable-ze-dlopen \ + --without-ze \ + --with-pic \ + --with-dlopen \ + --with-gnu-ld \ + --with-curl=$CURL_ROOT \ + --with-numa=$NUMACTL_ROOT + + # CFLAGS="-Wno-error=array-bounds" + +%build +make %{makeprocesses} + +%install +make install + +%post diff --git a/scram-tools.file/tools/libfabric/libfabric.xml b/scram-tools.file/tools/libfabric/libfabric.xml new file mode 100644 index 00000000000..a33cf8bafdf --- /dev/null +++ b/scram-tools.file/tools/libfabric/libfabric.xml @@ -0,0 +1,8 @@ + + + + + + + + From 777bae307330ddb5d819a5f55adf6e8bc857eb4a Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 30 Apr 2025 08:10:24 +0200 Subject: [PATCH 4/9] Update UCX to version 1.18.1 --- ucx.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ucx.spec b/ucx.spec index 7aa91bfe09d..e40593ae397 100644 --- a/ucx.spec +++ b/ucx.spec @@ -1,4 +1,4 @@ -### RPM external ucx 1.17.0 +### RPM external ucx 1.18.1 Source: https://github.com/openucx/%{n}/archive/refs/tags/v%{realversion}.tar.gz BuildRequires: autotools %{!?without_cuda:Requires: cuda gdrcopy} From e48f93eb731424b88efa98ad0f68c6cd2679eb89 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 29 Apr 2025 23:16:25 +0200 Subject: [PATCH 5/9] Update OpenMPI to version 4.1.8 This is a bug fix release. See the NEWS file at https://raw.githubusercontent.com/open-mpi/ompi/v4.1.x/NEWS for the list of fixes and other changes. 
--- openmpi.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmpi.spec b/openmpi.spec index 3b50f15f7d0..14cfddc12dd 100644 --- a/openmpi.spec +++ b/openmpi.spec @@ -1,4 +1,4 @@ -### RPM external openmpi 4.1.6 +### RPM external openmpi 4.1.8 ## INITENV SET OPAL_PREFIX %{i} Source: https://download.open-mpi.org/release/open-mpi/v4.1/%{n}-%{realversion}.tar.bz2 BuildRequires: autotools From 5c8b05f940014767dcc1f79149dc656add9f997b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 30 Apr 2025 07:55:08 +0200 Subject: [PATCH 6/9] OpenMPI: enable support for libfabric OFI --- openmpi.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openmpi.spec b/openmpi.spec index 14cfddc12dd..e5db706c797 100644 --- a/openmpi.spec +++ b/openmpi.spec @@ -3,6 +3,7 @@ Source: https://download.open-mpi.org/release/open-mpi/v4.1/%{n}-%{realversion}.tar.bz2 BuildRequires: autotools %{!?without_cuda:Requires: cuda} +Requires: libfabric Requires: hwloc Requires: rdma-core Requires: xpmem @@ -10,7 +11,6 @@ Requires: ucx Requires: zlib # external libraries are needed for additional protocols: -# --with-ofi: Open Fabric Interface's libfabric # --with-mxm: Mellanox Messaging (depracated, use UCX instead) # --with-fca: Mellanox Fabric Collective Accelerator # --with-hcoll: Mellanox Hierarchical Collectives @@ -33,7 +33,7 @@ Requires: zlib --with-zlib=$ZLIB_ROOT \ %{!?without_cuda:--with-cuda=$CUDA_ROOT} \ --with-hwloc=$HWLOC_ROOT \ - --without-ofi \ + --with-ofi=$LIBFABRIC_ROOT \ --without-portals4 \ --without-psm \ --without-psm2 \ From 75f2b20ea6c4fa1f259c325e47e40b0737d87693 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 8 May 2025 13:39:17 +0200 Subject: [PATCH 7/9] OpenMPI: update to the v4.1.x branch as of 2025.05.5 Set the version name to v4.1.9a1 --- openmpi.spec | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/openmpi.spec b/openmpi.spec index e5db706c797..81ecda2cc4b 100644 --- a/openmpi.spec +++ 
b/openmpi.spec @@ -1,7 +1,9 @@ -### RPM external openmpi 4.1.8 +### RPM external openmpi 4.1.9a1-20250505 ## INITENV SET OPAL_PREFIX %{i} -Source: https://download.open-mpi.org/release/open-mpi/v4.1/%{n}-%{realversion}.tar.bz2 -BuildRequires: autotools +%define branch v4.1.x +%define tag e6d2cb856f3fc649aa01bd5b688a003b3b33db7d +Source: git+https://github.com/open-mpi/ompi.git?obj=%{branch}/%{tag}&export=%{n}-%{realversion}&output=/%{n}-%{realversion}.tgz +BuildRequires: autotools flex %{!?without_cuda:Requires: cuda} Requires: libfabric Requires: hwloc @@ -20,6 +22,8 @@ Requires: zlib %prep %setup -q -n %{n}-%{realversion} +AUTOMAKE_JOBS=%{compiling_processes} ./autogen.pl + ./configure \ --prefix=%i \ --disable-dependency-tracking \ From a667c008d89781b06784c29047088ebb4f78887a Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 10 May 2025 15:18:20 +0200 Subject: [PATCH 8/9] Add a generic MPI tool, with default to Open MPI 4.1 --- cmssw-tools.spec | 1 + mpi.spec | 10 ++++++++++ scram-tools.file/tools/mpi/mpi.xml | 3 +++ 3 files changed, 14 insertions(+) create mode 100644 mpi.spec create mode 100644 scram-tools.file/tools/mpi/mpi.xml diff --git a/cmssw-tools.spec b/cmssw-tools.spec index 992932e4c6e..f5d3a5cc4a5 100644 --- a/cmssw-tools.spec +++ b/cmssw-tools.spec @@ -79,6 +79,7 @@ Requires: rdma-core Requires: ucx Requires: libfabric Requires: openmpi +Requires: mpi Requires: sigcpp Requires: sqlite Requires: tauolapp diff --git a/mpi.spec b/mpi.spec new file mode 100644 index 00000000000..1a9596de27c --- /dev/null +++ b/mpi.spec @@ -0,0 +1,10 @@ +### RPM external mpi 1.0 +Requires: openmpi + +%prep + +%build + +%install + +%post diff --git a/scram-tools.file/tools/mpi/mpi.xml b/scram-tools.file/tools/mpi/mpi.xml new file mode 100644 index 00000000000..55814f6e01e --- /dev/null +++ b/scram-tools.file/tools/mpi/mpi.xml @@ -0,0 +1,3 @@ + + + From 6d8d6c5b1cd8e51730f6048a8676cf5a641af688 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 11 May 2025 
17:56:04 +0200 Subject: [PATCH 9/9] Add MPICH version v4.3.0 MPICH is not selected by default, as it would conflict with Open MPI. --- cmssw-drop-tools.file | 2 +- cmssw-tools.spec | 1 + mpich.spec | 103 +++++++++++++++++++++++++ scram-tools.file/tools/mpich/mpich.xml | 10 +++ 4 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 mpich.spec create mode 100644 scram-tools.file/tools/mpich/mpich.xml diff --git a/cmssw-drop-tools.file b/cmssw-drop-tools.file index c267b7c1a96..7fd8a6d9500 100644 --- a/cmssw-drop-tools.file +++ b/cmssw-drop-tools.file @@ -1 +1 @@ -%define skipreqtools jcompiler icc-cxxcompiler icc-ccompiler icc-f77compiler rivet2 opencl opencl-cpp intel-vtune icx-cxxcompiler icx-ccompiler icx-f77compiler +%define skipreqtools jcompiler icc-cxxcompiler icc-ccompiler icc-f77compiler rivet2 opencl opencl-cpp intel-vtune icx-cxxcompiler icx-ccompiler icx-f77compiler mpich diff --git a/cmssw-tools.spec b/cmssw-tools.spec index f5d3a5cc4a5..81c6872a3a0 100644 --- a/cmssw-tools.spec +++ b/cmssw-tools.spec @@ -79,6 +79,7 @@ Requires: rdma-core Requires: ucx Requires: libfabric Requires: openmpi +Requires: mpich Requires: mpi Requires: sigcpp Requires: sqlite diff --git a/mpich.spec b/mpich.spec new file mode 100644 index 00000000000..75e0d145502 --- /dev/null +++ b/mpich.spec @@ -0,0 +1,103 @@ +### RPM external mpich v4.3.0 +## INCLUDE cuda-flags +## INCLUDE rocm-flags +%define branch 4.3.x +%define tag %{realversion} +Source: git+https://github.com/pmodels/mpich.git?obj=%{branch}/%{tag}&export=%{n}-%{realversion}&submodules=1&output=/%{n}-%{realversion}.tgz +BuildRequires: autotools +%{!?without_cuda:Requires: cuda} +%{!?without_rocm:Requires: rocm} +Requires: libfabric +Requires: ucx +Requires: hwloc +Requires: xpmem + +# external libraries are needed for additional protocols: +# --with-hcoll: Mellanox Hierarchical Collectives +# --with-pmix: PMIx Reference Library (OpenPMIx) +# etc. 
+ +%prep +%setup -q -n %{n}-%{realversion} + +# remove the submodules we do not want to use +rm -rf modules/hwloc +sed -e's/do_hwloc=.*/do_hwloc=no/' -i autogen.sh + +rm -rf modules/libfabric +sed -e's/do_ofi=.*/do_ofi=no/' -i autogen.sh + +rm -rf modules/ucx +sed -e's/do_ucx=.*/do_ucx=no/' -i autogen.sh + +./autogen.sh + +# MPICH communication device: +# --with-device=ch4:ofi +# should work for TCP networks and any high-bandwidth interconnect supported by libfabric. +# --with-device=ch4:ucx +# should work for TCP networks and any high-bandwidth interconnect supported by the UCX library. +# --with-device=ch3 +# the legacy device ch3 + +# MPICH multi-threading support: +# --enable-thread-cs={default, global, per-vci, lock-free} +# Default is global for ch3 and per-vci for ch4 +# --enable-ch4-vci-method={default, zero, communicator, tag, implicit, explicit} +# --enable-ch4-mt={direct, lockless, runtime} +# direct - Each thread directly accesses lower-level fabric (default) +# lockless - Use the thread safe serialization model supported by the provider +# runtime - Determine the model at runtime through a CVAR + +# Note: using --enable-fast=O2,ndebug,alwaysinline,sse2 the compilation hangs (or takes a very long time to complete). 
+ +./configure \ + --prefix=%i \ + --enable-error-checking=all \ + --enable-tag-error-bits=yes \ + --enable-fast=O2,ndebug,sse2 \ + --enable-cxx \ + --enable-romio \ + --disable-mpi-abi \ + --enable-versioning \ + --enable-threads=multiple \ + --enable-thread-cs=default \ + --disable-dependency-tracking \ + --disable-silent-rules \ + --disable-maintainer-mode \ + --enable-shared \ + --disable-static \ + --enable-nemesis-shm-collectives \ +%if 0%{!?without_cuda:1} + --with-cuda=$CUDA_ROOT \ + --with-cuda-sm=%(echo %{cuda_arch} | sed -e's/ \+/,/g') \ +%else + --without-cuda \ +%endif +%if 0%{!?without_rocm:1} + --with-hip=$ROCM_ROOT \ + --with-hip-sm=%(echo %{rocm_archs} | sed -e's/ \+/,/g') \ +%else + --without-hip \ +%endif + --without-ze \ + --with-pic \ + --with-gnu-ld \ + --with-libfabric=$LIBFABRIC_ROOT \ + --with-ucx=$UCX_ROOT \ + --with-hwloc=$HWLOC_ROOT \ + --without-netloc \ + --with-xpmem=$XPMEM_ROOT \ + --with-yaksa=embedded \ + --with-device=ch4:ofi + +%build +make %{makeprocesses} V=1 + +%install +make install + +%post +%{relocateConfig}bin/mpicc +%{relocateConfig}bin/mpicxx +%{relocateConfig}bin/mpifort diff --git a/scram-tools.file/tools/mpich/mpich.xml b/scram-tools.file/tools/mpich/mpich.xml new file mode 100644 index 00000000000..569ef9b5a28 --- /dev/null +++ b/scram-tools.file/tools/mpich/mpich.xml @@ -0,0 +1,10 @@ + + + + + + + + + +