diff --git a/cmssw-drop-tools.file b/cmssw-drop-tools.file index c267b7c1a96..7fd8a6d9500 100644 --- a/cmssw-drop-tools.file +++ b/cmssw-drop-tools.file @@ -1 +1 @@ -%define skipreqtools jcompiler icc-cxxcompiler icc-ccompiler icc-f77compiler rivet2 opencl opencl-cpp intel-vtune icx-cxxcompiler icx-ccompiler icx-f77compiler +%define skipreqtools jcompiler icc-cxxcompiler icc-ccompiler icc-f77compiler rivet2 opencl opencl-cpp intel-vtune icx-cxxcompiler icx-ccompiler icx-f77compiler mpich diff --git a/cmssw-tools.spec b/cmssw-tools.spec index 1a81c187f3c..81c6872a3a0 100644 --- a/cmssw-tools.spec +++ b/cmssw-tools.spec @@ -77,7 +77,10 @@ Requires: numactl Requires: hwloc Requires: rdma-core Requires: ucx +Requires: libfabric Requires: openmpi +Requires: mpich +Requires: mpi Requires: sigcpp Requires: sqlite Requires: tauolapp diff --git a/libfabric.spec b/libfabric.spec new file mode 100644 index 00000000000..83cf31a6c09 --- /dev/null +++ b/libfabric.spec @@ -0,0 +1,79 @@ +### RPM external libfabric 2.1.0 +Source: https://github.com/ofiwg/%{n}/releases/download/v%{realversion}/%{n}-%{realversion}.tar.bz2 +%{!?without_cuda:Requires: cuda gdrcopy} +%{!?without_rocm:Requires: rocm} +Requires: curl +Requires: numactl +Requires: rdma-core +Requires: xpmem + +%prep +%setup -q -n %{n}-%{realversion} + +# regenerate the configure files and Makefiles +./autogen.sh + +./configure \ + --prefix=%i \ + --disable-dependency-tracking \ + --disable-debug \ + --disable-profile \ + --disable-asan \ + --disable-lsan \ + --disable-tsan \ + --disable-ubsan \ + --enable-shared \ + --disable-static \ + --enable-shm \ + --enable-sm2 \ + --enable-xpmem=$XPMEM_ROOT \ + --disable-sockets \ + --enable-tcp \ + --enable-udp \ + --enable-verbs=$RDMA_CORE_ROOT \ + --disable-opx \ + --disable-psm2 \ + --disable-psm3 \ + --disable-usnic \ + --disable-efa \ + --disable-cxi \ + --disable-mrail \ + --disable-lpp \ + --disable-ucx \ + --enable-rxm \ + --enable-lnx \ +%if 0%{!?without_cuda:1} + 
--enable-cuda-dlopen \ + --enable-gdrcopy-dlopen \ + --with-cuda=$CUDA_ROOT \ + --with-gdrcopy=$GDRCOPY_ROOT \ +%else + --disable-cuda-dlopen \ + --disable-gdrcopy-dlopen \ + --without-cuda \ + --without-gdrcopy \ +%endif +%if 0%{!?without_rocm:1} + --enable-rocr-dlopen \ + --with-rocr=$ROCM_ROOT \ +%else + --disable-rocr-dlopen \ + --without-rocr \ +%endif + --disable-ze-dlopen \ + --without-ze \ + --with-pic \ + --with-dlopen \ + --with-gnu-ld \ + --with-curl=$CURL_ROOT \ + --with-numa=$NUMACTL_ROOT + + # CFLAGS="-Wno-error=array-bounds" + +%build +make %{makeprocesses} + +%install +make install + +%post diff --git a/mpi.spec b/mpi.spec new file mode 100644 index 00000000000..1a9596de27c --- /dev/null +++ b/mpi.spec @@ -0,0 +1,10 @@ +### RPM external mpi 1.0 +Requires: openmpi + +%prep + +%build + +%install + +%post diff --git a/mpich.spec b/mpich.spec new file mode 100644 index 00000000000..75e0d145502 --- /dev/null +++ b/mpich.spec @@ -0,0 +1,103 @@ +### RPM external mpich v4.3.0 +## INCLUDE cuda-flags +## INCLUDE rocm-flags +%define branch 4.3.x +%define tag %{realversion} +Source: git+https://github.com/pmodels/mpich.git?obj=%{branch}/%{tag}&export=%{n}-%{realversion}&submodules=1&output=/%{n}-%{realversion}.tgz +BuildRequires: autotools +%{!?without_cuda:Requires: cuda} +%{!?without_rocm:Requires: rocm} +Requires: libfabric +Requires: ucx +Requires: hwloc +Requires: xpmem + +# external libraries are needed for additional protocols: +# --with-hcoll: Mellanox Hierarchical Collectives +# --with-pmix: PMIx Reference Library (OpenPMIx) +# etc. 
+ +%prep +%setup -q -n %{n}-%{realversion} + +# remove the submodules we do not want to use +rm -rf modules/hwloc +sed -e's/do_hwloc=.*/do_hwloc=no/' -i autogen.sh + +rm -rf modules/libfabric +sed -e's/do_ofi=.*/do_ofi=no/' -i autogen.sh + +rm -rf modules/ucx +sed -e's/do_ucx=.*/do_ucx=no/' -i autogen.sh + +./autogen.sh + +# MPICH communication device: +# --with-device=ch4:ofi +# should work for TCP networks and any high-bandwidth interconnect supported by libfabric. +# --with-device=ch4:ucx +# should work for TCP networks and any high-bandwidth interconnect supported by the UCX library. +# --with-device=ch3 +# the legacy device ch3 + +# MPICH multi-threading support: +# --enable-thread-cs={default, global, per-vci, lock-free} +# Default is global for ch3 and per-vci for ch4 +# --enable-ch4-vci-method={default, zero, communicator, tag, implicit, explicit} +# --enable-ch4-mt={direct, lockless, runtime} +# direct - Each thread directly accesses lower-level fabric (default) +# lockless - Use the thread safe serialization model supported by the provider +# runtime - Determine the model at runtime through a CVAR + +# Note: using --enable-fast=O2,ndebug,alwaysinline,sse2 the compilation hangs (or takes a very long time to complete). 
+ +./configure \ + --prefix=%i \ + --enable-error-checking=all \ + --enable-tag-error-bits=yes \ + --enable-fast=O2,ndebug,sse2 \ + --enable-cxx \ + --enable-romio \ + --disable-mpi-abi \ + --enable-versioning \ + --enable-threads=multiple \ + --enable-thread-cs=default \ + --disable-dependency-tracking \ + --disable-silent-rules \ + --disable-maintainer-mode \ + --enable-shared \ + --disable-static \ + --enable-nemesis-shm-collectives \ +%if 0%{!?without_cuda:1} + --with-cuda=$CUDA_ROOT \ + --with-cuda-sm=%(echo %{cuda_arch} | sed -e's/ \+/,/g') \ +%else + --without-cuda \ +%endif +%if 0%{!?without_rocm:1} + --with-hip=$ROCM_ROOT \ + --with-hip-sm=%(echo %{rocm_archs} | sed -e's/ \+/,/g') \ +%else + --without-hip \ +%endif + --without-ze \ + --with-pic \ + --with-gnu-ld \ + --with-libfabric=$LIBFABRIC_ROOT \ + --with-ucx=$UCX_ROOT \ + --with-hwloc=$HWLOC_ROOT \ + --without-netloc \ + --with-xpmem=$XPMEM_ROOT \ + --with-yaksa=embedded \ + --with-device=ch4:ofi + +%build +make %{makeprocesses} V=1 + +%install +make install + +%post +%{relocateConfig}bin/mpicc +%{relocateConfig}bin/mpicxx +%{relocateConfig}bin/mpifort diff --git a/openmpi.spec b/openmpi.spec index 3b50f15f7d0..81ecda2cc4b 100644 --- a/openmpi.spec +++ b/openmpi.spec @@ -1,8 +1,11 @@ -### RPM external openmpi 4.1.6 +### RPM external openmpi 4.1.9a1-20250505 ## INITENV SET OPAL_PREFIX %{i} -Source: https://download.open-mpi.org/release/open-mpi/v4.1/%{n}-%{realversion}.tar.bz2 -BuildRequires: autotools +%define branch v4.1.x +%define tag e6d2cb856f3fc649aa01bd5b688a003b3b33db7d +Source: git+https://github.com/open-mpi/ompi.git?obj=%{branch}/%{tag}&export=%{n}-%{realversion}&output=/%{n}-%{realversion}.tgz +BuildRequires: autotools flex %{!?without_cuda:Requires: cuda} +Requires: libfabric Requires: hwloc Requires: rdma-core Requires: xpmem @@ -10,7 +13,6 @@ Requires: ucx Requires: zlib # external libraries are needed for additional protocols: -# --with-ofi: Open Fabric Interface's libfabric # 
--with-mxm: Mellanox Messaging (depracated, use UCX instead) # --with-fca: Mellanox Fabric Collective Accelerator # --with-hcoll: Mellanox Hierarchical Collectives @@ -20,6 +22,8 @@ Requires: zlib %prep %setup -q -n %{n}-%{realversion} +AUTOMAKE_JOBS=%{compiling_processes} ./autogen.pl + ./configure \ --prefix=%i \ --disable-dependency-tracking \ @@ -33,7 +37,7 @@ Requires: zlib --with-zlib=$ZLIB_ROOT \ %{!?without_cuda:--with-cuda=$CUDA_ROOT} \ --with-hwloc=$HWLOC_ROOT \ - --without-ofi \ + --with-ofi=$LIBFABRIC_ROOT \ --without-portals4 \ --without-psm \ --without-psm2 \ diff --git a/rdma-core-VERBS_CONFIG_DIR.patch b/rdma-core-VERBS_CONFIG_DIR.patch new file mode 100644 index 00000000000..13bc3294780 --- /dev/null +++ b/rdma-core-VERBS_CONFIG_DIR.patch @@ -0,0 +1,42 @@ +diff --git a/libibverbs/dynamic_driver.c b/libibverbs/dynamic_driver.c +index 7fa4233..c2ff2bb 100644 +--- a/libibverbs/dynamic_driver.c ++++ b/libibverbs/dynamic_driver.c +@@ -115,27 +115,33 @@ static void read_config_file(const char *path) + + static void read_config(void) + { ++ const char *verbs_config_dir; + DIR *conf_dir; + struct dirent *dent; + char *path; + +- conf_dir = opendir(IBV_CONFIG_DIR); ++ verbs_config_dir = getenv("VERBS_CONFIG_DIR"); ++ if (!verbs_config_dir) { ++ verbs_config_dir = IBV_CONFIG_DIR; ++ } ++ ++ conf_dir = opendir(verbs_config_dir); + if (!conf_dir) { + fprintf(stderr, + PFX "Warning: couldn't open config directory '%s'.\n", +- IBV_CONFIG_DIR); ++ verbs_config_dir); + return; + } + + while ((dent = readdir(conf_dir))) { + struct stat buf; + +- if (asprintf(&path, "%s/%s", IBV_CONFIG_DIR, dent->d_name) < ++ if (asprintf(&path, "%s/%s", verbs_config_dir, dent->d_name) < + 0) { + fprintf(stderr, + PFX + "Warning: couldn't read config file %s/%s.\n", +- IBV_CONFIG_DIR, dent->d_name); ++ verbs_config_dir, dent->d_name); + goto out; + } + diff --git a/rdma-core.spec b/rdma-core.spec index 1b9ab95d01f..0e7443d5dbe 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,11 
+1,13 @@ -### RPM external rdma-core 50.0 +### RPM external rdma-core 57.0 ## INITENV +PATH LD_LIBRARY_PATH %{i}/lib64 Source: https://github.com/linux-rdma/%{n}/releases/download/v%{realversion}/rdma-core-%{realversion}.tar.gz +Patch0: rdma-core-VERBS_CONFIG_DIR BuildRequires: cmake ninja %prep %setup -q -n %{n}-%{realversion} +%patch0 -p1 %build rm -rf build @@ -33,10 +35,19 @@ ninja -v %{makeprocesses} install # remove pkg-config to avoid rpm-generated dependency on /usr/bin/pkg-config rm -rf %{i}/lib64/pkgconfig -# keep only the libraries and include files -rm -rf %{i}/bin -rm -rf %{i}/etc +# keep only the user binaries, libibverbs configuration, libraries and include files +rm -rf %{i}/etc/infiniband-diags +rm -rf %{i}/etc/init.d +rm -rf %{i}/etc/modprobe.d +rm -rf %{i}/etc/rdma rm -rf %{i}/lib rm -rf %{i}/libexec rm -rf %{i}/sbin -rm -rf %{i}/share +rm -rf %{i}/share/perl5 + +# update the libibverbs plugins with the full path +sed -e's#driver \(\w\+\)#driver %{i}/lib64/libibverbs/lib\1#' -i %{i}/etc/libibverbs.d/* + +%post +# relocate the libibverbs plugins path +%{relocateConfig}etc/libibverbs.d/* diff --git a/scram-tools.file/tools/libfabric/libfabric.xml b/scram-tools.file/tools/libfabric/libfabric.xml new file mode 100644 index 00000000000..a33cf8bafdf --- /dev/null +++ b/scram-tools.file/tools/libfabric/libfabric.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/scram-tools.file/tools/mpi/mpi.xml b/scram-tools.file/tools/mpi/mpi.xml new file mode 100644 index 00000000000..55814f6e01e --- /dev/null +++ b/scram-tools.file/tools/mpi/mpi.xml @@ -0,0 +1,3 @@ + + + diff --git a/scram-tools.file/tools/mpich/mpich.xml b/scram-tools.file/tools/mpich/mpich.xml new file mode 100644 index 00000000000..569ef9b5a28 --- /dev/null +++ b/scram-tools.file/tools/mpich/mpich.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/scram-tools.file/tools/rdma-core/rdma-core.xml b/scram-tools.file/tools/rdma-core/rdma-core.xml index 1eb305ffddc..8f2167c08b1 100644 --- 
a/scram-tools.file/tools/rdma-core/rdma-core.xml +++ b/scram-tools.file/tools/rdma-core/rdma-core.xml @@ -5,5 +5,7 @@ + + diff --git a/ucx.spec b/ucx.spec index 7aa91bfe09d..e40593ae397 100644 --- a/ucx.spec +++ b/ucx.spec @@ -1,4 +1,4 @@ -### RPM external ucx 1.17.0 +### RPM external ucx 1.18.1 Source: https://github.com/openucx/%{n}/archive/refs/tags/v%{realversion}.tar.gz BuildRequires: autotools %{!?without_cuda:Requires: cuda gdrcopy}