Skip to content

fix slurm var on eos #11

fix slurm var on eos

fix slurm var on eos #11

Workflow file for this run

# Copyright 2025 Stanford University, NVIDIA Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Continuous-integration workflow.
name: ci

# Triggered by pushes to, and pull requests targeting, every branch except
# gh-pages.
on:
  push:
    branches-ignore: [gh-pages]
  pull_request:
    branches-ignore: [gh-pages]

# Runs are grouped per workflow and ref. An in-progress run is cancelled by a
# newer one unless the ref is `main` or a `v*` tag.
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: ${{ github.ref_name != 'main' && ! startsWith(github.ref, 'refs/tags/v') }}

jobs:
build:
  # Native build-and-test job; one instance per `matrix.config` entry.
  #
  # The display name is assembled from the matrix entry:
  #   <os>-<shared|static>-<cc>-<build>[-cuda<ver>][-ucx][-gasnet][-kokkos][-openmp][-python][-<sanitizer>]
  # The shared/static part must read `shared_lib`, which is the key the matrix
  # entries define. Testing the non-existent `shared` key always yields an
  # empty value and labels every job "static".
  name: ${{ matrix.config.os }}-${{ matrix.config.shared_lib == 'ON' && 'shared' || 'static' }}-${{ matrix.config.cc }}-${{ matrix.config.build }}${{ matrix.config.cuda && format('-cuda{0}', matrix.config.cuda) || '' }}${{ matrix.config.ucx == 'ON' && '-ucx' || '' }}${{ matrix.config.gasnet == 'ON' && '-gasnet' || '' }}${{ matrix.config.kokkos == 'ON' && '-kokkos' || '' }}${{ matrix.config.openmp == 'ON' && '-openmp' || '' }}${{ matrix.config.python == 'ON' && '-python' || '' }}${{ matrix.config.sanitizer != 'NONE' && format('-{0}', matrix.config.sanitizer) || '' }}
  runs-on: ${{ matrix.config.os }}
  env:
    # CPM package sources are kept inside the workspace so they can be cached.
    CPM_SOURCE_CACHE: "${{ github.workspace }}/.cache/cpm"
    # Compiler-cache settings; values are strings because they become
    # environment variables.
    CCACHE_BASEDIR: ${{ github.workspace }}
    CCACHE_COMPRESS: "true"
    CCACHE_COMPRESS_LEVEL: "6"
strategy:
  # Let every configuration run to completion even if another one fails.
  fail-fast: false
  matrix:
    # Each entry describes one build configuration. The 'ON'/'OFF' values are
    # quoted so they stay strings; they are compared against 'ON' in
    # expressions and passed verbatim to CMake.
    config:
    # Windows, MSVC, static Release; sccache is the compiler launcher.
    - os: windows-latest
      build: Release
      cc: 'cl'
      cxx: 'cl'
      ccache: sccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'OFF'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'OFF'
      kokkos: 'OFF'
      sanitizer: 'NONE'
    # macOS, clang, shared Release with OpenMP.
    - os: macos-latest
      build: Release
      cc: 'clang'
      cxx: 'clang++'
      ccache: ccache
      shared_lib: 'ON'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'OFF'
      kokkos: 'OFF'
      sanitizer: 'NONE'
    # Ubuntu arm64, gcc, static Release with OpenMP and Python.
    - os: ubuntu-24.04-arm
      build: Release
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'ON'
      kokkos: 'OFF'
      sanitizer: 'NONE'
    # Ubuntu arm64, clang, static Release with OpenMP and Python.
    - os: ubuntu-24.04-arm
      build: Release
      cc: 'clang'
      cxx: 'clang++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'ON'
      kokkos: 'OFF'
      sanitizer: 'NONE'
    # Ubuntu, gcc, static Release with GASNet, Kokkos, OpenMP and Python.
    - os: ubuntu-latest
      build: Release
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'ON'
      python: 'ON'
      kokkos: 'ON'
      sanitizer: 'NONE'
    # Ubuntu, gcc, static Release with UCX, Kokkos, OpenMP and Python.
    - os: ubuntu-latest
      build: Release
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'ON'
      gasnet: 'OFF'
      python: 'ON'
      kokkos: 'OFF' == 'ON' && 'unused' || 'ON'
      sanitizer: 'NONE'
    # Ubuntu, gcc, static Release with CUDA 13.0.0, OpenMP and Python.
    - os: ubuntu-latest
      build: Release
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: '13.0.0'
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'ON'
      kokkos: 'OFF'
      sanitizer: 'NONE'
    # Ubuntu, gcc, static Release with CUDA 12.9.1, Kokkos, OpenMP and Python.
    - os: ubuntu-latest
      build: Release
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: '12.9.1'
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'ON'
      kokkos: 'ON'
      sanitizer: 'NONE'
    # Ubuntu, clang, static Release with OpenMP and Python.
    - os: ubuntu-latest
      build: Release
      cc: 'clang'
      cxx: 'clang++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'ON'
      kokkos: 'OFF'
      sanitizer: 'NONE'
    # Ubuntu, gcc, shared Debug with OpenMP and Python. This is the only
    # Linux + Debug + shared entry, which is what the ABI steps key on.
    - os: ubuntu-latest
      build: Debug
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'ON'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'ON'
      kokkos: 'OFF'
      sanitizer: 'NONE'
    # Ubuntu, gcc, static Debug with ASAN.
    - os: ubuntu-latest
      build: Debug
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'OFF'
      kokkos: 'OFF'
      sanitizer: 'ASAN'
    # Ubuntu, gcc, static Debug with UBSAN.
    - os: ubuntu-latest
      build: Debug
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'OFF'
      kokkos: 'OFF'
      sanitizer: 'UBSAN'
    # Ubuntu, gcc, static Debug with TSAN.
    - os: ubuntu-latest
      build: Debug
      cc: 'gcc'
      cxx: 'g++'
      ccache: ccache
      shared_lib: 'OFF'
      cuda: ''
      openmp: 'ON'
      ucx: 'OFF'
      gasnet: 'OFF'
      python: 'OFF'
      kokkos: 'OFF'
      sanitizer: 'TSAN'
steps:
# Check out the repository with the last 100 commits and the tags.
# NOTE(review): "[email protected]" looks like scraper e-mail obfuscation of
# "<action>@<version>"; restore the pinned ref before using this file.
- uses: actions/[email protected]
  with:
    fetch-depth: 100
    fetch-tags: true
# Produce a millisecond-resolution timestamp; later cache steps use it as a
# key suffix.
- name: Get Timestamp
  id: timestamp
  uses: Kaven-Universe/github-action-current-date-time@v1
  with:
    format: 'yyyy-MM-dd-HH-mm-ss-SSS'
# Ubuntu only: install the selected compilers, ccache and the ABI tools
# through the apt package cache action.
- name: Get Linux Packages
  if: startsWith(matrix.config.os, 'ubuntu')
  uses: awalsh128/cache-apt-pkgs-action@latest
  with:
    packages: ${{ matrix.config.cc }} ${{ matrix.config.cxx }} ccache abi-compliance-checker abi-dumper
    execute_install_scripts: true
# The OpenMPI package cannot go through the apt package cache, so it is
# installed directly on every run.
# `$(which sudo)` expands to nothing when sudo is not installed, so the same
# commands work with or without it.
# The package index is refreshed first so the install does not resolve
# against stale lists, and `apt-get` is used because the `apt` front end does
# not offer a stable command-line interface for scripts.
- name: Get Linux OpenMPI
  if: startsWith(matrix.config.os, 'ubuntu')
  run: |
    $(which sudo) apt-get update
    $(which sudo) apt-get install -yy libopenmpi-dev
# CUDA configurations only: install the listed toolkit sub-packages over the
# network for the requested CUDA version.
# NOTE(review): "[email protected]" looks like scraper e-mail obfuscation of
# "<action>@<version>"; restore the pinned ref before using this file.
- name: Get CUDA
  if: matrix.config.cuda != ''
  uses: Jimver/[email protected]
  with:
    cuda: ${{ matrix.config.cuda }}
    sub-packages: '["nvcc", "cudart-dev", "cupti-dev", "nvtx", "nvml-dev"]'
    method: 'network'
# The timestamp suffix makes each primary key unique; the restore-keys prefix
# is what matches an earlier cache entry.
# NOTE(review): "[email protected]" in the `uses:` lines below looks like
# scraper e-mail obfuscation of "<action>@<version>"; restore the pinned refs.
- name: Restore CPM source cache
  uses: actions/[email protected]
  with:
    path: .cache/cpm
    key: cpm-${{ steps.timestamp.outputs.time }}
    restore-keys: |
      cpm-
# Pre-built third-party dependencies, keyed by OS, compiler and CUDA version.
- name: Restore deps cache
  uses: actions/[email protected]
  with:
    path: |
      build/_deps/ucx
      build/_deps/ucc
      build/_deps/gasnet
    key: deps-${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-${{ steps.timestamp.outputs.time }}
    restore-keys: |
      deps-${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-
# Compiler cache (ccache or sccache, per matrix entry). The restore keys try
# the current ref first and then `main`.
- name: Restore ccache Cache
  uses: hendrikmuhs/[email protected]
  with:
    variant: ${{ matrix.config.ccache }}
    append-timestamp: true
    key: ${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-${{ matrix.config.build }}-${{ github.ref_name }}
    restore-keys: |
      ${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-${{ matrix.config.build }}-${{ github.ref_name }}
      ${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-${{ matrix.config.build }}-main
# MSBuild is only set up on the Windows runner.
- name: Setup Windows Build Environment
  if: matrix.config.os == 'windows-latest'
  uses: microsoft/setup-msbuild@v2
# Every configuration gets CMake from this action.
- uses: lukka/get-cmake@latest
# Kokkos configurations only: fetch Kokkos 4.0.01 into ./kokkos.
# NOTE(review): "[email protected]" looks like scraper e-mail obfuscation of
# "<action>@<version>"; restore the pinned ref before using this file.
- name: Checkout kokkos
  if: matrix.config.kokkos == 'ON'
  uses: actions/[email protected]
  with:
    repository: kokkos/kokkos
    ref: '4.0.01'
    path: kokkos
# Build and install Kokkos as a shared Release library under kokkos/install.
# CUDA configurations add the CUDA-specific Kokkos options. The final line
# exports REALM_MIN_CUDA_ARCH=60 to later steps, where the Configure step
# reads it.
- name: Build Kokkos
  if: matrix.config.kokkos == 'ON'
  run: |
    cmake -S kokkos -B kokkos/build \
    ${{ matrix.config.cuda != '' && '-DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=OFF -DCMAKE_CXX_FLAGS=-DKOKKOS_IMPL_TURN_OFF_CUDA_HOST_INIT_CHECK' || ''}} \
    -DKokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE=ON \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS=ON \
    -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
    -DKokkos_ENABLE_SERIAL=ON \
    -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/kokkos/install
    cmake --build kokkos/build --parallel 4 --config Release
    cmake --install kokkos/build --parallel 4 --prefix kokkos/install --config Release
    echo REALM_MIN_CUDA_ARCH=60 >> "${GITHUB_ENV}"
# CUDA configurations only: probe for a usable GPU.
# `set +e` keeps a failing probe from aborting the step. The probe's exit
# status is published as the `no_gpu` output: 0 when `nvidia-smi -L` lists
# "GPU 0", non-zero otherwise. The Run Tests step reads this output.
- name: Check for GPU
  id: check_gpu
  if: matrix.config.cuda != ''
  run: |
    set +e
    which nvidia-smi && (nvidia-smi -L | grep 'GPU 0')
    echo no_gpu=$? >> $GITHUB_OUTPUT
# Configure with the options chosen by the matrix entry. The folded scalar
# joins the lines below into one command line.
# $REALM_MIN_CUDA_ARCH is read from the environment; the Build Kokkos step is
# the only place in this job that sets it.
- name: Configure
  run: >-
    cmake -S . -B build
    -DCMAKE_C_COMPILER=${{ matrix.config.cc }}
    -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }}
    -DCMAKE_C_COMPILER_LAUNCHER=${{ matrix.config.ccache }}
    -DCMAKE_CXX_COMPILER_LAUNCHER=${{ matrix.config.ccache }}
    -DCMAKE_BUILD_TYPE=${{ matrix.config.build }}
    -DBUILD_SHARED_LIBS=${{ matrix.config.shared_lib }}
    -DREALM_BUILD_TESTS=ON -DREALM_BUILD_BENCHMARKS=ON -DREALM_BUILD_TUTORIALS=ON -DREALM_BUILD_EXAMPLES=ON
    -DREALM_ENABLE_OPENMP=${{ matrix.config.openmp }}
    -DREALM_ENABLE_UCX=${{ matrix.config.ucx }}
    -DREALM_ENABLE_PYTHON=${{ matrix.config.python }}
    -DREALM_ENABLE_CUDA=${{ matrix.config.cuda != '' && 'ON' || 'OFF' }}
    -DREALM_MIN_CUDA_ARCH=$REALM_MIN_CUDA_ARCH
    -DREALM_ENABLE_KOKKOS=${{ matrix.config.kokkos }}
    -DKokkos_ROOT=kokkos/install
    -DGASNET_CONDUIT=mpi
    -DREALM_ENABLE_GASNETEX=${{ matrix.config.gasnet }}
    -DREALM_SANITIZER=${{ matrix.config.sanitizer }}
# Build verbosely and keep a copy of the output in build-new.log for the
# warning comparison that follows.
# `shell: bash` is set explicitly because the runner only adds `-o pipefail`
# when bash is named. With the implicit default shell the pipeline would
# report tee's exit status, so a failed `cmake --build` would pass this step.
# On Windows runners this selects the bash that ships with Git for Windows.
- name: Build
  shell: bash
  run: >-
    cmake --build build --parallel 4 --config ${{ matrix.config.build }} --verbose
    | tee build-new.log
# Restore the previous build log for this configuration. The restore keys try
# the current ref first and then `main`.
# NOTE(review): "[email protected]" looks like scraper e-mail obfuscation of
# "<action>@<version>"; restore the pinned ref before using this file.
- name: Restore build log cache
  id: build-log-cache
  uses: actions/[email protected]
  with:
    path: build.log
    key: build-log-${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-${{ matrix.config.build }}-${{ github.ref_name }}
    restore-keys: |
      build-log-${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-${{ matrix.config.build }}-${{ github.ref_name }}
      build-log-${{ matrix.config.os }}-${{ matrix.config.cc }}-${{ matrix.config.cuda }}-${{ matrix.config.build }}-main
# Runs only when the cache step reports a hit. Counts lines containing
# "warning" in the old and new logs and raises a workflow warning annotation
# when the count went up. The step itself never fails on that.
- name: Check build log for new warnings
  if: steps.build-log-cache.outputs.cache-hit == 'true'
  shell: sh
  run: |
    old_num=$(grep 'warning' build.log | wc -l)
    new_num=$(grep 'warning' build-new.log | wc -l)
    echo "::debug::Found $new_num warnings, previously $old_num"
    if [ "$new_num" -gt "$old_num" ]; then
      echo "::warning::New compiler warnings found (old=$old_num, new=$new_num)"
    fi
# Promote the fresh log to build.log, the path the cache step manages.
- name: Update build log cache
  if: success()
  run: |
    cmake -E copy_if_different build-new.log build.log
# Run the test suite and write a JUnit report. CUDA configurations are
# skipped only when the GPU probe reported no_gpu == '1'.
- name: Run Tests
  id: run-tests
  if: matrix.config.cuda == '' || steps.check_gpu.outputs.no_gpu != '1'
  run: >-
    ctest --output-junit ${{ github.workspace }}/test-results.xml
    --test-dir build
    --build-config ${{ matrix.config.build }}
    --output-on-failure
    --timeout 240
    --parallel 4
# The two reporting steps run whenever a report file exists, including after
# a test failure.
- name: Upload Test Results
  if: always() && hashFiles('test-results.xml')
  uses: test-summary/action@v2
  with:
    paths: 'test-results.xml'
- name: Upload test results to Codecov
  if: always() && hashFiles('test-results.xml')
  uses: codecov/test-results-action@v1
  with:
    token: ${{ secrets.CODECOV_TOKEN }}
# Install into ./install; the ABI steps read librealm.so from there.
- name: Run Install
  run: >-
    cmake --install build --parallel 4 --prefix install
    --config ${{ matrix.config.build }}
# Check there are no ABI regressions on the installed librealm.so.
# Only the Linux + Debug + shared-library configuration takes part.
- name: Dump ABI
  id: dump-abi
  if: runner.os == 'Linux' && matrix.config.build == 'Debug' && matrix.config.shared_lib == 'ON'
  run: >-
    abi-dumper ${{ github.workspace }}/install/lib/librealm.so -o abi-new.dump
# Fetch the baseline dump, if one has been cached.
# NOTE(review): "[email protected]" looks like scraper e-mail obfuscation of
# "<action>@<version>"; restore the pinned ref before using this file.
- name: Retrieve Current ABI
  id: cache-abi
  if: steps.dump-abi.conclusion == 'success'
  uses: actions/[email protected]
  with:
    path: ${{ github.workspace }}/abi.dump
    key: ${{ runner.os }}-abi-dump
- name: Compare ABI dumps
  id: compare-abi
  if: steps.cache-abi.outputs.cache-hit == 'true'
  # Temporarily ignore errors here
  continue-on-error: true
  run: >-
    abi-compliance-checker -lib realm
    -old ${{ github.workspace }}/abi.dump
    -new ${{ github.workspace }}/abi-new.dump
# Promote the new dump to the cached baseline path. This runs after a
# successful comparison and also when no baseline was restored, in which case
# the comparison was skipped. Without the second case abi.dump would never
# exist, the cache could never be populated, and the comparison would never
# run.
- name: Update ABI dump
  if: steps.dump-abi.conclusion == 'success' && steps.compare-abi.outcome != 'failure'
  run: >-
    mv ${{ github.workspace }}/abi-new.dump ${{ github.workspace }}/abi.dump
# Keep the HTML compatibility report when the comparison failed.
- name: Upload ABI failure artifact
  if: steps.compare-abi.outcome == 'failure'
  uses: actions/upload-artifact@v4
  with:
    name: abi-failure-log
    path: compat_reports/**/compat_report.html
cross_build:
  # Cross-compiles for a non-native architecture on an x86 Ubuntu runner and
  # runs basic tests under qemu.
  name: ubuntu-24.04-${{ matrix.config.arch }}
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      # `arch` names the qemu platform; `gcc_arch` is the prefix used by the
      # cross-compiler packages and the toolchain file.
      config:
      - arch: ppc64le
        gcc_arch: powerpc64le
  env:
    CPM_SOURCE_CACHE: "${{ github.workspace }}/.cache/cpm"
    CCACHE_BASEDIR: ${{ github.workspace }}
    CCACHE_COMPRESS: "true"
    CCACHE_COMPRESS_LEVEL: "6"
  steps:
  # NOTE(review): every "[email protected]" in this job looks like scraper
  # e-mail obfuscation of "<action>@<version>"; restore the pinned refs.
  - uses: actions/[email protected]
    with:
      fetch-depth: 100
      fetch-tags: true
  # Timestamp used as the unique suffix of the CPM cache key.
  - name: Get Timestamp
    id: timestamp
    uses: Kaven-Universe/github-action-current-date-time@v1
    with:
      format: 'yyyy-MM-dd-HH-mm-ss-SSS'
  # Cross compilers for the target architecture, plus ccache.
  - name: Get Linux Packages
    uses: awalsh128/cache-apt-pkgs-action@latest
    with:
      packages: gcc-${{ matrix.config.gcc_arch }}-linux-gnu g++-${{ matrix.config.gcc_arch }}-linux-gnu ccache
  - name: Restore CPM source cache
    uses: actions/[email protected]
    with:
      path: .cache/cpm
      key: cpm-${{ steps.timestamp.outputs.time }}
      restore-keys: |
        cpm-
  # Compiler cache; the restore keys try the current ref first, then `main`.
  - name: Restore ccache Cache
    uses: hendrikmuhs/[email protected]
    with:
      variant: ccache
      append-timestamp: true
      key: ubuntu-24.04-${{ matrix.config.arch }}-${{ github.ref_name }}
      restore-keys: |
        ubuntu-24.04-${{ matrix.config.arch }}-${{ github.ref_name }}
        ubuntu-24.04-${{ matrix.config.arch }}-main
  - uses: lukka/get-cmake@latest
  # User-mode emulation so the cross-built binaries can run on the host.
  - name: Setup qemu
    uses: docker/[email protected]
    with:
      platforms: linux/${{ matrix.config.arch }}
  # Configure a Release build through the per-architecture toolchain file,
  # with UCX, GASNet-EX and HIP switched off. The folded scalar joins the
  # lines into one command line.
  - name: Configure
    run: >-
      cmake -S . -B build
      -DCMAKE_TOOLCHAIN_FILE=cmake/toolchains/${{ matrix.config.gcc_arch }}-linux-gnu.cmake
      -DCMAKE_BUILD_TYPE=Release
      -DCMAKE_C_COMPILER_LAUNCHER=ccache
      -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
      -DCPPTRACE_GET_SYMBOLS_WITH_NOTHING=ON
      -DCPPTRACE_UNWIND_WITH_NOTHING=ON
      -DREALM_BUILD_TESTS=ON -DREALM_BUILD_BENCHMARKS=ON -DREALM_BUILD_TUTORIALS=ON -DREALM_BUILD_EXAMPLES=ON
      -DREALM_ENABLE_UCX=OFF
      -DREALM_ENABLE_GASNETEX=OFF
      -DREALM_ENABLE_HIP=OFF
  - name: Build
    run: |
      cmake --build build --parallel 4 --config Release
  # QEMU_LD_PREFIX points at the cross toolchain's directory under /usr.
  - name: Run Tests
    id: run-tests
    env:
      QEMU_LD_PREFIX: "/usr/${{ matrix.config.gcc_arch }}-linux-gnu"
    run: >-
      ctest --output-junit ${{ github.workspace }}/test-results.xml
      --test-dir build
      --build-config Release
      --output-on-failure
      --timeout 240
      --parallel 4
  # Publish the report whenever one was written, including after failures.
  - name: Upload Test Results
    if: always() && hashFiles('test-results.xml')
    uses: test-summary/action@v2
    with:
      paths: 'test-results.xml'
  - name: Run Install
    run: >-
      cmake --install build --parallel 4 --prefix install --config Release
reuse:
  # Licensing checks: REUSE compliance (https://reuse.software/) for the whole
  # tree, plus a check that files touched by the change carry the current
  # year in their copyright line.
  name: Check REUSE & Copyright compliance
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
  steps:
  # NOTE(review): every "[email protected]" in this job looks like scraper
  # e-mail obfuscation of "<action>@<version>"; restore the pinned refs.
  - uses: actions/[email protected]
  - name: REUSE Compliance Check
    uses: fsfe/reuse-action@v4
  - uses: tj-actions/[email protected]
    id: changed-files
  - name: Check copyright year is current for changed files
    # Step outputs are strings and the string 'false' is truthy in an `if:`
    # expression, so the value is compared explicitly.
    if: steps.changed-files.outputs.any_changed == 'true'
    env:
      # The file list goes through the environment rather than being pasted
      # into the script, so a crafted file name cannot inject shell code.
      ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
    run: |
      # ALL_CHANGED_FILES is left unquoted on purpose: it is a
      # space-separated list and must split into one argument per file.
      grep -H "Copyright " ${ALL_CHANGED_FILES} | grep -v $(date +%Y) > /tmp/copyright_lines.txt || true
      # Test the file that was just written. A bare relative path would
      # never exist and the check could never fail.
      if [ -s /tmp/copyright_lines.txt ]; then
        echo "Copyright information out of date for files:";
        cut -d':' -f1 /tmp/copyright_lines.txt | sort | uniq
        exit 1
      fi
docs:
  # Builds the documentation after both build jobs have finished. Pushes to
  # `main` or a `v*` tag also publish it to the gh-pages branch.
  name: Build Docs
  runs-on: ubuntu-latest
  needs:
  - build
  - cross_build
  permissions:
    # Needed for the push to gh-pages in the publish step.
    contents: write
  strategy:
    fail-fast: false
  steps:
  - name: Get Linux Packages
    uses: awalsh128/cache-apt-pkgs-action@latest
    with:
      packages: doxygen doxygen-latex graphviz cmake
  # NOTE(review): every "[email protected]" in this job looks like scraper
  # e-mail obfuscation (of "<action>@<version>" in `uses:` lines, and of the
  # committer address in the publish script); restore the real values.
  - uses: actions/[email protected]
    with:
      fetch-depth: 100
      fetch-tags: true
      path: realm-src
  # Replace every '/' in the ref name with '-' so it can be used as an
  # artifact name and a directory name.
  - name: Get Safe Ref Name
    id: safe
    run: |
      echo ref=${GITHUB_REF_NAME//\//-} >> ${GITHUB_OUTPUT}
  # Configure with docs enabled, build the `docs` target and install only the
  # Realm_doc component into ./install.
  - name: Build
    run: |
      cmake -S realm-src -B build -DCMAKE_BUILD_TYPE=Release -DREALM_ENABLE_UCX=OFF -DREALM_ENABLE_GASNETEX=OFF -DREALM_BUILD_DOCS=ON -DREALM_GENERATE_HTML=ON
      make -C build docs
      cmake --install build --component Realm_doc --prefix install
  # Short-lived artifact holding the generated documentation.
  - uses: actions/upload-artifact@v4
    with:
      name: docs-${{ steps.safe.outputs.ref }}
      path: install
      if-no-files-found: error
      retention-days: 2
      overwrite: true
  # The two steps below run only for pushes to `main` or a `v*` tag.
  - uses: actions/[email protected]
    if: github.event_name == 'push' && (github.ref_name == 'main' || startsWith(github.ref, 'refs/tags/v'))
    with:
      ref: gh-pages
      path: gh-pages
  # Replace the published docs for this ref, refresh the tutorials and the
  # version index, then commit, rebase onto origin/gh-pages and push.
  # NOTE(review): the trailing `*` in the `rm -rf` line also matches other
  # directories sharing this ref's prefix (e.g. v1.2 would match v1.20);
  # confirm that is intended.
  - name: Publish Docs and Tutorials
    if: github.event_name == 'push' && (github.ref_name == 'main' || startsWith(github.ref, 'refs/tags/v'))
    run: |
      rm -rf gh-pages/doc/${{ steps.safe.outputs.ref }}*
      mv install/share/doc/Realm/realm-* gh-pages/doc/${{ steps.safe.outputs.ref }}
      cp realm-src/tutorials/*/*.md gh-pages/_tutorials
      cd gh-pages
      (cd doc && find * -maxdepth 0 -type d | sort --version-sort) > doc/doc-versions
      git config --global user.name "GitHub Docs Builder"
      git config --global user.email "[email protected]"
      git add doc/${{ steps.safe.outputs.ref }} doc/doc-versions _tutorials
      git status
      git commit --allow-empty -m "Update docs and tutorials for ${{ github.ref_name }}"
      git fetch origin
      git rebase --strategy-option=theirs origin/gh-pages --verbose
      git push --verbose