# [PAL] Support Device API Transport #1722
# Workflow file for this run

# Builds the project inside a GPU-enabled container on a self-hosted runner,
# checks formatting with pre-commit, and runs the MPI-based unit tests.
name: Unit Tests in Container

# Run for pushes to main and for pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  unit-test:
    runs-on: [self-hosted, cx-build]
    container:
      # Image is pulled from the registry on the runner host (localhost:5000).
      image: localhost:5000/flagscale:cuda12.8.1-cudnn9.7.1-python3.12-torch2.7.0-time2507111538
      options: --gpus all --privileged --ipc=host --ulimit memlock=-1 --ulimit stack=67108864
    steps:
      # On pull_request events this checks out the PR head (possibly a fork).
      # On push events both expressions are empty, so checkout falls back to
      # its defaults (the current repository and the pushed ref).
      - name: Checkout Code
        uses: actions/checkout@v6
        with:
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.event.pull_request.head.ref }}
          ssh-strict: true
          ssh-user: git
          persist-credentials: true
          clean: true
          sparse-checkout-cone-mode: true
          fetch-tags: false
          show-progress: true
          lfs: false
          submodules: true
          set-safe-directory: true

      - name: Set up Python and Install Dependencies
        run: |
          # apt-get (not apt) is the stable command-line interface for scripts.
          apt-get update -y
          apt-get install -y python3 python3-pip python3-venv git rdma-core ibverbs-utils perftest infiniband-diags libibverbs-dev librdmacm-dev
          # Enter the checkout first so the venv is created in the same
          # directory it is activated from here and in the next step.
          cd /__w/FlagCX/FlagCX
          python3 -m venv venv
          . venv/bin/activate
          git config --global --add safe.directory /__w/FlagCX/FlagCX
          git status
          pip install setuptools pre-commit
          pre-commit install

      - name: Run Code Format Check with pre-commit
        run: |
          cd /__w/FlagCX/FlagCX
          . venv/bin/activate
          apt-get update -y
          apt-get install clang-format -y
          git fetch --all
          # GITHUB_HEAD_REF / GITHUB_BASE_REF are only set for pull requests,
          # so the check is skipped on push events.
          if [ -n "$GITHUB_HEAD_REF" ] && [ -n "$GITHUB_BASE_REF" ]; then
            # pre-commit checks the files changed in from-ref...to-ref, so the
            # base branch must be the "from" side and the PR head the "to" side.
            from_ref="origin/$GITHUB_BASE_REF"
            to_ref="origin/$GITHUB_HEAD_REF"
            echo "From reference: $from_ref; To reference: $to_ref"
            pre-commit run --from-ref "$from_ref" --to-ref "$to_ref"
          fi
        continue-on-error: false

      # googletest lives in the third-party/ tree; it is built in place.
      - name: Build Google Test
        run: |
          cd /__w/FlagCX/FlagCX/third-party/googletest
          mkdir -p build
          cd build
          cmake ..
          make -j$(nproc)

      - name: Run `make` to build the project
        run: |
          cd /__w/FlagCX/FlagCX
          export MPI_HOME=/usr/local/mpi
          make -j$(nproc) USE_NVIDIA=1 COMPILE_KERNEL=1

      - name: Build unit tests
        run: |
          cd /__w/FlagCX/FlagCX/test/unittest
          export MPI_HOME=/usr/local/mpi
          make -j$(nproc)

      # Each test step below launches 8 MPI processes; environment variables
      # passed with -x are set on the mpirun command line for the test binary.
      - name: Run single-cluster tests with mpirun
        run: |
          cd /__w/FlagCX/FlagCX/test/unittest/runner
          export MPI_HOME=/usr/local/mpi
          export PATH=$MPI_HOME/bin:$PATH
          mpirun -np 8 --allow-run-as-root ./build/bin/runner_mpi_tests

      - name: Run multi-cluster tests with mpirun
        run: |
          cd /__w/FlagCX/FlagCX/test/unittest/runner
          export MPI_HOME=/usr/local/mpi
          export PATH=$MPI_HOME/bin:$PATH
          mpirun -np 8 --allow-run-as-root -x FLAGCX_MEM_ENABLE=1 -x FLAGCX_CLUSTER_SPLIT_LIST=2 -x FLAGCX_IB_HCA=mlx5 ./build/bin/runner_mpi_tests

      - name: Run kernel tests with mpirun
        run: |
          cd /__w/FlagCX/FlagCX/test/unittest/kernel
          export MPI_HOME=/usr/local/mpi
          export PATH=$MPI_HOME/bin:$PATH
          mpirun -np 8 --allow-run-as-root -x FLAGCX_MEM_ENABLE=1 -x FLAGCX_CLUSTER_SPLIT_LIST=2 -x FLAGCX_USE_HETERO_COMM=1 ./build/bin/kernel_mpi_tests