|
11 | 11 | - "branch-*"
|
12 | 12 | - "main"
|
13 | 13 | jobs:
|
14 |
| - build: |
| 14 | + build-test: |
15 | 15 | defaults:
|
16 | 16 | run:
|
17 | 17 | shell: bash -el {0}
|
18 |
| - runs-on: linux-amd64-cpu8 |
| 18 | + runs-on: linux-amd64-gpu-v100-latest-1 |
19 | 19 | env:
|
20 | 20 | CONDA_PREFIX: /opt/conda
|
21 | 21 | container:
|
22 | 22 | image: rapidsai/devcontainers:23.10-cpp-cuda11.8-mambaforge-ubuntu22.04
|
23 | 23 | env:
|
24 |
| - DEFAULT_CONDA_ENV: legate |
| 24 | + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} # GPU jobs must set this container env variable |
25 | 25 | steps:
|
26 |
| - - name: Get Date |
27 |
| - id: get-date |
28 |
| - run: echo "week=$(/bin/date -u '+%Y.week%U')" >> $GITHUB_OUTPUT |
29 |
| - - name: Cache Conda env |
30 |
| - uses: actions/cache@v3 |
31 |
| - with: |
32 |
| - path: /opt/conda/envs/legate |
33 |
| - # renew cache weekly with legate.core/cunumeric builds |
34 |
| - key: |
35 |
| - conda--internal--${{ |
36 |
| - steps.get-date.outputs.week }} |
37 |
| - id: cache |
38 |
| - - name: Checkout legate core |
39 |
| - uses: actions/checkout@v4 |
40 |
| - with: |
41 |
| - repository: nv-legate/legate.core.internal |
42 |
| - path: legate.core.internal |
43 |
| - token: ${{ secrets.NV_LEGATE_TOKEN }} |
44 |
| - ref: ba57a43800a00d57d2c6e19b74b00f36ba921b48 |
45 |
| - - name: Install legate core |
46 |
| - working-directory: ./legate.core.internal |
| 26 | + - name: Install legate/cunumeric |
47 | 27 | run: |
|
48 |
| - python scripts/generate-conda-envs.py --ctk 11.8 --os linux --compilers --openmpi --ucx; |
49 |
| - echo " - python=3.10" >> environment-test-linux-cuda11.8-compilers-openmpi-ucx.yaml; |
50 |
| - mamba env create -n legate -f environment-test-linux-cuda11.8-compilers-openmpi-ucx.yaml; |
51 |
| - mamba activate legate; |
52 |
| - # Downgrade NumPy; as of 2024-07-01 it triggers a mypy bug |
53 |
| - mamba install -y build scikit-learn hypothesis 'pytest<8' notebook 'numpy<2'; |
| 28 | + mamba install -y -c legate -c conda-forge legate-core=24.06 cunumeric=24.06 build cmake scikit-build scikit-learn hypothesis 'pytest<8' notebook 'numpy<2' mypy openblas |
54 | 29 | pip install matplotlib seaborn xgboost
|
55 |
| - export LEGATE_CORE_ARCH='arch-linux-py-relwithdebinfo'; |
56 |
| - ./configure --with-python --cuda-arch 70 --with-cuda --build-type=relwithdebinfo --with-nccl --with-ucx && make && pip install .; |
57 |
| - if: steps.cache.outputs.cache-hit != 'true' |
58 |
| - - name: Checkout cunumeric |
59 |
| - uses: actions/checkout@v4 |
60 |
| - with: |
61 |
| - repository: nv-legate/cunumeric.internal |
62 |
| - path: cunumeric.internal |
63 |
| - token: ${{ secrets.NV_LEGATE_TOKEN }} |
64 |
| - ref: f407887f5d43adaeeec1211fed1e8a1b58e9d3ab |
65 |
| - - name: Install cunumeric |
66 |
| - working-directory: ./cunumeric.internal |
67 |
| - run: | |
68 |
| - ./install.py |
69 |
| - # make the conda directory smaller for easier caching |
70 |
| - # this removes unneeded files from the env |
71 |
| - mamba clean -all -y |
72 |
| - if: steps.cache.outputs.cache-hit != 'true' |
73 | 30 | - name: Checkout legateboost
|
74 | 31 | uses: actions/checkout@v4
|
75 | 32 | with:
|
|
85 | 42 | with:
|
86 | 43 | name: legateboost-wheel
|
87 | 44 | path: dist/legateboost*.whl
|
88 |
| - |
89 |
| - |
90 |
| - test: |
91 |
| - needs: build |
92 |
| - defaults: |
93 |
| - run: |
94 |
| - shell: bash -el {0} |
95 |
| - runs-on: linux-amd64-gpu-v100-latest-1 |
96 |
| - container: |
97 |
| - image: rapidsai/devcontainers:23.06-cpp-cuda11.8-mambaforge-ubuntu22.04 |
98 |
| - env: |
99 |
| - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} # GPU jobs must set this container env variable |
100 |
| - DEFAULT_CONDA_ENV: legate |
101 |
| - steps: |
102 |
| - - name: Get Date |
103 |
| - id: get-date |
104 |
| - run: echo "week=$(/bin/date -u '+%Y.week%U')" >> $GITHUB_OUTPUT |
105 |
| - - name: Restore cached environment |
106 |
| - id: cache-restore |
107 |
| - uses: actions/cache/restore@v3 |
108 |
| - with: |
109 |
| - path: /opt/conda/envs/legate |
110 |
| - key: conda--internal--${{ |
111 |
| - steps.get-date.outputs.today }} |
112 |
| - - uses: actions/download-artifact@v3 |
113 |
| - with: |
114 |
| - name: legateboost-wheel |
115 |
| - - name: Install legateboost artifact |
116 |
| - run: | |
117 |
| - mamba activate legate |
118 |
| - pip install $(find -name legateboost*.whl)[test] --prefix . |
119 |
| - - name: Checkout legateboost |
120 |
| - uses: actions/checkout@v4 |
121 |
| - with: |
122 |
| - lfs: true |
123 |
| - - name: Build legateboost |
124 |
| - run: | |
125 |
| - # build again so we have a local editable install for the purpose of building docs |
126 |
| - pip install -e . |
127 | 45 | - name: Run cpu tests
|
128 | 46 | run: |
|
129 | 47 | legate --sysmem 28000 --module pytest legateboost/test/[!_]**.py -sv --durations=0
|
130 |
| - - name: Run cpu multi-node tests |
131 |
| - run: | |
132 |
| - # rapids container is run as root, tell mpi it is ok to run as root (we are in a container) |
133 |
| - # Only run one test because CPU with MPI is slowwwww |
134 |
| - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 legate --launcher mpirun --ranks-per-node 2 --cpu-bind 0-9/10-19 --module pytest legateboost/test/test_estimator.py::test_regressor[base_models0-squared_error-5] |
135 | 48 | - name: Run gpu tests
|
136 | 49 | run: |
|
137 | 50 | nvidia-smi
|
|
148 | 61 |
|
149 | 62 |
|
150 | 63 | deploy:
|
151 |
| - needs: test |
| 64 | + needs: build-test |
152 | 65 | # only main branch uploads docs
|
153 | 66 | if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
154 | 67 |
|
|
0 commit comments