Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions .github/workflows/build-examples-aicpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,12 @@ jobs:
- arch: x86_64
python_version: 3.11
npu_device: 910b
- arch: x86_64
python_version: 3.12
npu_device: 910b
- arch: x86_64
python_version: 3.11
npu_device: a3
- arch: x86_64
python_version: 3.12
npu_device: a3
npu_device: 950

container:
image: quay.io/ascend/cann:8.5.1-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
image: quay.io/ascend/cann:9.0.0-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
steps:
- name: Show OS
run: cat /etc/os-release
Expand Down
24 changes: 6 additions & 18 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,12 @@ jobs:
- arch: x86_64
python_version: 3.11
npu_device: 910b
- arch: x86_64
python_version: 3.12
npu_device: 910b
- arch: x86_64
python_version: 3.11
npu_device: a3
- arch: x86_64
python_version: 3.12
npu_device: a3
npu_device: 950

container:
image: quay.io/ascend/cann:8.5.1-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
image: quay.io/ascend/cann:9.0.0-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
steps:
- name: Show OS
run: cat /etc/os-release
Expand All @@ -96,7 +90,7 @@ jobs:
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib # to avoid libascend_hal.so issue
pip3 install pyyaml setuptools pytest packaging pybind11[global]
pip3 install -r requirements.txt
pip3 install torch-npu==2.8.0.post2 --extra-index-url https://download.pytorch.org/whl/cpu
pip3 install torch-npu==2.8.0.post4 --extra-index-url https://download.pytorch.org/whl/cpu
make clean build_cmake

build_wheel_docker:
Expand All @@ -110,17 +104,11 @@ jobs:
- arch: x86_64
python_version: 3.11
npu_device: 910b
- arch: x86_64
python_version: 3.12
npu_device: 910b
- arch: x86_64
python_version: 3.11
npu_device: a3
- arch: x86_64
python_version: 3.12
npu_device: a3
npu_device: 950
container:
image: quay.io/ascend/cann:8.5.1-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
image: quay.io/ascend/cann:9.0.0-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
steps:
- name: Show OS
run: cat /etc/os-release
Expand All @@ -138,5 +126,5 @@ jobs:
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib # to avoid libascend_hal.so issue
pip3 install pyyaml setuptools pytest packaging pybind11[global]
pip3 install -r requirements.txt
pip3 install torch-npu==2.8.0.post2 --extra-index-url https://download.pytorch.org/whl/cpu
pip3 install torch-npu==2.8.0.post4 --extra-index-url https://download.pytorch.org/whl/cpu
make clean build_wheel
30 changes: 8 additions & 22 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ stages:
- build_in_docker
- test
- test_aarch64_910B2_py3.11
- test_aarch64_910B2_py3.12 # Serialize the py3.11 and py3.12 tests otherwise CI fails!


# Define the workflow rules manually to make sure only a single pipeline
Expand All @@ -23,11 +22,11 @@ workflow:


.build_config: &build_config
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-${PYTHON_VERSION}
image: quay.io/ascend/cann:9.0.0-${NPU}-ubuntu22.04-py3.11
parallel:
matrix:
- {PYTHON_VERSION: py3.11}
- {PYTHON_VERSION: py3.12}
- {NPU: 910b}
- {NPU: 950}
before_script:
- source /usr/local/Ascend/ascend-toolkit/set_env.sh
- pip3 install pyyaml setuptools pytest packaging
Expand All @@ -42,20 +41,9 @@ build_docker:
- make build_wheel

build_and_test_x86_910B4:
image: quay.io/ascend/cann:9.0.0-910b-ubuntu22.04-py3.11
tags:
- docker_npu # Build and test on-device NPU (x86-64 / 910B4)
<<: *build_config
script:
- make setup_once
- make build_wheel
- pip install pto_kernels*.whl
- python3 -m pytest -v tests/

build_and_test_aarch64_910B2_py3.11:
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.11
stage: test_aarch64_910B2_py3.11
tags:
- docker_npu_910B2 # Build and test on-device (aarch64 / 910B2)
before_script:
- source /usr/local/Ascend/ascend-toolkit/set_env.sh
- pip3 install pyyaml setuptools pytest packaging
Expand All @@ -65,12 +53,10 @@ build_and_test_aarch64_910B2_py3.11:
- make build_wheel
- pip install pto_kernels*.whl
- python3 -m pytest -v tests/
when: manual
allow_failure: true

build_and_test_aarch64_910B2_py3.12:
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.12
stage: test_aarch64_910B2_py3.12
build_and_test_aarch64_910B2_py3.11:
image: quay.io/ascend/cann:9.0.0-910b-ubuntu22.04-py3.11
stage: test_aarch64_910B2_py3.11
tags:
- docker_npu_910B2 # Build and test on-device (aarch64 / 910B2)
before_script:
Expand All @@ -89,7 +75,7 @@ build_and_test_aarch64_910B2_py3.12:
# On-device test of examples/jit_cpp #
######################################
test_examples_on_x86_910B4:
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.11
image: quay.io/ascend/cann:9.0.0-910b-ubuntu22.04-py3.11
tags:
- docker_npu # Build and test on-device NPU (x86-64 / 910B4)
parallel:
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ include(FetchContent)
FetchContent_Declare(
libpto_isa_headers
GIT_REPOSITORY https://gitcode.com/cann/pto-isa.git
GIT_TAG "313817be696792a4e16a7ea5994ec98e34391613")
GIT_TAG v9.0.0)

FetchContent_Populate(libpto_isa_headers)

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ clean:

setup_once:
pip3 install -r requirements.txt
pip3 install torch-npu==2.8.0.post2 --extra-index-url https://download.pytorch.org/whl/cpu
pip3 install torch-npu==2.8.0.post4 --extra-index-url https://download.pytorch.org/whl/cpu

build_cmake: clean
bash scripts/build.sh
Expand Down
5 changes: 4 additions & 1 deletion csrc/kernel/kernel_csr_gather.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,13 @@ AICORE void runTCsrGather(__gm__ T* values, __gm__ int32_t* indices,
TileDataVal wTiles(remaining_elements);
TileDataVal zTiles(remaining_elements);
TileDataIdx idxTiles(remaining_elements);
TileDataIdx tmpTiles(remaining_elements);

// Assign the UB address for each tile
TASSIGN(valTiles, V_T_ADDR + stage * TILE_SIZE_IN_BYTES);
TASSIGN(wTiles, W_T_ADDR + stage * 2 * TILE_SIZE_IDX_IN_BYTES);
TASSIGN(tmpTiles, W_T_ADDR + stage * 2 * TILE_SIZE_IDX_IN_BYTES +
TILE_SIZE_IDX_IN_BYTES);
TASSIGN(zTiles, Z_T_ADDR + stage * TILE_SIZE_IN_BYTES);
TASSIGN(idxTiles, IDX_T_ADDR + stage * TILE_SIZE_IDX_IN_BYTES);

Expand Down Expand Up @@ -147,7 +150,7 @@ AICORE void runTCsrGather(__gm__ T* values, __gm__ int32_t* indices,
pipe_barrier(PIPE_V);

// Gather
TGATHER(wTiles, xTiles, idxTiles);
TGATHER(wTiles, xTiles, idxTiles, tmpTiles);
Comment thread
zouzias marked this conversation as resolved.

// Signal end of gather to MTE2 (next load)
set_flag(PIPE_V, PIPE_MTE2, ev1);
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ requires = [
"psutil==6.1.0",
"PyYAML==6.0.2",
"attrs==24.2.0",
"torch-npu==2.8.0.post2",
"torch-npu==2.8.0.post4",
]
build-backend = "scikit_build_core.build"

Expand Down Expand Up @@ -51,7 +51,7 @@ dependencies = [

[project.optional-dependencies]
torch = [
"torch-npu==2.8.0.post2",
"torch-npu==2.8.0.post4",
]

[tool.scikit-build]
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ setuptools==80.9.0
packaging
wheel
scikit-build-core==0.11.6
pip==26.0.1
pip==26.1.1
pytest==8.3.4
numpy==1.26.4
scipy==1.13.1
Expand Down
Loading