Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/build-examples-aicpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ jobs:
python_version: 3.11
npu_device: 910b
- arch: x86_64
python_version: 3.12
npu_device: 910b
python_version: 3.11
npu_device: 950
- arch: x86_64
python_version: 3.11
npu_device: a3
npu_device: 910b
- arch: x86_64
python_version: 3.12
npu_device: a3
python_version: 3.11
Comment thread
zouzias marked this conversation as resolved.
Outdated
npu_device: 950

container:
image: quay.io/ascend/cann:8.5.1-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
image: quay.io/ascend/cann:9.0.0-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
steps:
- name: Show OS
run: cat /etc/os-release
Expand Down
20 changes: 4 additions & 16 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,12 @@ jobs:
- arch: x86_64
python_version: 3.11
npu_device: 910b
- arch: x86_64
python_version: 3.12
npu_device: 910b
- arch: x86_64
python_version: 3.11
npu_device: a3
- arch: x86_64
python_version: 3.12
npu_device: a3
npu_device: 950

container:
image: quay.io/ascend/cann:8.5.1-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
image: quay.io/ascend/cann:9.0.0-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
steps:
- name: Show OS
run: cat /etc/os-release
Expand Down Expand Up @@ -110,17 +104,11 @@ jobs:
- arch: x86_64
python_version: 3.11
npu_device: 910b
- arch: x86_64
python_version: 3.12
npu_device: 910b
- arch: x86_64
python_version: 3.11
npu_device: a3
- arch: x86_64
python_version: 3.12
npu_device: a3
npu_device: 950
container:
image: quay.io/ascend/cann:8.5.1-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
image: quay.io/ascend/cann:9.0.0-${{ matrix.npu_device }}-ubuntu22.04-py${{ matrix.python_version }}
steps:
- name: Show OS
run: cat /etc/os-release
Expand Down
30 changes: 8 additions & 22 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ stages:
- build_in_docker
- test
- test_aarch64_910B2_py3.11
- test_aarch64_910B2_py3.12 # Serialize the py3.11 and py3.12 tests otherwise CI fails!


# Define the workflow rules manually to make sure only a single pipeline
Expand All @@ -23,11 +22,11 @@ workflow:


.build_config: &build_config
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-${PYTHON_VERSION}
image: quay.io/ascend/cann:9.0.0-${NPU}-ubuntu22.04-py3.11
parallel:
matrix:
- {PYTHON_VERSION: py3.11}
- {PYTHON_VERSION: py3.12}
- {NPU: 910b}
- {NPU: 950}
before_script:
- source /usr/local/Ascend/ascend-toolkit/set_env.sh
- pip3 install pyyaml setuptools pytest packaging
Expand All @@ -42,20 +41,9 @@ build_docker:
- make build_wheel

build_and_test_x86_910B4:
image: quay.io/ascend/cann:9.0.0-910b-ubuntu22.04-py3.11
tags:
- docker_npu # Build and test on-device NPU (x86-64 / 910B4)
<<: *build_config
script:
- make setup_once
- make build_wheel
- pip install pto_kernels*.whl
- python3 -m pytest -v tests/

build_and_test_aarch64_910B2_py3.11:
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.11
stage: test_aarch64_910B2_py3.11
tags:
- docker_npu_910B2 # Build and test on-device (aarch64 / 910B2)
before_script:
- source /usr/local/Ascend/ascend-toolkit/set_env.sh
- pip3 install pyyaml setuptools pytest packaging
Expand All @@ -65,12 +53,10 @@ build_and_test_aarch64_910B2_py3.11:
- make build_wheel
- pip install pto_kernels*.whl
- python3 -m pytest -v tests/
when: manual
allow_failure: true

build_and_test_aarch64_910B2_py3.12:
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.12
stage: test_aarch64_910B2_py3.12
build_and_test_aarch64_910B2_py3.11:
image: quay.io/ascend/cann:9.0.0-910b-ubuntu22.04-py3.11
stage: test_aarch64_910B2_py3.11
tags:
- docker_npu_910B2 # Build and test on-device (aarch64 / 910B2)
before_script:
Expand All @@ -89,7 +75,7 @@ build_and_test_aarch64_910B2_py3.12:
# On-device test of examples/jit_cpp #
######################################
test_examples_on_x86_910B4:
image: quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.11
image: quay.io/ascend/cann:9.0.0-910b-ubuntu22.04-py3.11
tags:
- docker_npu # Build and test on-device NPU (x86-64 / 910B4)
parallel:
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ include(FetchContent)
FetchContent_Declare(
libpto_isa_headers
GIT_REPOSITORY https://gitcode.com/cann/pto-isa.git
GIT_TAG "313817be696792a4e16a7ea5994ec98e34391613")
GIT_TAG v9.0.0)

FetchContent_Populate(libpto_isa_headers)

Expand Down
5 changes: 4 additions & 1 deletion csrc/kernel/kernel_csr_gather.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,13 @@ AICORE void runTCsrGather(__gm__ T* values, __gm__ int32_t* indices,
TileDataVal wTiles(remaining_elements);
TileDataVal zTiles(remaining_elements);
TileDataIdx idxTiles(remaining_elements);
TileDataIdx tmpTiles(remaining_elements);

// Assign the UB address for each tile
TASSIGN(valTiles, V_T_ADDR + stage * TILE_SIZE_IN_BYTES);
TASSIGN(wTiles, W_T_ADDR + stage * 2 * TILE_SIZE_IDX_IN_BYTES);
TASSIGN(tmpTiles, W_T_ADDR + stage * 2 * TILE_SIZE_IDX_IN_BYTES +
TILE_SIZE_IDX_IN_BYTES);
TASSIGN(zTiles, Z_T_ADDR + stage * TILE_SIZE_IN_BYTES);
TASSIGN(idxTiles, IDX_T_ADDR + stage * TILE_SIZE_IDX_IN_BYTES);

Expand Down Expand Up @@ -147,7 +150,7 @@ AICORE void runTCsrGather(__gm__ T* values, __gm__ int32_t* indices,
pipe_barrier(PIPE_V);

// Gather
TGATHER(wTiles, xTiles, idxTiles);
TGATHER(wTiles, xTiles, idxTiles, tmpTiles);
Comment thread
zouzias marked this conversation as resolved.

// Signal end of gather to MTE2 (next load)
set_flag(PIPE_V, PIPE_MTE2, ev1);
Expand Down
Loading