From 19f862f5d5914fdcca340699d1e621b6dd28be55 Mon Sep 17 00:00:00 2001 From: anastasios Date: Fri, 15 May 2026 13:25:55 +0000 Subject: [PATCH 1/8] WIP --- Makefile | 11 +- csrc/examples/data_utils.h | 256 +++++++++++++++++++++++++++++++++++++ csrc/examples/main_abs.cpp | 76 +++++++++++ scripts/build_abs.sh | 64 ++++++++++ scripts/data_gen_abs.py | 14 ++ 5 files changed, 419 insertions(+), 2 deletions(-) create mode 100644 csrc/examples/data_utils.h create mode 100644 csrc/examples/main_abs.cpp create mode 100755 scripts/build_abs.sh create mode 100644 scripts/data_gen_abs.py diff --git a/Makefile b/Makefile index aee5176c..80a29b45 100644 --- a/Makefile +++ b/Makefile @@ -27,13 +27,14 @@ wheel: # 'make compile_abs' compiles 'kernel_abs.cpp' into 'libkernel_abs.so' without building the whole wheel package. # This is useful for development and debugging of individual kernels. compile_%: - bisheng -fPIC -shared -xcce -DMEMORY_BASE -O2 -std=c++17 \ + mkdir -p build/ + bisheng -fPIC -shared -xcce -DREGISTER -O2 -std=c++20 \ -I$(CSRC_KERNEL_DIR) \ -I$(PTO_LIB_PATH)/include \ --npu-arch=dav-2201 \ -Wno-ignored-attributes \ $(CSRC_KERNEL_DIR)/kernel_$*.cpp \ - -o libkernel_$*.so + -o build/libkernel_$*.so install: @@ -47,3 +48,9 @@ test: test_tri_inv: pytest tests/test_tri_inv_*.py + + +run_abs: compile_abs + python scripts/data_gen_abs.py + g++ -o build/main_abs csrc/examples/main_abs.cpp -L$(shell pwd)/build/ -L$(ASCEND_TOOLKIT_HOME)/lib64/ -lkernel_abs -lacl_rt -I$(ASCEND_TOOLKIT_HOME)/include/ -I$(CSRC_KERNEL_DIR) -Wno-ignored-attributes + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/:$(shell pwd)/build/lib/ cannsim record --soc=Ascend950 ./main_abs diff --git a/csrc/examples/data_utils.h b/csrc/examples/data_utils.h new file mode 100644 index 00000000..ea45595f --- /dev/null +++ b/csrc/examples/data_utils.h @@ -0,0 +1,256 @@ +/** + * @file data_utils.h + * @brief Common functions used to read, write and print data. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +enum class PrintDataType { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +}; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + const aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret \ + << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file. + * + * @param [in] file_path File path. + * @param [out] buffer Pointer to the buffer where the data is read. + * @param [in] size Size of the file and the buffer. + * @return Boolean indicating if the data read was successful or not. + */ +bool ReadFile(const std::string& file_path, void* buffer, size_t size) { + struct stat s_buf; + const int file_status = stat(file_path.data(), &s_buf); + if (file_status == -1) { + ERROR_LOG("Failed to read file."); + return false; + } + if (S_ISREG(s_buf.st_mode) == 0) { + ERROR_LOG("File does not exist: %s", file_path.c_str()); + return false; + } + + std::ifstream file; + file.open(file_path, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Failed to open file. Path = %s", file_path.c_str()); + return false; + } + + std::filebuf* const buf = file.rdbuf(); + const size_t read_size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (read_size == 0) { + ERROR_LOG("%s: File is empty.", file_path.c_str()); + file.close(); + return false; + } + if (read_size > size) { + ERROR_LOG("%s: File size is larger than the buffer size.", + file_path.c_str()); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), read_size); + file.close(); + return true; +} + +/** + * @brief Write data to file. + * + * @param [in] file_path File path. + * @param [in] buffer Data to write to file. + * @param [in] size Size to write. + * @return Boolean indicating if the data write was successful or not. + */ +bool WriteFile(const std::string& file_path, const void* buffer, size_t size) { + if (buffer == nullptr) { + ERROR_LOG("Cannot write file from a nullptr buffer."); + return false; + } + + const int fd = + open(file_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Failed to open file. Path = %s", file_path.c_str()); + return false; + } + + const size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Failed to write file."); + return false; + } + + return true; +} + +/// @private +template +void DoPrintData(const T* data, size_t count, size_t elements_per_row) { + assert(elements_per_row != 0); + for (size_t i = 0; i < count; ++i) { + if constexpr (std::is_same::value || + std::is_same::value) { + // cout treats int8 as char and doesn't output its numeric + // representation + std::cout << std::setw(10) << static_cast(data[i]); + } else { + std::cout << std::setw(10) << data[i]; + } + if (i % elements_per_row == elements_per_row - 1) { + std::cout << std::endl; + } + } +} + +/// @private +void DoPrintHalfData(const aclFloat16* data, size_t count, + size_t elements_per_row) { + assert(elements_per_row != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) + << aclFloat16ToFloat(data[i]); + if (i % elements_per_row == elements_per_row - 1) { + std::cout << std::endl; + } + } +} + +/** + * @brief Print array content. + * + * @param [in] data Pointer to the array. + * @param [in] count Number of elements to print. + * @param [in] data_type Data type of the elements. + * @param [in] elements_per_row Number of elements to be printed in a single + * row. + */ +void PrintData(const void* data, size_t count, PrintDataType data_type, + size_t elements_per_row = 16) { + if (data == nullptr) { + ERROR_LOG("Cannot print a nullptr buffer."); + return; + } + + switch (data_type) { + case PrintDataType::BOOL: + DoPrintData(reinterpret_cast(data), count, elements_per_row); + break; + case PrintDataType::INT8_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::UINT8_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::INT16_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::UINT16_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::INT32_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::UINT32_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::INT64_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::UINT64_T: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::HALF: + DoPrintHalfData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::FLOAT: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + case PrintDataType::DOUBLE: + DoPrintData(reinterpret_cast(data), count, + elements_per_row); + break; + default: + ERROR_LOG("Unsupported type."); + } + std::cout << std::endl; +} + +/** + * @brief Prints beginning and end of a given vector. + * + * @param [in] data Pointer to the array. + * @param [in] dt Data type of the elements. + * @param [in] elems_to_print Number of elements to print both from the + * beginning and end. + * @param [in] vector_len Total number of elements in the vector. + * @param [in] msg Additional message printed at the beginning. + */ +template +void PrintVector(const T* data, PrintDataType dt, size_t elems_to_print, + size_t vector_len, std::string msg = "") { + std::cout << "==========================================" << std::endl; + if (msg != "") { + std::cout << msg << std::endl; + } + if (2 * elems_to_print >= vector_len) { + PrintData(data, vector_len, dt); + } else { + PrintData(data, elems_to_print, dt); + std::cout << "\t..." << std::endl; + const size_t tail_start = vector_len - elems_to_print; + PrintData(data + tail_start, elems_to_print, dt); + } + std::cout << "==========================================" << std::endl; +} diff --git a/csrc/examples/main_abs.cpp b/csrc/examples/main_abs.cpp new file mode 100644 index 00000000..a2050095 --- /dev/null +++ b/csrc/examples/main_abs.cpp @@ -0,0 +1,76 @@ +/** + * + * @file main_abs.cpp + * @brief Example of using the `abs` kernel. + */ + +#include + +#include "data_utils.h" + +extern "C" void call_vabs_fp16(uint32_t blockDim, aclrtStream stream, void* x, + void* y, uint32_t num_elements); + +/// Number of elements in input vectors. +constexpr size_t VABS_TOTAL_LENGTH = 8 * 128; + +int32_t main(int32_t argc, char* argv[]) { + uint32_t blockDim; + if (argc > 2) { + std::cerr << "Usage: ./" << argv[0] << " " << std::endl; + return 1; + } else if (argc == 2) { + blockDim = std::stoul(argv[1]); + std::cout << "[vabs] Use input BlockDim: " << blockDim << std::endl; + } else { + std::cout << "[vabs] Use default BlockDim: 4" << std::endl; + blockDim = 4; + } + + constexpr size_t inputByteSize = VABS_TOTAL_LENGTH * sizeof(uint16_t); + constexpr size_t outputByteSize = VABS_TOTAL_LENGTH * sizeof(uint16_t); + + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + const int32_t device_id = 0; + CHECK_ACL(aclrtSetDevice(device_id)); + CHECK_ACL(aclrtCreateContext(&context, device_id)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost, *zHost; + uint8_t *xDevice, *zDevice; + CHECK_ACL(aclrtMallocHost((void**)&xHost, inputByteSize)); + CHECK_ACL(aclrtMallocHost((void**)&zHost, outputByteSize)); + CHECK_ACL( + aclrtMalloc((void**)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL( + aclrtMalloc((void**)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", xHost, inputByteSize); + PrintVector((uint16_t*)xHost, PrintDataType::HALF, 16, VABS_TOTAL_LENGTH, + "Input X"); + CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, + ACL_MEMCPY_HOST_TO_DEVICE)); + + std::cout << "Init vabs_fp16 kernel" << std::endl; + call_vabs_fp16(blockDim, stream, xDevice, zDevice, VABS_TOTAL_LENGTH); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, + ACL_MEMCPY_DEVICE_TO_HOST)); + PrintVector((uint16_t*)zHost, PrintDataType::HALF, 16, VABS_TOTAL_LENGTH, + "Output"); + WriteFile("vabs_output.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(device_id)); + CHECK_ACL(aclFinalize()); + return 0; +} diff --git a/scripts/build_abs.sh b/scripts/build_abs.sh new file mode 100755 index 00000000..6c607a94 --- /dev/null +++ b/scripts/build_abs.sh @@ -0,0 +1,64 @@ +#!/bin/bash + + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PARENT_DIR="$(dirname "$SCRIPT_DIR")" + +SHORT=v:, +LONG=soc-version:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +SOC_VERSION="ascend950pr_9599" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH="$ASCEND_INSTALL_PATH" +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH="$ASCEND_HOME_PATH" +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH="$HOME"/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +# shellcheck source=/dev/null +source "$_ASCEND_INSTALL_PATH"/bin/setenv.bash +echo "Current compile soc version is ${SOC_VERSION}" + +# See https://docs.pytorch.org/cppdocs/installing.html +export TORCH_DEVICE_BACKEND_AUTOLOAD=0 +CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:$(python -c 'import torch; print(torch.utils.cmake_prefix_path)') +export CMAKE_PREFIX_PATH + + +echo "CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}" + + +set -e +rm -rf build +mkdir -p build +cmake -S "${PARENT_DIR}" \ + -B build \ + -DSOC_VERSION="${SOC_VERSION}" \ + -DBASE_MODE=REGISTER \ + -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ + -DASCEND_CANN_PACKAGE_PATH="${_ASCEND_INSTALL_PATH}" + +cmake --build build -j --target main_abs diff --git a/scripts/data_gen_abs.py b/scripts/data_gen_abs.py new file mode 100644 index 00000000..2ab2b5ab --- /dev/null +++ b/scripts/data_gen_abs.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 +# -*- coding:utf-8 -*- +# Copyright 2026 Huawei Technologies Co., Ltd +from pathlib import Path + +import numpy as np + +if __name__ == "__main__": + shape = [8, 128] + + rng = np.random.default_rng(seed=42) + input_x = rng.uniform(-100, 100, shape).astype(np.float16) + Path("./input").mkdir(parents=True, exist_ok=True) + input_x.tofile("./input_x.bin") From e6d22dab3170aac5c461fdd859f6aa05b0d42621 Mon Sep 17 00:00:00 2001 From: anastasios Date: Fri, 15 May 2026 13:30:32 +0000 Subject: [PATCH 2/8] remove file --- scripts/build_abs.sh | 64 -------------------------------------------- 1 file changed, 64 deletions(-) delete mode 100755 scripts/build_abs.sh diff --git a/scripts/build_abs.sh b/scripts/build_abs.sh deleted file mode 100755 index 6c607a94..00000000 --- a/scripts/build_abs.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PARENT_DIR="$(dirname "$SCRIPT_DIR")" - -SHORT=v:, -LONG=soc-version:, -OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") -eval set -- "$OPTS" -SOC_VERSION="ascend950pr_9599" - -while :; do - case "$1" in - -v | --soc-version) - SOC_VERSION="$2" - shift 2 - ;; - --) - shift - break - ;; - *) - echo "[ERROR] Unexpected option: $1" - break - ;; - esac -done - -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH="$ASCEND_INSTALL_PATH" -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH="$ASCEND_HOME_PATH" -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH="$HOME"/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -# shellcheck source=/dev/null -source "$_ASCEND_INSTALL_PATH"/bin/setenv.bash -echo "Current compile soc version is ${SOC_VERSION}" - -# See https://docs.pytorch.org/cppdocs/installing.html -export TORCH_DEVICE_BACKEND_AUTOLOAD=0 -CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:$(python -c 'import torch; print(torch.utils.cmake_prefix_path)') -export CMAKE_PREFIX_PATH - - -echo "CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}" - - -set -e -rm -rf build -mkdir -p build -cmake -S "${PARENT_DIR}" \ - -B build \ - -DSOC_VERSION="${SOC_VERSION}" \ - -DBASE_MODE=REGISTER \ - -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \ - -DASCEND_CANN_PACKAGE_PATH="${_ASCEND_INSTALL_PATH}" - -cmake --build build -j --target main_abs From af3b937dc011d7f7c01c8fc7ef2b662bdbf7a4c2 Mon Sep 17 00:00:00 2001 From: anastasios Date: Fri, 15 May 2026 14:03:05 +0000 Subject: [PATCH 3/8] fix --- Makefile | 4 ++-- csrc/examples/main_abs.cpp | 6 +++--- csrc/kernel/kernel_abs.cpp | 7 ++++++- scripts/data_gen_abs.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 80a29b45..e2319e4d 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ wheel: # This is useful for development and debugging of individual kernels. compile_%: mkdir -p build/ - bisheng -fPIC -shared -xcce -DREGISTER -O2 -std=c++20 \ + bisheng -fPIC -shared -xcce -DREGISTER_BASE -O2 -std=c++20 \ -I$(CSRC_KERNEL_DIR) \ -I$(PTO_LIB_PATH)/include \ --npu-arch=dav-2201 \ @@ -50,7 +50,7 @@ test_tri_inv: pytest tests/test_tri_inv_*.py -run_abs: compile_abs +run_abs_a5: compile_abs python scripts/data_gen_abs.py g++ -o build/main_abs csrc/examples/main_abs.cpp -L$(shell pwd)/build/ -L$(ASCEND_TOOLKIT_HOME)/lib64/ -lkernel_abs -lacl_rt -I$(ASCEND_TOOLKIT_HOME)/include/ -I$(CSRC_KERNEL_DIR) -Wno-ignored-attributes LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/:$(shell pwd)/build/lib/ cannsim record --soc=Ascend950 ./main_abs diff --git a/csrc/examples/main_abs.cpp b/csrc/examples/main_abs.cpp index a2050095..045c49e7 100644 --- a/csrc/examples/main_abs.cpp +++ b/csrc/examples/main_abs.cpp @@ -9,7 +9,7 @@ #include "data_utils.h" extern "C" void call_vabs_fp16(uint32_t blockDim, aclrtStream stream, void* x, - void* y, uint32_t num_elements); + void* z, uint32_t num_elements); /// Number of elements in input vectors. constexpr size_t VABS_TOTAL_LENGTH = 8 * 128; @@ -23,8 +23,8 @@ int32_t main(int32_t argc, char* argv[]) { blockDim = std::stoul(argv[1]); std::cout << "[vabs] Use input BlockDim: " << blockDim << std::endl; } else { - std::cout << "[vabs] Use default BlockDim: 4" << std::endl; - blockDim = 4; + std::cout << "[vabs] Use default BlockDim: 8" << std::endl; + blockDim = 8; } constexpr size_t inputByteSize = VABS_TOTAL_LENGTH * sizeof(uint16_t); diff --git a/csrc/kernel/kernel_abs.cpp b/csrc/kernel/kernel_abs.cpp index f7792cff..3d4d54c3 100644 --- a/csrc/kernel/kernel_abs.cpp +++ b/csrc/kernel/kernel_abs.cpp @@ -37,6 +37,11 @@ AICORE void runTAbs(__gm__ T* x, __gm__ T* z, uint32_t total_size) { const uint32_t num_aiv_cores = get_block_num(); const uint32_t aiv_core_id = get_block_idx(); + if (get_subblockid() != 0) { + // Only subblock 0 is used in this kernel + return; + } + constexpr uint32_t UB_ZERO_ADDR = 0; constexpr uint32_t TILE_SIZE_IN_BYTES = TILE_SIZE * sizeof(T); const uint32_t num_tiles = (total_size + TILE_SIZE - 1) / TILE_SIZE; @@ -132,5 +137,5 @@ extern "C" __global__ AICORE void vabs_fp32(GM_ADDR x, GM_ADDR z, extern "C" void call_vabs_fp16(uint32_t blockDim, void* stream, uint8_t* x, uint8_t* y, uint32_t in_length) { - vabs_fp16<<>>(x, y, in_length); + vabs_fp16<<>>(x, y, in_length); } diff --git a/scripts/data_gen_abs.py b/scripts/data_gen_abs.py index 2ab2b5ab..d688d6cd 100644 --- a/scripts/data_gen_abs.py +++ b/scripts/data_gen_abs.py @@ -11,4 +11,4 @@ rng = np.random.default_rng(seed=42) input_x = rng.uniform(-100, 100, shape).astype(np.float16) Path("./input").mkdir(parents=True, exist_ok=True) - input_x.tofile("./input_x.bin") + input_x.tofile("./input/input_x.bin") From cb45182163e460a7fb074a82bb33d8cde17e7d9a Mon Sep 17 00:00:00 2001 From: anastasios Date: Fri, 15 May 2026 17:44:19 +0000 Subject: [PATCH 4/8] fix --- Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index e2319e4d..16f6bd64 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ compile_%: -I$(CSRC_KERNEL_DIR) \ -I$(PTO_LIB_PATH)/include \ --npu-arch=dav-2201 \ - -Wno-ignored-attributes \ + -Wno-ignored-attributes \ $(CSRC_KERNEL_DIR)/kernel_$*.cpp \ -o build/libkernel_$*.so @@ -52,5 +52,8 @@ test_tri_inv: run_abs_a5: compile_abs python scripts/data_gen_abs.py - g++ -o build/main_abs csrc/examples/main_abs.cpp -L$(shell pwd)/build/ -L$(ASCEND_TOOLKIT_HOME)/lib64/ -lkernel_abs -lacl_rt -I$(ASCEND_TOOLKIT_HOME)/include/ -I$(CSRC_KERNEL_DIR) -Wno-ignored-attributes - LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/:$(shell pwd)/build/lib/ cannsim record --soc=Ascend950 ./main_abs + g++ -o build/main_abs csrc/examples/main_abs.cpp \ + -L$(shell pwd)/build/ -L$(ASCEND_TOOLKIT_HOME)/lib64/ \ + -lkernel_abs -lacl_rt -I$(ASCEND_TOOLKIT_HOME)/include/ \ + -I$(CSRC_KERNEL_DIR) -Wno-ignored-attributes + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/:$(shell pwd)/build/lib/ cannsim record --soc=Ascend950 ./build/main_abs From ad0b0af1f66394104b843dc46ea0e108122d7b7b Mon Sep 17 00:00:00 2001 From: anastasios Date: Mon, 18 May 2026 08:16:29 +0000 Subject: [PATCH 5/8] WIP DOUBLECHECK --- Makefile | 13 +++++++------ {csrc/examples => examples/a5}/data_utils.h | 0 {csrc/examples => examples/a5}/main_abs.cpp | 0 3 files changed, 7 insertions(+), 6 deletions(-) rename {csrc/examples => examples/a5}/data_utils.h (100%) rename {csrc/examples => examples/a5}/main_abs.cpp (100%) diff --git a/Makefile b/Makefile index da22d86a..7e95986c 100644 --- a/Makefile +++ b/Makefile @@ -27,14 +27,14 @@ wheel: # 'make compile_abs' compiles 'kernel_abs.cpp' into 'libkernel_abs.so' without building the whole wheel package. # This is useful for development and debugging of individual kernels. compile_%: - mkdir -p build/ + mkdir -p build/lib/ bisheng -fPIC -shared -xcce -DREGISTER_BASE -O2 -std=c++20 \ -I$(CSRC_KERNEL_DIR) \ -I$(PTO_LIB_PATH)/include \ --npu-arch=dav-2201 \ -Wno-ignored-attributes \ $(CSRC_KERNEL_DIR)/kernel_$*.cpp \ - -o build/libkernel_$*.so + -o build/lib/libkernel_$*.so install: @@ -52,8 +52,9 @@ test_tri_inv: run_abs_a5: compile_abs python scripts/data_gen_abs.py - g++ -o build/main_abs csrc/examples/main_abs.cpp \ - -L$(shell pwd)/build/ -L$(ASCEND_TOOLKIT_HOME)/lib64/ \ + g++ -o build/main_abs examples/a5/main_abs.cpp \ + -L$(shell pwd)/build/lib/ -L$(ASCEND_TOOLKIT_HOME)/lib64/ \ -lkernel_abs -lacl_rt -I$(ASCEND_TOOLKIT_HOME)/include/ \ - -I$(CSRC_KERNEL_DIR) -Wno-ignored-attributes - LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/:$(shell pwd)/build/lib/ cannsim record --soc=Ascend950 ./build/main_abs + -I$(shell pwd)/examples/a5/ -Wno-ignored-attributes + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/lib/ cannsim record \ + --soc=Ascend950 -g ./build/main_abs diff --git a/csrc/examples/data_utils.h b/examples/a5/data_utils.h similarity index 100% rename from csrc/examples/data_utils.h rename to examples/a5/data_utils.h diff --git a/csrc/examples/main_abs.cpp b/examples/a5/main_abs.cpp similarity index 100% rename from csrc/examples/main_abs.cpp rename to examples/a5/main_abs.cpp From ce3e0728abb8407e1e22584c06f97311560c4343 Mon Sep 17 00:00:00 2001 From: anastasios Date: Tue, 19 May 2026 12:16:55 +0000 Subject: [PATCH 6/8] wip --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 7e95986c..e3718e34 100644 --- a/Makefile +++ b/Makefile @@ -52,9 +52,9 @@ test_tri_inv: run_abs_a5: compile_abs python scripts/data_gen_abs.py - g++ -o build/main_abs examples/a5/main_abs.cpp \ + g++ -o main_abs examples/a5/main_abs.cpp \ -L$(shell pwd)/build/lib/ -L$(ASCEND_TOOLKIT_HOME)/lib64/ \ -lkernel_abs -lacl_rt -I$(ASCEND_TOOLKIT_HOME)/include/ \ -I$(shell pwd)/examples/a5/ -Wno-ignored-attributes LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/lib/ cannsim record \ - --soc=Ascend950 -g ./build/main_abs + --soc=Ascend950 -g ./main_abs From c99dacebbd0b7c88424d9141b04c7b6131fd5ff6 Mon Sep 17 00:00:00 2001 From: anastasios Date: Tue, 19 May 2026 15:52:37 +0000 Subject: [PATCH 7/8] cannsim works --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e3718e34..74343171 100644 --- a/Makefile +++ b/Makefile @@ -28,10 +28,12 @@ wheel: # This is useful for development and debugging of individual kernels. compile_%: mkdir -p build/lib/ - bisheng -fPIC -shared -xcce -DREGISTER_BASE -O2 -std=c++20 \ + bisheng -fPIC -shared -xcce -DREGISTER_BASE -O2 -std=gnu++17 \ -I$(CSRC_KERNEL_DIR) \ -I$(PTO_LIB_PATH)/include \ - --npu-arch=dav-2201 \ + --cce-aicore-arch=dav-c310 \ + -mllvm -cce-aicore-stack-size=0x8000 \ + -mllvm -cce-aicore-function-stack-size=0x8000 \ -Wno-ignored-attributes \ $(CSRC_KERNEL_DIR)/kernel_$*.cpp \ -o build/lib/libkernel_$*.so From 064652c88ec6b1539ffdd4c8edb5d873d2379a9d Mon Sep 17 00:00:00 2001 From: anastasios Date: Tue, 19 May 2026 16:22:52 +0000 Subject: [PATCH 8/8] fix --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 74343171..efcc2d47 100644 --- a/Makefile +++ b/Makefile @@ -59,4 +59,4 @@ run_abs_a5: compile_abs -lkernel_abs -lacl_rt -I$(ASCEND_TOOLKIT_HOME)/include/ \ -I$(shell pwd)/examples/a5/ -Wno-ignored-attributes LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(shell pwd)/build/lib/ cannsim record \ - --soc=Ascend950 -g ./main_abs + --soc=Ascend950 ./main_abs