4 changes: 4 additions & 0 deletions cmake/FindATB.cmake
@@ -8,6 +8,10 @@ else()
CACHE STRING "atb toolkit default home")
endif()

# Extract cxx_abi version from ATB_HOME_PATH (last path component)
get_filename_component(ATB_CXX_ABI_VERSION ${ATB_HOME_PATH} NAME)
message(STATUS "ATB_CXX_ABI_VERSION: ${ATB_CXX_ABI_VERSION}")

# Include directories.
find_path(ATB_INCLUDE_DIRS
NAMES atb/atb_infer.h
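Note on the new ABI probe: the last path component of ATB_HOME_PATH is taken as the C++ ABI tag, which the codegen CMakeLists further down maps to `-D_GLIBCXX_USE_CXX11_ABI=0` only when it equals `cxx_abi_0`. A minimal sketch of the assumed directory convention, using a hypothetical install path:

```python
import os

# Hypothetical ATB install path; the last component is assumed to encode the ABI flavor.
atb_home = "/usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_0"
abi_tag = os.path.basename(atb_home)  # same result as get_filename_component(... NAME)
print(abi_tag)  # "cxx_abi_0" -> build with -D_GLIBCXX_USE_CXX11_ABI=0
```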
7 changes: 4 additions & 3 deletions cmake/ascend.cmake
@@ -17,10 +17,11 @@ execute_process(
)

execute_process(
COMMAND python -c "import torch; from packaging import version; \
COMMAND python -c "import torch; from packaging import version; \
torch_version = version.parse(torch.__version__).base_version; \
print('1' if version.parse(torch_version) > version.parse('2.3.1') else '0', end='')"
OUTPUT_VARIABLE Torch_npu_VERSION_HIGHER_THAN_231
print(torch_version, end='')"
OUTPUT_VARIABLE TORCH_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE
)

find_package(Torch REQUIRED)
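The inline probe now exports the parsed torch release itself (TORCH_VERSION) instead of a single boolean. For reference, the embedded one-liner is equivalent to the following, where `base_version` strips local build suffixes:

```python
import torch
from packaging import version

# e.g. "2.7.1+cpu" -> "2.7.1"; "2.3.1" stays "2.3.1"
torch_version = version.parse(torch.__version__).base_version
print(torch_version, end="")  # captured by CMake into TORCH_VERSION
```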
7 changes: 7 additions & 0 deletions dlinfer/graph/dicp/dynamo_bridge/torch_version.py
@@ -8,6 +8,9 @@
is_torch_220 = False
is_torch_231 = False
is_torch_251 = False
is_torch_260 = False
is_torch_271 = False


if torch_version.startswith("2.0"):
is_torch_200 = True
@@ -19,6 +22,10 @@
is_torch_231 = True
elif torch_version.startswith("2.5.1"):
is_torch_251 = True
elif torch_version.startswith("2.6.0"):
is_torch_260 = True
elif torch_version.startswith("2.7.1"):
is_torch_271 = True
else:
raise ValueError(f"unsupported dicp torch version: {torch.__version__}")

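With the two new flags, callers can branch on the running torch release the same way the existing is_torch_2xx flags are used. A minimal usage sketch (the import path follows this file's location; the branch bodies are illustrative only):

```python
from dlinfer.graph.dicp.dynamo_bridge.torch_version import is_torch_260, is_torch_271

if is_torch_260 or is_torch_271:
    pass  # newer-torch code path
else:
    pass  # legacy path
```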
27 changes: 27 additions & 0 deletions dlinfer/graph/dicp/vendor/AtbGraph/atb_op.py
@@ -825,3 +825,30 @@ def __init__(self):

def infer_result(self, x, dim, keep_dim, dtype, ascend_dtype):
return x.sum(dim, keep_dim=keep_dim, dtype=dtype)


class CustomFusedLora(Operator):
def __init__(self):
super().__init__("CustomFusedLora")

def infer_result(
self, x, lora_a, lora_b, scaling, ranks, seq_lens, adapter_ids, dtype
):
M, K = x.shape
ranks = lora_a.size(0)
N = lora_b.size(1)
output = torch.empty((M, N), dtype=x.dtype, device=x.device)
# assume totalRank is the max rank
internal_output_x_lora_a = torch.empty(
(M, ranks * M), dtype=x.dtype, device=x.device
)
internal_lora_a_transpose = torch.empty_like(lora_a)
return output, internal_output_x_lora_a, internal_lora_a_transpose
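For intuition, here is a single-adapter eager sketch of the LoRA computation this op is assumed to fuse; the tensor layouts are inferred from the shape logic above (lora_a: (r, K), lora_b: (r, N)) and are not taken from the ATB kernel itself. The real op additionally batches over per-request segments described by seq_lens and adapter_ids.

```python
import torch

def lora_reference(x, lora_a, lora_b, scaling):
    # x: (M, K), lora_a: (r, K), lora_b: (r, N)  ->  y: (M, N)
    return (x @ lora_a.T) @ lora_b * scaling
```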


class AclNnInplaceAdd(Operator):
def __init__(self):
super().__init__("AclNnInplaceAdd")

def infer_result(self, a, b, dtype):
return a + b
13 changes: 13 additions & 0 deletions dlinfer/graph/dicp/vendor/AtbGraph/codegen/atb_infer_param.py
@@ -729,6 +729,19 @@ class AclNnReduceSumParam:
dtype: str = "FLOAT"


@dataclass
class CustomFusedLoraParam:
name: str = ""
dtype: str = "FLOAT"


@dataclass
class AclNnInplaceAddParam:
name: str = ""
alpha: float = 1.0
dtype: str = "FLOAT"


def custom_asdict_factory(data):
def convert_value(obj):
if isinstance(obj, IntEnum):
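These dataclasses are what custom_asdict_factory serializes into the per-op JSON consumed by the C++ operation creators, so the field names here must match the keys parsed on the C++ side (`name`, `alpha`, `dtype` for the in-place add). A quick sketch of the resulting payload, assuming plain dataclass-to-dict conversion:

```python
from dataclasses import asdict

param = AclNnInplaceAddParam(name="inplace_add_0")
print(asdict(param))
# {'name': 'inplace_add_0', 'alpha': 1.0, 'dtype': 'FLOAT'}
```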
34 changes: 33 additions & 1 deletion dlinfer/graph/dicp/vendor/AtbGraph/codegen/atb_op.py
@@ -570,7 +570,7 @@ def SplitWithSize(name, x, sizes, dim):
op = Operation(name, "AclNnSplitWithSizeOperation")
param = infer_param.SplitParam()
param.splitDim = dim
param.splitSizes = sizes
param.splitSizes = [str(s) for s in sizes]
op.set_param(param)
op.set_input([x])
for idx, _ in enumerate(sizes):
@@ -1246,3 +1246,35 @@ def AclNnReduceSum(name, x, dim, keep_dim, dtype, ascend_dtype):
op.set_param(param)
op.set_output([name])
return op

def CustomFusedLora(
name, x, lora_a, lora_b, scaling, ranks, seq_lens, adapter_ids, dtype
):
op = Operation(name, "CustomFusedLoraOperation")
param = infer_param.CustomFusedLoraParam()
param.name = name
param.dtype = get_ascend_dtype(dtype)
seq_lens_cpu = seq_lens
op.set_input(
[x, lora_a, lora_b, scaling, ranks, seq_lens, adapter_ids, seq_lens_cpu]
)
op.set_param(param)
op.set_output([f"{name}__0", f"{name}__1", f"{name}__2"])

op.has_host_inputs = True
op.host_inputs.append(ranks)
op.host_inputs.append(adapter_ids)
op.host_inputs.append(seq_lens_cpu)
return op

def AclNnInplaceAdd(name, a, b, dtype):
op = Operation(name, "AclNnInplaceAddOperation")
param = infer_param.AclNnInplaceAddParam()
param.name = name
param.dtype = get_ascend_dtype(dtype)
op.set_input([a, b])
op.set_param(param)
op.set_output([name])
op.has_inplace_output = True
op.add_inplace_output(0, 0)
return op
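Since SplitWithSize now serializes every size as a string, a split size can be either a numeric literal or, under the convention implemented in split_with_size_operation.cpp below, the name of a runtime value resolved from the global dict. A hypothetical call from within the codegen module (input name and key are illustrative):

```python
x_node = "attn_qkv"  # hypothetical upstream tensor name

# static sizes: serialized as ["64", "64"]
op = SplitWithSize("split_0", x_node, [64, 64], -1)

# mixed: "q_seq_len" is looked up in the runtime global dict on the C++ side
op = SplitWithSize("split_1", x_node, [64, "q_seq_len"], -1)
```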
@@ -19,9 +19,13 @@ set(COMPILE_OPTIONS
-Wno-unused-variable
-Wno-unused-parameter
-Wno-attributes
-D_GLIBCXX_USE_CXX11_ABI=0
)

# Add CXX11 ABI flag based on ATB_CXX_ABI_VERSION
if(ATB_CXX_ABI_VERSION STREQUAL "cxx_abi_0")
list(APPEND COMPILE_OPTIONS -D_GLIBCXX_USE_CXX11_ABI=0)
endif()

set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")

add_library(dicp_model SHARED ${SOURCES})
@@ -313,6 +313,7 @@ atb::Status Model::ExecuteNode(int nodeId) {
if (st != 0) {
DICP_LOG(ERROR) << "execute node[" << nodeId << "] fail, error code: " << st;
}
DICP_LOG(INFO) << modelId_ << " execute node[" << nodeId << "] end";
return st;
}

@@ -424,6 +425,10 @@ void Model::CreateGraphOperation(const nlohmann::json& paramJson, Node& node) {
graph_param.internalTensorNum = internalNames.size();
graph_param.nodes.resize(nodeSize);

if (paramJson.contains("name")) {
graph_param.name = paramJson["name"].get<std::string>();
}

// graph local tensor ids
std::unordered_map<std::string, int> graph_tensor_ids;
int tensorCount = 0;
@@ -39,7 +39,7 @@ atb::Status AclNnCatOperation::InferShape(const atb::SVector<atb::TensorDesc>& i
outTensorDescs.at(0).dtype = inTensorDescs.at(0).dtype;

int64_t concatDimSize = 0;
int64_t dim = this->concatDim > 0 ? this->concatDim : inTensorDescs.at(0).shape.dimNum + this->concatDim;
int64_t dim = this->concatDim >= 0 ? this->concatDim : inTensorDescs.at(0).shape.dimNum + this->concatDim;
for (size_t i = 0; i < inTensorDescs.size(); ++i) {
concatDimSize += inTensorDescs.at(i).shape.dims[dim];
}
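The `> 0` to `>= 0` change matters when concatenating along dim 0: the old expression treated 0 as a negative index and computed `dimNum + 0`, one past the last valid axis. A tiny sketch of the intended normalization:

```python
def normalize_dim(dim: int, ndim: int) -> int:
    # non-negative dims are already absolute; only negative dims count from the end
    return dim if dim >= 0 else ndim + dim

assert normalize_dim(0, 3) == 0   # the old `dim > 0` branch yielded 3 here
assert normalize_dim(-1, 3) == 2
```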
@@ -0,0 +1,71 @@
#include "inplace_add_operation.h"

#include <algorithm>

#include "aclnnop/aclnn_add.h"
#include "utils/log.h"
#include "utils/misc.h"

namespace dicp {

const int NUM1 = 1;
const int NUM2 = 2;

AclNnInplaceAddOperation::AclNnInplaceAddOperation(const std::string& name, float alpha, const std::string& dtype) : AclNnOperation(name) {
alpha_ = DICPScalar(alpha, dtype);
aclAlpha_ = aclCreateScalar(alpha_.getValuePtr(), alpha_.getDataType());
}

AclNnInplaceAddOperation::~AclNnInplaceAddOperation() {
if (aclAlpha_ != nullptr) {
aclDestroyScalar(aclAlpha_);
}
}

atb::Status AclNnInplaceAddOperation::InferShape(const atb::SVector<atb::TensorDesc>& inTensorDescs, atb::SVector<atb::TensorDesc>& outTensorDescs) const {
outTensorDescs.at(0).format = inTensorDescs.at(0).format;
outTensorDescs.at(0).shape.dimNum = inTensorDescs.at(0).shape.dimNum;
outTensorDescs.at(0).dtype = inTensorDescs.at(0).dtype;
for (size_t i = 0; i < outTensorDescs.at(0).shape.dimNum; ++i) {
outTensorDescs.at(0).shape.dims[i] = inTensorDescs.at(0).shape.dims[i];
}
return 0;
}

uint32_t AclNnInplaceAddOperation::GetInputNum() const { return NUM2; }

uint32_t AclNnInplaceAddOperation::GetOutputNum() const { return NUM1; }

int AclNnInplaceAddOperation::SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) {
int ret = aclnnInplaceAddGetWorkspaceSize(aclInTensors_.at(0).tensor, aclInTensors_.at(1).tensor, aclAlpha_, &workspaceSize, &aclExecutor_);
DICP_LOG(INFO) << opName_ << " aclnnInplaceAddGetWorkspaceSize end, ret:" << ret << ", workspaceSize:" << workspaceSize << ", aclExecutor:" << aclExecutor_;
return ret;
}

int AclNnInplaceAddOperation::CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) {
int ret = aclnnInplaceAdd(workspace, workspaceSize, aclExecutor, stream);
DICP_LOG(INFO) << opName_ << " aclnnInplaceAdd end, ret:" << ret;
return ret;
}

atb::Operation* AclNnInplaceAddOperationCreate(const nlohmann::json& paramJson) {
std::string opName;
float alpha = 1.0f;
std::string dtype;
if (paramJson.contains("name")) {
opName = paramJson["name"].get<std::string>();
}
if (paramJson.contains("aplpha")) {
aplpha = paramJson["aplpha"].get<float>();
}
if (paramJson.contains("dtype")) {
dtype = paramJson["dtype"].get<std::string>();
}
DICP_LOG(INFO) << "AclNnInplaceAddOperation: name: " << opName;
atb::Operation* op = new AclNnInplaceAddOperation(opName, alpha, dtype);
return op;
}

REGISTER_OPERATION(AclNnInplaceAddOperation, AclNnInplaceAddOperationCreate);

} // namespace dicp
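For intuition, aclnnInplaceAdd is assumed to follow the usual add-with-alpha semantics, updating the first operand in place; the graph builder accordingly maps output 0 onto input 0. A PyTorch-equivalent sketch of the assumed behaviour:

```python
import torch

def inplace_add_reference(a: torch.Tensor, b: torch.Tensor, alpha: float = 1.0) -> torch.Tensor:
    a.add_(b, alpha=alpha)  # a <- a + alpha * b, result aliases `a`
    return a
```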
@@ -0,0 +1,24 @@
#pragma once

#include "acl_nn_operation.h"
#include "utils/scalar.h"

namespace dicp {
class AclNnInplaceAddOperation : public AclNnOperation {
public:
explicit AclNnInplaceAddOperation(const std::string& name, float alpha, const std::string& dtype);
~AclNnInplaceAddOperation() override;
atb::Status InferShape(const atb::SVector<atb::TensorDesc>& inTensorDescs, atb::SVector<atb::TensorDesc>& outTensorDescs) const override;
uint32_t GetInputNum() const override;
uint32_t GetOutputNum() const override;

private:
DICPScalar alpha_;
aclScalar* aclAlpha_ = nullptr;

std::string dtype_;
int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override;
int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override;
};

} // namespace dicp
@@ -12,14 +12,29 @@

#include "acl/acl.h"
#include "aclnnop/aclnn_split_with_size.h"
#include "utils/common.h"
#include "utils/global_dict.h"
#include "utils/log.h"

namespace dicp {

const int NUM1 = 1;

AclNnSplitWithSizeOperation::AclNnSplitWithSizeOperation(const std::string& name, int64_t splitDim, std::vector<int64_t> splitSizes)
: AclNnOperation(name), splitDim_(splitDim), splitSizes_(std::move(splitSizes)) {}
AclNnSplitWithSizeOperation::AclNnSplitWithSizeOperation(const std::string& name, int64_t splitDim, std::vector<std::string> splitSizes)
: AclNnOperation(name), splitDim_(splitDim) {
splitSizes_.resize(splitSizes.size());
for (size_t i = 0; i < splitSizes_.size(); ++i) {
bool isDynamic = !std::isdigit(splitSizes[i][0]);
if (isDynamic) {
dynamicSplitSizesMap_[i] = splitSizes[i];
} else {
splitSizes_[i] = std::stol(splitSizes[i]);
}
}
if (dynamicSplitSizesMap_.size() == 0) {
aclSplitSizes_ = aclCreateIntArray(splitSizes_.data(), splitSizes_.size());
}
}

AclNnSplitWithSizeOperation::~AclNnSplitWithSizeOperation() {}

@@ -32,17 +47,31 @@ atb::Status AclNnSplitWithSizeOperation::InferShape(const atb::SVector<atb::Tens
const auto inputDtype = inputTensorDesc.dtype;

const auto& inputDims = inputTensorDesc.shape.dims;

int64_t splitDim = splitDim_ >= 0 ? splitDim_ : inputDimNum + splitDim_;

auto& globalDict = GetGlobalDictData();
for (size_t i = 0; i < splitSizes_.size(); ++i) {
auto& outputTensorDesc = outTensorDescs.at(i);
outputTensorDesc.format = inputFormat;
outputTensorDesc.shape.dimNum = inputDimNum;
outputTensorDesc.dtype = inputDtype;
auto& outputDims = outputTensorDesc.shape.dims;

int64_t targetDimValue = -1;
auto dynamicSize = dynamicSplitSizesMap_.find(i);
if (dynamicSize != dynamicSplitSizesMap_.end()) {
auto it = globalDict.find(dynamicSize->second);
if (it != globalDict.end()) {
targetDimValue = static_cast<int64_t>(it->second);
} else {
DICP_LOG(ERROR) << "Cannot find key " << dynamicSize->second << " in global_dict";
}
} else {
targetDimValue = splitSizes_[i];
}

for (size_t j = 0; j < inputDimNum; ++j) {
outputDims[j] = (j != splitDim) ? inputDims[j] : splitSizes_[i];
outputDims[j] = (j != splitDim) ? inputDims[j] : targetDimValue;
}
}

@@ -62,8 +91,24 @@ int AclNnSplitWithSizeOperation::SetAclNnWorkspaceExecutor(uint64_t& workspaceSi
tmp[i] = aclOutTensors_.at(i).tensor;
}
aclTensorList* tensorList = aclCreateTensorList(tmp.data(), tmp.size());
aclIntArray* sizes = aclCreateIntArray(splitSizes_.data(), splitSizes_.size());
int ret = aclnnSplitWithSizeGetWorkspaceSize(aclInTensors_.at(0).tensor, sizes, splitDim_, tensorList, &workspaceSize, &aclExecutor_);

if (dynamicSplitSizesMap_.size() > 0) {
auto& globalDict = GetGlobalDictData();
for (auto& [key, value] : dynamicSplitSizesMap_) {
auto it = globalDict.find(value);
if (it != globalDict.end()) {
splitSizes_[key] = static_cast<int64_t>(it->second);
} else {
DICP_LOG(ERROR) << "Cannot find key " << value << " in global dict";
}
}
if (aclSplitSizes_ != nullptr) {
aclDestroyIntArray(aclSplitSizes_);
aclSplitSizes_ = nullptr;
}
aclSplitSizes_ = aclCreateIntArray(splitSizes_.data(), splitSizes_.size());
}
int ret = aclnnSplitWithSizeGetWorkspaceSize(aclInTensors_.at(0).tensor, aclSplitSizes_, splitDim_, tensorList, &workspaceSize, &aclExecutor_);
DICP_LOG(INFO) << opName_ << " aclnnSplitWithSizeGetWorkspaceSize end, ret:" << ret << ", workspaceSize:" << workspaceSize
<< ", aclExecutor:" << aclExecutor_;

@@ -80,15 +125,15 @@ int AclNnSplitWithSizeOperation::CallAclExecute(uint8_t* workspace, uint64_t wor
atb::Operation* AclNnSplitWithSizeOperationCreate(const nlohmann::json& paramJson) {
std::string opName;
int64_t splitDim;
std::vector<int64_t> splitSizes;
std::vector<std::string> splitSizes;
if (paramJson.contains("name")) {
opName = paramJson["name"].get<std::string>();
}
if (paramJson.contains("splitDim")) {
splitDim = paramJson["splitDim"].get<int64_t>();
}
if (paramJson.contains("splitSizes")) {
splitSizes = paramJson["splitSizes"].get<std::vector<int64_t>>();
splitSizes = paramJson["splitSizes"].get<std::vector<std::string>>();
}
DICP_LOG(INFO) << "AclNnSplitWithSizeOperation: name: " << opName;
atb::Operation* op = new AclNnSplitWithSizeOperation(opName, splitDim, splitSizes);