diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d7f61f59..3818cbb5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,6 +186,7 @@ set(PYPTO_SOURCES src/ir/transforms/utils/parent_stmt_analysis.cpp src/ir/transforms/utils/stmt_dependency_analysis.cpp src/ir/transforms/utils/transform_utils.cpp + src/ir/transforms/utils/wrapper_call_utils.cpp src/ir/transforms/visitor.cpp # IR - Reporter diff --git a/include/pypto/ir/transforms/utils/wrapper_call_utils.h b/include/pypto/ir/transforms/utils/wrapper_call_utils.h new file mode 100644 index 000000000..73d89ae15 --- /dev/null +++ b/include/pypto/ir/transforms/utils/wrapper_call_utils.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + * ----------------------------------------------------------------------------------------------------------- + */ + +#ifndef PYPTO_IR_TRANSFORMS_UTILS_WRAPPER_CALL_UTILS_H_ +#define PYPTO_IR_TRANSFORMS_UTILS_WRAPPER_CALL_UTILS_H_ + +#include +#include + +#include "pypto/ir/expr.h" +#include "pypto/ir/function.h" +#include "pypto/ir/program.h" + +namespace pypto { +namespace ir { + +/** + * @brief Result of a wrapper / inner-call lookup. + * + * Both fields are nullptr if no matching call was found. 
+ */ +struct WrapperCallInfo { + CallPtr inner_call; + FunctionPtr inner_callee; +}; + +/** + * @brief Find the first non-builtin Call inside @p wrapper that resolves to a + * Function in @p program. + * + * "Non-builtin" here means the Call's op is a GlobalVar that names an + * existing user-level Function in the program. Builtin op calls + * (`tile.*`, `tensor.*`, `system.*`) carry no GlobalVar and are skipped. + * + * @return {call, callee} for the first match, or {nullptr, nullptr} if none. + */ +WrapperCallInfo FindFirstInnerCall(const FunctionPtr& wrapper, const ProgramPtr& program); + +/** + * @brief Result of a Group-function callee scan. + * + * - `aic_name` / `aiv_name` — the names of the first AIC / AIV callees + * encountered (empty if none). + * - `inner_call` / `inner_callee` — the **first** AIC, AIV, or InCore call + * in source order, regardless of type. Used by orchestration codegen as + * the parameter-order reference for wrapper arg reconciliation. After + * `ExpandMixedKernel`, Group bodies are emitted as `AIC → AIV` so the + * AIC call is naturally first in practice; the function does not enforce + * a type priority. + */ +struct GroupCalleeInfo { + std::string aic_name; + std::string aiv_name; + CallPtr inner_call; + FunctionPtr inner_callee; +}; + +/** + * @brief Group-specific scan: locate the AIC / AIV callees and the first + * AIC/AIV/InCore inner call inside @p group_func. + * + * @return aggregated info; any field may be empty / nullptr if not present. + */ +GroupCalleeInfo FindGroupCallees(const FunctionPtr& group_func, const ProgramPtr& program); + +/** + * @brief Collect every Call inside @p wrapper that resolves to a Function + * of a non-Orchestration, non-Opaque type. + * + * Used by cross-function direction propagation in `ComputeGroupEffectiveDirections`. + * Visits the body in order; every call site yields its own entry — results + * are not de-duplicated by callee.
+ */ +std::vector CollectInnerCalls(const FunctionPtr& wrapper, const ProgramPtr& program); + +} // namespace ir +} // namespace pypto + +#endif // PYPTO_IR_TRANSFORMS_UTILS_WRAPPER_CALL_UTILS_H_ diff --git a/src/codegen/orchestration/orchestration_analysis.cpp b/src/codegen/orchestration/orchestration_analysis.cpp index 7de74d607..736218454 100644 --- a/src/codegen/orchestration/orchestration_analysis.cpp +++ b/src/codegen/orchestration/orchestration_analysis.cpp @@ -29,6 +29,7 @@ #include "pypto/ir/stmt.h" #include "pypto/ir/transforms/base/visitor.h" #include "pypto/ir/transforms/utils/auto_name_utils.h" +#include "pypto/ir/transforms/utils/wrapper_call_utils.h" #include "pypto/ir/type.h" namespace pypto { @@ -336,35 +337,14 @@ std::vector ComputeGroupEffectiveDirections(const FunctionPtr& g return declared; } - class InnerCallFinder : public IRVisitor { - public: - explicit InnerCallFinder(const ProgramPtr& program) : program_(program) {} - const ProgramPtr& program_; - std::vector> inner_calls; - - protected: - void VisitExpr_(const CallPtr& call) override { - if (auto gv = As(call->op_)) { - auto callee = program_->GetFunction(gv->name_); - if (callee && callee->func_type_ != FunctionType::Orchestration && - callee->func_type_ != FunctionType::Opaque) { - inner_calls.emplace_back(call, callee); - return; - } - } - IRVisitor::VisitExpr_(call); - } - }; - - InnerCallFinder finder(program); - finder.VisitStmt(func->body_); - if (!finder.inner_calls.empty()) { + auto inner_calls = ir::CollectInnerCalls(func, program); + if (!inner_calls.empty()) { std::unordered_map param_to_index; for (size_t i = 0; i < func->params_.size(); ++i) { param_to_index[func->params_[i].get()] = i; } - for (const auto& [inner_call, inner_callee] : finder.inner_calls) { + for (const auto& [inner_call, inner_callee] : inner_calls) { const auto& inner_args = inner_call->args_; std::vector inner_dirs; if (inner_callee->func_type_ == FunctionType::Group || diff --git 
a/src/codegen/orchestration/orchestration_codegen.cpp b/src/codegen/orchestration/orchestration_codegen.cpp index e928b31e9..2a782797d 100644 --- a/src/codegen/orchestration/orchestration_codegen.cpp +++ b/src/codegen/orchestration/orchestration_codegen.cpp @@ -48,6 +48,7 @@ #include "pypto/ir/transforms/utils/op_predicates.h" #include "pypto/ir/transforms/utils/transform_utils.h" #include "pypto/ir/transforms/utils/var_collectors.h" +#include "pypto/ir/transforms/utils/wrapper_call_utils.h" #include "pypto/ir/type.h" namespace pypto { @@ -56,43 +57,25 @@ namespace codegen { using namespace pypto::ir; // NOLINT(build/namespaces) CoreType InferFunctionCoreType(const FunctionPtr& func) { - if (func->func_type_ == FunctionType::AIC) return CoreType::CUBE; - if (func->func_type_ == FunctionType::AIV) return CoreType::VECTOR; - - class CoreTypeCollector : public IRVisitor { - public: - bool has_cube_ = false; - bool has_vector_ = false; - - void VisitExpr_(const CallPtr& call) override { - for (const auto& arg : call->args_) { - if (auto tile = As(arg->GetType())) { - auto memory_space = tile->GetMemorySpace(); - if (!memory_space.has_value()) { - continue; - } - if (IsCubeMemorySpace(*memory_space)) { - has_cube_ = true; - } else if (*memory_space == MemorySpace::Vec) { - has_vector_ = true; - } - } - } - IRVisitor::VisitExpr_(call); - } - }; - - CoreTypeCollector collector; - collector.VisitStmt(func->body_); - - CHECK(!(collector.has_cube_ && collector.has_vector_)) - << "Function " << func->name_ << " contains both CUBE and VECTOR memory spaces. " - << "A function can only use one core type."; - - if (collector.has_cube_) { - return CoreType::CUBE; + // After ExpandMixedKernel runs (part of every Default / DebugTileOptimization + // pipeline), every InCore function reaching codegen has been split into AIC, + // AIV, or Group / Spmd wrappers. 
The two callers of this function + // (GenerateFunctionCallCode and GenerateSpmdCallCode) both filter Spmd / + // Group out before invoking it. Tests that bypass the pipeline must declare + // their kernels with the appropriate AIC / AIV type explicitly so codegen + // sees the concrete core type without re-deriving from body memory spaces. + switch (func->func_type_) { + case FunctionType::AIC: + return CoreType::CUBE; + case FunctionType::AIV: + return CoreType::VECTOR; + default: + INTERNAL_UNREACHABLE_SPAN(func->span_) + << "InferFunctionCoreType expects AIC or AIV (Spmd/Group are filtered upstream); got " + << FunctionTypeToString(func->func_type_) << " on function '" << func->name_ + << "'. Either run ExpandMixedKernel before codegen or declare the function " + << "with @pl.function(type=pl.FunctionType.AIC|AIV) directly."; } - return CoreType::VECTOR; } namespace { @@ -899,76 +882,16 @@ class OrchestrationStmtCodegen : public CodegenBase { }; WrapperCallInfo FindWrapperInnerCall(const FunctionPtr& wrapper_func) { - class InnerCallFinder : public IRVisitor { - public: - explicit InnerCallFinder(const ProgramPtr& program) : program_(program) {} - const ProgramPtr& program_; - CallPtr inner_call; - FunctionPtr inner_callee; - - protected: - void VisitExpr_(const CallPtr& call) override { - if (inner_call) return; - if (auto gv = As(call->op_)) { - auto callee = program_->GetFunction(gv->name_); - if (callee) { - inner_call = call; - inner_callee = callee; - return; - } - } - IRVisitor::VisitExpr_(call); - } - }; - - InnerCallFinder finder(program_); - finder.VisitStmt(wrapper_func->body_); - return {std::move(finder.inner_call), std::move(finder.inner_callee)}; + auto info = ir::FindFirstInnerCall(wrapper_func, program_); + return {std::move(info.inner_call), std::move(info.inner_callee)}; } /// Walk the Group function body to find the AIC and AIV callee names /// and the inner InCore call (needed for param reordering). 
GroupCalleeInfo FindGroupCallees(const FunctionPtr& group_func) { - class CalleeFinder : public IRVisitor { - public: - explicit CalleeFinder(const ProgramPtr& program) : program_(program) {} - const ProgramPtr& program_; - std::string aic_name; - std::string aiv_name; - CallPtr inner_call; - FunctionPtr inner_callee; - - protected: - void VisitExpr_(const CallPtr& call) override { - if (auto gv = As(call->op_)) { - auto callee = program_->GetFunction(gv->name_); - if (callee) { - if (callee->func_type_ == FunctionType::AIC && aic_name.empty()) { - aic_name = callee->name_; - if (!inner_call) { - inner_call = call; - inner_callee = callee; - } - } else if (callee->func_type_ == FunctionType::AIV && aiv_name.empty()) { - aiv_name = callee->name_; - if (!inner_call) { - inner_call = call; - inner_callee = callee; - } - } else if (callee->func_type_ == FunctionType::InCore && !inner_call) { - inner_call = call; - inner_callee = callee; - } - } - } - IRVisitor::VisitExpr_(call); - } - }; - - CalleeFinder finder(program_); - finder.VisitStmt(group_func->body_); - return {std::move(finder.aic_name), std::move(finder.aiv_name), std::move(finder.inner_call), - std::move(finder.inner_callee)}; + auto info = ir::FindGroupCallees(group_func, program_); + return {std::move(info.aic_name), std::move(info.aiv_name), std::move(info.inner_call), + std::move(info.inner_callee)}; } /// Build task params for a wrapper function call, reordered to match the diff --git a/src/ir/transforms/utils/wrapper_call_utils.cpp b/src/ir/transforms/utils/wrapper_call_utils.cpp new file mode 100644 index 000000000..9f291806e --- /dev/null +++ b/src/ir/transforms/utils/wrapper_call_utils.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. 
You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + * ----------------------------------------------------------------------------------------------------------- + */ + +#include "pypto/ir/transforms/utils/wrapper_call_utils.h" + +#include <functional> +#include <utility> +#include <vector> + +#include "pypto/ir/kind_traits.h" +#include "pypto/ir/transforms/base/visitor.h" + +namespace pypto { +namespace ir { + +namespace { + +/// Shared scaffold: visit every Call in the body, resolve its op via +/// `GlobalVar` lookup, invoke @p on_match for each resolved (call, callee) +/// pair. Returning `true` from @p on_match terminates the walk early. +class CallVisitor : public IRVisitor { + public: + using OnMatchFn = std::function<bool(const CallPtr&, const FunctionPtr&)>; + + CallVisitor(const ProgramPtr& program, OnMatchFn on_match) + : program_(program), on_match_(std::move(on_match)) {} + + protected: + void VisitExpr_(const CallPtr& call) override { + if (stop_) return; + if (auto gv = As<GlobalVar>(call->op_)) { + if (auto callee = program_->GetFunction(gv->name_)) { + if (on_match_(call, callee)) { + stop_ = true; + return; + } + } + } + IRVisitor::VisitExpr_(call); + } + + private: + const ProgramPtr& program_; + OnMatchFn on_match_; + bool stop_ = false; +}; + +} // namespace + +WrapperCallInfo FindFirstInnerCall(const FunctionPtr& wrapper, const ProgramPtr& program) { + WrapperCallInfo info; + if (!wrapper || !wrapper->body_ || !program) return info; + CallVisitor visitor(program, [&](const CallPtr& call, const FunctionPtr& callee) { + info.inner_call = call; + info.inner_callee = callee; + return true; // first match wins; stop the walk + }); + visitor.VisitStmt(wrapper->body_); + return info; +} + +GroupCalleeInfo 
FindGroupCallees(const FunctionPtr& group_func, const ProgramPtr& program) { + GroupCalleeInfo info; + if (!group_func || !group_func->body_ || !program) return info; + // `aic_name` / `aiv_name` are first-match-per-type. `inner_call` is + // first-match in source order regardless of type — this matches the + // behavior of the original CalleeFinder in orchestration_codegen.cpp + // and is what BuildWrapperReorderedParams expects (the call whose arg + // order it reorders against). Group bodies emitted by ExpandMixedKernel + // place AIC before AIV in source order, so the AIC call wins in practice. + CallVisitor visitor(program, [&](const CallPtr& call, const FunctionPtr& callee) { + if (callee->func_type_ == FunctionType::AIC && info.aic_name.empty()) { + info.aic_name = callee->name_; + if (!info.inner_call) { + info.inner_call = call; + info.inner_callee = callee; + } + } else if (callee->func_type_ == FunctionType::AIV && info.aiv_name.empty()) { + info.aiv_name = callee->name_; + if (!info.inner_call) { + info.inner_call = call; + info.inner_callee = callee; + } + } else if (callee->func_type_ == FunctionType::InCore && !info.inner_call) { + info.inner_call = call; + info.inner_callee = callee; + } + return false; // collect all matches + }); + visitor.VisitStmt(group_func->body_); + return info; +} + +std::vector<WrapperCallInfo> CollectInnerCalls(const FunctionPtr& wrapper, const ProgramPtr& program) { + std::vector<WrapperCallInfo> result; + if (!wrapper || !wrapper->body_ || !program) return result; + CallVisitor visitor(program, [&](const CallPtr& call, const FunctionPtr& callee) { + if (callee->func_type_ != FunctionType::Orchestration && callee->func_type_ != FunctionType::Opaque) { + result.push_back({call, callee}); + } + return false; + }); + visitor.VisitStmt(wrapper->body_); + return result; +} + +} // namespace ir +} // namespace pypto diff --git a/tests/ut/codegen/test_orchestration_codegen.py b/tests/ut/codegen/test_orchestration_codegen.py index 0675d5964..c64009e19 100644 --- 
a/tests/ut/codegen/test_orchestration_codegen.py +++ b/tests/ut/codegen/test_orchestration_codegen.py @@ -80,7 +80,7 @@ def test_basic_structure(self): @pl.program class BasicProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -164,7 +164,7 @@ def test_tensor_read(self): @pl.program class TensorReadProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -203,7 +203,7 @@ def test_config_file(self): @pl.program class ConfigProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -238,7 +238,7 @@ def test_independent_tasks(self): @pl.program class IndependentProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -292,7 +292,7 @@ def test_vector_example_dag(self): @pl.program class VectorExampleProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -305,7 +305,7 @@ def kernel_add( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(result, [0, 0], output) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add_scalar( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -317,7 +317,7 @@ def kernel_add_scalar( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(result, [0, 0], output) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_mul( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -437,7 +437,7 @@ def test_tuple_intermediate(self): @pl.program class TupleIntermediateProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_pair( self, a: pl.Tensor[[16, 16], 
pl.FP32], @@ -453,7 +453,7 @@ def kernel_pair( rd: pl.Tensor[[16, 16], pl.FP32] = pl.store(d, [0, 0], out_d) return rs, rd - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -502,7 +502,7 @@ def test_tuple_output(self): @pl.program class TupleOutputProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_pair( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -550,7 +550,7 @@ def test_four_element_tuple(self): @pl.program class FourTupleProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def online_update( self, mij: pl.Tensor[[16, 1], pl.FP32], @@ -576,7 +576,7 @@ def online_update( dst_out: pl.Tensor[[16, 16], pl.FP32] = pl.store(dst_tile, [0, 0], dst) return mi_out, li_out, oi_out, dst_out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -641,7 +641,7 @@ def test_tensor_create(self): @pl.program class TensorCreateProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_fill( self, a: pl.Tensor[[32, 32], pl.FP16], @@ -682,7 +682,7 @@ def test_inplace_tensor(self): @pl.program class InplaceProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def online_update( self, mij: pl.Tensor[[16, 1], pl.FP32], @@ -783,7 +783,7 @@ def test_tensor_dim(self): @pl.program class TensorDimProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -823,7 +823,7 @@ def test_for_loop_with_slice(self): @pl.program class ForViewProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -1011,7 +1011,7 @@ def test_if_statement(self): @pl.program class 
IfProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_process( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -1061,7 +1061,7 @@ def test_multiple_tuple_calls(self): @pl.program class MultipleTupleProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_a( self, x: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1076,7 +1076,7 @@ def kernel_a( y_out: pl.Tensor[[16, 16], pl.FP32] = pl.store(yt, [0, 0], y) return x_out, y_out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_b( self, a: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1154,7 +1154,7 @@ def test_tuple_in_for_loop(self): @pl.program class TupleForLoopProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_init( self, a: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1169,7 +1169,7 @@ def kernel_init( b_out: pl.Tensor[[16, 16], pl.FP32] = pl.store(bt, [0, 0], b) return a_out, b_out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_update( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -1445,7 +1445,7 @@ def test_param_with_numeric_suffix(self): @pl.program class NumericSuffixProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel( self, x: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1790,7 +1790,7 @@ def test_scalar_taskarg(self): @pl.program class MultiScalarProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2428,7 +2428,7 @@ def test_alloc_tensor_two_loops_gets_inout(self): @pl.program class TwoLoopAllocProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task_init( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2438,7 +2438,7 @@ def task_init( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(t, [0, 0], 
buf) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task_compute( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2448,7 +2448,7 @@ def task_compute( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(t, [0, 0], buf) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task_read( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2490,7 +2490,7 @@ def test_external_tensor_keeps_add_output(self): @pl.program class ExternalOutProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2528,7 +2528,7 @@ def test_parallel_loop_local_buf_keeps_add_output(self): @pl.program class SingleParallelProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2573,7 +2573,7 @@ def test_two_parallel_loops_promote_only_second(self): @pl.program class TwoParallelProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2753,7 +2753,7 @@ def test_form_a_direct_return_no_alias(self): @pl.program class FormA: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kern( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2782,7 +2782,7 @@ def test_form_b_single_bind_no_alias(self): @pl.program class FormB: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kern( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2817,7 +2817,7 @@ def test_form_c_chained_alias_drops_no_op(self): @pl.program class FormC: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kern( self, x: pl.Tensor[[16, 16], pl.FP32],