diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d7f61f59..3818cbb5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,6 +186,7 @@ set(PYPTO_SOURCES src/ir/transforms/utils/parent_stmt_analysis.cpp src/ir/transforms/utils/stmt_dependency_analysis.cpp src/ir/transforms/utils/transform_utils.cpp + src/ir/transforms/utils/wrapper_call_utils.cpp src/ir/transforms/visitor.cpp # IR - Reporter diff --git a/include/pypto/ir/transforms/utils/wrapper_call_utils.h b/include/pypto/ir/transforms/utils/wrapper_call_utils.h new file mode 100644 index 000000000..73d89ae15 --- /dev/null +++ b/include/pypto/ir/transforms/utils/wrapper_call_utils.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + * ----------------------------------------------------------------------------------------------------------- + */ + +#ifndef PYPTO_IR_TRANSFORMS_UTILS_WRAPPER_CALL_UTILS_H_ +#define PYPTO_IR_TRANSFORMS_UTILS_WRAPPER_CALL_UTILS_H_ + +#include +#include + +#include "pypto/ir/expr.h" +#include "pypto/ir/function.h" +#include "pypto/ir/program.h" + +namespace pypto { +namespace ir { + +/** + * @brief Result of a wrapper / inner-call lookup. + * + * Both fields are nullptr if no matching call was found. 
+ */ +struct WrapperCallInfo { + CallPtr inner_call; + FunctionPtr inner_callee; +}; + +/** + * @brief Find the first non-builtin Call inside @p wrapper that resolves to a + * Function in @p program. + * + * "Non-builtin" here means the Call's op is a GlobalVar that names an + * existing user-level Function in the program. Builtin op calls + * (`tile.*`, `tensor.*`, `system.*`) carry no GlobalVar and are skipped. + * + * @return {call, callee} for the first match, or {nullptr, nullptr} if none. + */ +WrapperCallInfo FindFirstInnerCall(const FunctionPtr& wrapper, const ProgramPtr& program); + +/** + * @brief Result of a Group-function callee scan. + * + * - `aic_name` / `aiv_name` — the names of the first AIC / AIV callees + * encountered (empty if none). + * - `inner_call` / `inner_callee` — the **first** AIC, AIV, or InCore call + * in source order, regardless of type. Used by orchestration codegen as + * the parameter-order reference for wrapper arg reconciliation. After + * `ExpandMixedKernel`, Group bodies are emitted as `AIC → AIV` so the + * AIC call is naturally first in practice; the function does not enforce + * a type priority. + */ +struct GroupCalleeInfo { + std::string aic_name; + std::string aiv_name; + CallPtr inner_call; + FunctionPtr inner_callee; +}; + +/** + * @brief Group-specific scan: locate the AIC / AIV callees and the first + * AIC/AIV/InCore inner call inside @p group_func. + * + * @return aggregated info; any field may be empty / nullptr if not present. + */ +GroupCalleeInfo FindGroupCallees(const FunctionPtr& group_func, const ProgramPtr& program); + +/** + * @brief Collect every Call inside @p wrapper that resolves to a Function + * of a non-Orchestration, non-Opaque type. + * + * Used by cross-function direction propagation in `ComputeGroupEffectiveDirections`. + * Visits the body in order; every call site yields its own entry — results + * are not de-duplicated by callee.
+ */ +std::vector CollectInnerCalls(const FunctionPtr& wrapper, const ProgramPtr& program); + +} // namespace ir +} // namespace pypto + +#endif // PYPTO_IR_TRANSFORMS_UTILS_WRAPPER_CALL_UTILS_H_ diff --git a/src/codegen/orchestration/orchestration_analysis.cpp b/src/codegen/orchestration/orchestration_analysis.cpp index 7de74d607..736218454 100644 --- a/src/codegen/orchestration/orchestration_analysis.cpp +++ b/src/codegen/orchestration/orchestration_analysis.cpp @@ -29,6 +29,7 @@ #include "pypto/ir/stmt.h" #include "pypto/ir/transforms/base/visitor.h" #include "pypto/ir/transforms/utils/auto_name_utils.h" +#include "pypto/ir/transforms/utils/wrapper_call_utils.h" #include "pypto/ir/type.h" namespace pypto { @@ -336,35 +337,14 @@ std::vector ComputeGroupEffectiveDirections(const FunctionPtr& g return declared; } - class InnerCallFinder : public IRVisitor { - public: - explicit InnerCallFinder(const ProgramPtr& program) : program_(program) {} - const ProgramPtr& program_; - std::vector> inner_calls; - - protected: - void VisitExpr_(const CallPtr& call) override { - if (auto gv = As(call->op_)) { - auto callee = program_->GetFunction(gv->name_); - if (callee && callee->func_type_ != FunctionType::Orchestration && - callee->func_type_ != FunctionType::Opaque) { - inner_calls.emplace_back(call, callee); - return; - } - } - IRVisitor::VisitExpr_(call); - } - }; - - InnerCallFinder finder(program); - finder.VisitStmt(func->body_); - if (!finder.inner_calls.empty()) { + auto inner_calls = ir::CollectInnerCalls(func, program); + if (!inner_calls.empty()) { std::unordered_map param_to_index; for (size_t i = 0; i < func->params_.size(); ++i) { param_to_index[func->params_[i].get()] = i; } - for (const auto& [inner_call, inner_callee] : finder.inner_calls) { + for (const auto& [inner_call, inner_callee] : inner_calls) { const auto& inner_args = inner_call->args_; std::vector inner_dirs; if (inner_callee->func_type_ == FunctionType::Group || diff --git 
a/src/codegen/orchestration/orchestration_codegen.cpp b/src/codegen/orchestration/orchestration_codegen.cpp index e928b31e9..2a782797d 100644 --- a/src/codegen/orchestration/orchestration_codegen.cpp +++ b/src/codegen/orchestration/orchestration_codegen.cpp @@ -48,6 +48,7 @@ #include "pypto/ir/transforms/utils/op_predicates.h" #include "pypto/ir/transforms/utils/transform_utils.h" #include "pypto/ir/transforms/utils/var_collectors.h" +#include "pypto/ir/transforms/utils/wrapper_call_utils.h" #include "pypto/ir/type.h" namespace pypto { @@ -56,43 +57,25 @@ namespace codegen { using namespace pypto::ir; // NOLINT(build/namespaces) CoreType InferFunctionCoreType(const FunctionPtr& func) { - if (func->func_type_ == FunctionType::AIC) return CoreType::CUBE; - if (func->func_type_ == FunctionType::AIV) return CoreType::VECTOR; - - class CoreTypeCollector : public IRVisitor { - public: - bool has_cube_ = false; - bool has_vector_ = false; - - void VisitExpr_(const CallPtr& call) override { - for (const auto& arg : call->args_) { - if (auto tile = As(arg->GetType())) { - auto memory_space = tile->GetMemorySpace(); - if (!memory_space.has_value()) { - continue; - } - if (IsCubeMemorySpace(*memory_space)) { - has_cube_ = true; - } else if (*memory_space == MemorySpace::Vec) { - has_vector_ = true; - } - } - } - IRVisitor::VisitExpr_(call); - } - }; - - CoreTypeCollector collector; - collector.VisitStmt(func->body_); - - CHECK(!(collector.has_cube_ && collector.has_vector_)) - << "Function " << func->name_ << " contains both CUBE and VECTOR memory spaces. " - << "A function can only use one core type."; - - if (collector.has_cube_) { - return CoreType::CUBE; + // After ExpandMixedKernel runs (part of every Default / DebugTileOptimization + // pipeline), every InCore function reaching codegen has been split into AIC, + // AIV, or Group / Spmd wrappers. 
The two callers of this function + // (GenerateFunctionCallCode and GenerateSpmdCallCode) both filter Spmd / + // Group out before invoking it. Tests that bypass the pipeline must declare + // their kernels with the appropriate AIC / AIV type explicitly so codegen + // sees the concrete core type without re-deriving from body memory spaces. + switch (func->func_type_) { + case FunctionType::AIC: + return CoreType::CUBE; + case FunctionType::AIV: + return CoreType::VECTOR; + default: + INTERNAL_UNREACHABLE_SPAN(func->span_) + << "InferFunctionCoreType expects AIC or AIV (Spmd/Group are filtered upstream); got " + << FunctionTypeToString(func->func_type_) << " on function '" << func->name_ + << "'. Either run ExpandMixedKernel before codegen or declare the function " + << "with @pl.function(type=pl.FunctionType.AIC|AIV) directly."; } - return CoreType::VECTOR; } namespace { @@ -899,76 +882,16 @@ class OrchestrationStmtCodegen : public CodegenBase { }; WrapperCallInfo FindWrapperInnerCall(const FunctionPtr& wrapper_func) { - class InnerCallFinder : public IRVisitor { - public: - explicit InnerCallFinder(const ProgramPtr& program) : program_(program) {} - const ProgramPtr& program_; - CallPtr inner_call; - FunctionPtr inner_callee; - - protected: - void VisitExpr_(const CallPtr& call) override { - if (inner_call) return; - if (auto gv = As(call->op_)) { - auto callee = program_->GetFunction(gv->name_); - if (callee) { - inner_call = call; - inner_callee = callee; - return; - } - } - IRVisitor::VisitExpr_(call); - } - }; - - InnerCallFinder finder(program_); - finder.VisitStmt(wrapper_func->body_); - return {std::move(finder.inner_call), std::move(finder.inner_callee)}; + auto info = ir::FindFirstInnerCall(wrapper_func, program_); + return {std::move(info.inner_call), std::move(info.inner_callee)}; } /// Walk the Group function body to find the AIC and AIV callee names /// and the inner InCore call (needed for param reordering). 
GroupCalleeInfo FindGroupCallees(const FunctionPtr& group_func) { - class CalleeFinder : public IRVisitor { - public: - explicit CalleeFinder(const ProgramPtr& program) : program_(program) {} - const ProgramPtr& program_; - std::string aic_name; - std::string aiv_name; - CallPtr inner_call; - FunctionPtr inner_callee; - - protected: - void VisitExpr_(const CallPtr& call) override { - if (auto gv = As(call->op_)) { - auto callee = program_->GetFunction(gv->name_); - if (callee) { - if (callee->func_type_ == FunctionType::AIC && aic_name.empty()) { - aic_name = callee->name_; - if (!inner_call) { - inner_call = call; - inner_callee = callee; - } - } else if (callee->func_type_ == FunctionType::AIV && aiv_name.empty()) { - aiv_name = callee->name_; - if (!inner_call) { - inner_call = call; - inner_callee = callee; - } - } else if (callee->func_type_ == FunctionType::InCore && !inner_call) { - inner_call = call; - inner_callee = callee; - } - } - } - IRVisitor::VisitExpr_(call); - } - }; - - CalleeFinder finder(program_); - finder.VisitStmt(group_func->body_); - return {std::move(finder.aic_name), std::move(finder.aiv_name), std::move(finder.inner_call), - std::move(finder.inner_callee)}; + auto info = ir::FindGroupCallees(group_func, program_); + return {std::move(info.aic_name), std::move(info.aiv_name), std::move(info.inner_call), + std::move(info.inner_callee)}; } /// Build task params for a wrapper function call, reordered to match the diff --git a/src/ir/transforms/utils/wrapper_call_utils.cpp b/src/ir/transforms/utils/wrapper_call_utils.cpp new file mode 100644 index 000000000..9f291806e --- /dev/null +++ b/src/ir/transforms/utils/wrapper_call_utils.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. 
You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + * ----------------------------------------------------------------------------------------------------------- + */ + +#include "pypto/ir/transforms/utils/wrapper_call_utils.h" + +#include <functional> +#include <utility> +#include <vector> + +#include "pypto/ir/kind_traits.h" +#include "pypto/ir/transforms/base/visitor.h" + +namespace pypto { +namespace ir { + +namespace { + +/// Shared scaffold: visit every Call in the body, resolve its op via +/// `GlobalVar` lookup, invoke @p on_match for each resolved (call, callee) +/// pair. Returning `true` from @p on_match terminates the walk early. +class CallVisitor : public IRVisitor { + public: + using OnMatchFn = std::function<bool(const CallPtr&, const FunctionPtr&)>; + + CallVisitor(const ProgramPtr& program, OnMatchFn on_match) + : program_(program), on_match_(std::move(on_match)) {} + + protected: + void VisitExpr_(const CallPtr& call) override { + if (stop_) return; + if (auto gv = As<GlobalVar>(call->op_)) { + if (auto callee = program_->GetFunction(gv->name_)) { + if (on_match_(call, callee)) { + stop_ = true; + return; + } + } + } + IRVisitor::VisitExpr_(call); + } + + private: + const ProgramPtr& program_; + OnMatchFn on_match_; + bool stop_ = false; +}; + +} // namespace + +WrapperCallInfo FindFirstInnerCall(const FunctionPtr& wrapper, const ProgramPtr& program) { + WrapperCallInfo info; + if (!wrapper || !wrapper->body_ || !program) return info; + CallVisitor visitor(program, [&](const CallPtr& call, const FunctionPtr& callee) { + info.inner_call = call; + info.inner_callee = callee; + return true; // first match wins; stop the walk + }); + visitor.VisitStmt(wrapper->body_); + return info; +} + +GroupCalleeInfo 
FindGroupCallees(const FunctionPtr& group_func, const ProgramPtr& program) { + GroupCalleeInfo info; + if (!group_func || !group_func->body_ || !program) return info; + // `aic_name` / `aiv_name` are first-match-per-type. `inner_call` is + // first-match in source order regardless of type — this matches the + // behavior of the original CalleeFinder in orchestration_codegen.cpp + // and is what BuildWrapperReorderedParams expects (the call whose arg + // order it reorders against). Group bodies emitted by ExpandMixedKernel + // place AIC before AIV in source order, so the AIC call wins in practice. + CallVisitor visitor(program, [&](const CallPtr& call, const FunctionPtr& callee) { + if (callee->func_type_ == FunctionType::AIC && info.aic_name.empty()) { + info.aic_name = callee->name_; + if (!info.inner_call) { + info.inner_call = call; + info.inner_callee = callee; + } + } else if (callee->func_type_ == FunctionType::AIV && info.aiv_name.empty()) { + info.aiv_name = callee->name_; + if (!info.inner_call) { + info.inner_call = call; + info.inner_callee = callee; + } + } else if (callee->func_type_ == FunctionType::InCore && !info.inner_call) { + info.inner_call = call; + info.inner_callee = callee; + } + return false; // collect all matches + }); + visitor.VisitStmt(group_func->body_); + return info; +} + +std::vector<WrapperCallInfo> CollectInnerCalls(const FunctionPtr& wrapper, const ProgramPtr& program) { + std::vector<WrapperCallInfo> result; + if (!wrapper || !wrapper->body_ || !program) return result; + CallVisitor visitor(program, [&](const CallPtr& call, const FunctionPtr& callee) { + if (callee->func_type_ != FunctionType::Orchestration && callee->func_type_ != FunctionType::Opaque) { + result.push_back({call, callee}); + } + return false; + }); + visitor.VisitStmt(wrapper->body_); + return result; +} + +} // namespace ir +} // namespace pypto diff --git a/tests/ut/codegen/test_orchestration_codegen.py b/tests/ut/codegen/test_orchestration_codegen.py index 0675d5964..c64009e19 100644 --- 
a/tests/ut/codegen/test_orchestration_codegen.py +++ b/tests/ut/codegen/test_orchestration_codegen.py @@ -80,7 +80,7 @@ def test_basic_structure(self): @pl.program class BasicProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -164,7 +164,7 @@ def test_tensor_read(self): @pl.program class TensorReadProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -203,7 +203,7 @@ def test_config_file(self): @pl.program class ConfigProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -238,7 +238,7 @@ def test_independent_tasks(self): @pl.program class IndependentProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -292,7 +292,7 @@ def test_vector_example_dag(self): @pl.program class VectorExampleProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -305,7 +305,7 @@ def kernel_add( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(result, [0, 0], output) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add_scalar( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -317,7 +317,7 @@ def kernel_add_scalar( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(result, [0, 0], output) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_mul( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -437,7 +437,7 @@ def test_tuple_intermediate(self): @pl.program class TupleIntermediateProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_pair( self, a: pl.Tensor[[16, 16], 
pl.FP32], @@ -453,7 +453,7 @@ def kernel_pair( rd: pl.Tensor[[16, 16], pl.FP32] = pl.store(d, [0, 0], out_d) return rs, rd - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -502,7 +502,7 @@ def test_tuple_output(self): @pl.program class TupleOutputProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_pair( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -550,7 +550,7 @@ def test_four_element_tuple(self): @pl.program class FourTupleProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def online_update( self, mij: pl.Tensor[[16, 1], pl.FP32], @@ -576,7 +576,7 @@ def online_update( dst_out: pl.Tensor[[16, 16], pl.FP32] = pl.store(dst_tile, [0, 0], dst) return mi_out, li_out, oi_out, dst_out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -641,7 +641,7 @@ def test_tensor_create(self): @pl.program class TensorCreateProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_fill( self, a: pl.Tensor[[32, 32], pl.FP16], @@ -682,7 +682,7 @@ def test_inplace_tensor(self): @pl.program class InplaceProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def online_update( self, mij: pl.Tensor[[16, 1], pl.FP32], @@ -783,7 +783,7 @@ def test_tensor_dim(self): @pl.program class TensorDimProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -823,7 +823,7 @@ def test_for_loop_with_slice(self): @pl.program class ForViewProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_add( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -1011,7 +1011,7 @@ def test_if_statement(self): @pl.program class 
IfProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_process( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -1061,7 +1061,7 @@ def test_multiple_tuple_calls(self): @pl.program class MultipleTupleProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_a( self, x: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1076,7 +1076,7 @@ def kernel_a( y_out: pl.Tensor[[16, 16], pl.FP32] = pl.store(yt, [0, 0], y) return x_out, y_out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_b( self, a: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1154,7 +1154,7 @@ def test_tuple_in_for_loop(self): @pl.program class TupleForLoopProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_init( self, a: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1169,7 +1169,7 @@ def kernel_init( b_out: pl.Tensor[[16, 16], pl.FP32] = pl.store(bt, [0, 0], b) return a_out, b_out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel_update( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -1445,7 +1445,7 @@ def test_param_with_numeric_suffix(self): @pl.program class NumericSuffixProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel( self, x: pl.InOut[pl.Tensor[[16, 16], pl.FP32]], @@ -1790,7 +1790,7 @@ def test_scalar_taskarg(self): @pl.program class MultiScalarProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2428,7 +2428,7 @@ def test_alloc_tensor_two_loops_gets_inout(self): @pl.program class TwoLoopAllocProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task_init( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2438,7 +2438,7 @@ def task_init( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(t, [0, 0], 
buf) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task_compute( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2448,7 +2448,7 @@ def task_compute( out: pl.Tensor[[16, 16], pl.FP32] = pl.store(t, [0, 0], buf) return out - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task_read( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2490,7 +2490,7 @@ def test_external_tensor_keeps_add_output(self): @pl.program class ExternalOutProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kernel( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2528,7 +2528,7 @@ def test_parallel_loop_local_buf_keeps_add_output(self): @pl.program class SingleParallelProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2573,7 +2573,7 @@ def test_two_parallel_loops_promote_only_second(self): @pl.program class TwoParallelProgram: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def task( self, a: pl.Tensor[[16, 16], pl.FP32], @@ -2753,7 +2753,7 @@ def test_form_a_direct_return_no_alias(self): @pl.program class FormA: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kern( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2782,7 +2782,7 @@ def test_form_b_single_bind_no_alias(self): @pl.program class FormB: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kern( self, x: pl.Tensor[[16, 16], pl.FP32], @@ -2817,7 +2817,7 @@ def test_form_c_chained_alias_drops_no_op(self): @pl.program class FormC: - @pl.function(type=pl.FunctionType.InCore) + @pl.function(type=pl.FunctionType.AIV) def kern( self, x: pl.Tensor[[16, 16], pl.FP32],