Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions .github/workflows/ci_sim.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ jobs:
VPTO_SIM_WORKSPACE: ${{ github.workspace }}/.work/vpto-sim-ci
TILELANG_DSL_WORKSPACE: ${{ github.workspace }}/.work/tilelang-dsl-ci
TILELANG_DSL_UT_WORKSPACE: ${{ github.workspace }}/.work/tilelang-dsl-ut-ci
PYPTO_REF: main
PYPTO_WORKSPACE: ${{ github.workspace }}/.work/pypto-ci
PYPTO_RUN_WORKSPACE: ${{ github.workspace }}/.work/pypto-run-ci
PTO_ISA_COMMIT: 016396b57e2c17093f1194e6acd89bb112b0ab24
PTO_ISA_ROOT: ${{ github.workspace }}/.work/pto-isa-ci
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -202,6 +207,9 @@ jobs:
rm -rf "${PTO_INSTALL_DIR}"
rm -rf "${VPTO_SIM_WORKSPACE}"
rm -rf "${TILELANG_DSL_WORKSPACE}"
rm -rf "${PYPTO_WORKSPACE}"
rm -rf "${PYPTO_RUN_WORKSPACE}"
rm -rf "${PTO_ISA_ROOT}"

- name: Prepare LLVM source
shell: bash
Expand Down Expand Up @@ -295,6 +303,156 @@ jobs:
echo "ASCEND_HOME_PATH=${ASCEND_HOME_PATH_DETECTED}" >> "${GITHUB_ENV}"
echo "PTOAS_BIN=${GITHUB_WORKSPACE}/build/tools/ptoas/ptoas" >> "${GITHUB_ENV}"

- name: Checkout PyPTO
uses: actions/checkout@v4
with:
repository: hw-native-sys/pypto
ref: ${{ env.PYPTO_REF }}
path: ${{ env.PYPTO_WORKSPACE }}
fetch-depth: 1
persist-credentials: false

- name: Checkout PTO-ISA
uses: actions/checkout@v4
with:
repository: hw-native-sys/pto-isa
ref: ${{ env.PTO_ISA_COMMIT }}
path: ${{ env.PTO_ISA_ROOT }}
fetch-depth: 1
persist-credentials: false

- name: Ensure GCC 15 for PyPTO simulator
shell: bash
run: |
set -euo pipefail

compiler_major() {
"$1" -dumpfullversion -dumpversion | cut -d. -f1
}

if command -v g++-15 >/dev/null 2>&1; then
if [[ "$(compiler_major "$(command -v g++-15)")" != "15" ]]; then
echo "ERROR: g++-15 exists but is not GCC 15: $(g++-15 --version | head -1)" >&2
exit 1
fi
if ! command -v gcc-15 >/dev/null 2>&1; then
echo "ERROR: g++-15 exists but gcc-15 is missing." >&2
exit 1
fi
exit 0
fi

if ! command -v x86_64-conda-linux-gnu-g++ >/dev/null 2>&1 || \
! command -v x86_64-conda-linux-gnu-gcc >/dev/null 2>&1 || \
[[ "$(compiler_major "$(command -v x86_64-conda-linux-gnu-g++)")" != "15" ]]; then
if ! command -v conda >/dev/null 2>&1; then
echo "ERROR: PyPTO simulator smoke requires GCC/G++ 15 and conda is unavailable to install it." >&2
exit 1
fi
conda install -y -c conda-forge gcc_linux-64=15 gxx_linux-64=15
hash -r
fi

mkdir -p "${PYPTO_WORKSPACE}/.work/bin"
ln -sf "$(command -v x86_64-conda-linux-gnu-g++)" "${PYPTO_WORKSPACE}/.work/bin/g++-15"
ln -sf "$(command -v x86_64-conda-linux-gnu-gcc)" "${PYPTO_WORKSPACE}/.work/bin/gcc-15"
echo "${PYPTO_WORKSPACE}/.work/bin" >> "${GITHUB_PATH}"

- name: Prepare PyPTO Python dependencies
shell: bash
run: |
set -euo pipefail

python3 -m pip install --upgrade pip
python3 -m pip install --index-url https://download.pytorch.org/whl/cpu torch
python3 -m pip install scikit-build-core nanobind ninja pytest cloudpickle

- name: Install PyPTO frontend and runtime
shell: bash
env:
PTO_ISA_ROOT: ${{ env.PTO_ISA_ROOT }}
run: |
set -euo pipefail
rm -rf "${PYPTO_WORKSPACE}/build" "${PYPTO_WORKSPACE}/_skbuild" "${PYPTO_WORKSPACE}/runtime/build"
python3 -m pip install --no-build-isolation --no-deps "${PYPTO_WORKSPACE}"
# ci_sim only runs PyPTO simulator tests. Keep the runtime install from
# auto-detecting onboard platforms from the runner's CANN environment.
env -u ASCEND_HOME_PATH python3 -m pip install --no-build-isolation --no-deps "${PYPTO_WORKSPACE}/runtime"

- name: Run PyPTO PTOAS end-to-end simulator smoke
shell: bash
env:
PTOAS_ROOT: ${{ github.workspace }}/build/tools/ptoas
PTO_ISA_ROOT: ${{ env.PTO_ISA_ROOT }}
run: |
set -euo pipefail
mkdir -p "${PYPTO_RUN_WORKSPACE}"
cd "${PYPTO_WORKSPACE}"

run_pypto_pytest() {
local platform="$1"
local suite_name="$2"
local kernels_dir="${PYPTO_RUN_WORKSPACE}/${suite_name}_${platform}"
local log_file="${PYPTO_RUN_WORKSPACE}/${suite_name}_${platform}.log"
shift 2

python3 -m pytest "$@" \
-v \
--platform="${platform}" \
--pto-isa-commit="${PTO_ISA_COMMIT}" \
--save-kernels \
--kernels-dir="${kernels_dir}" \
2>&1 | tee "${log_file}"
}

# All suites below compile through PyPTO's PTO backend and run ptoas.
# They are split into separate pytest processes to avoid backend global-state conflicts.
run_pypto_core_smoke() {
local platform="$1"
local dyn_valid_shape="tests/st/runtime/control_flow/test_dyn_orch_shape.py::TestDynOrchShapeOperations::test_dyn_orch_valid_shape_add[shape0-valid_shape0-${platform}]"
local cross_core="tests/st/runtime/cross_core/test_cross_core.py::TestCrossCore::test_tpush_tpop_v2c_updown[${platform}]"
local pypto_smoke_tests=(
tests/st/examples/00_hello_world/test_hello_world.py
tests/st/examples/02_intermediate/test_softmax.py::TestTileSoftmax::test_tile_softmax
tests/st/examples/02_intermediate/test_rms_norm.py::TestRMSNormCore::test_rms_norm_core
tests/st/runtime/ops/test_elementwise.py
tests/st/runtime/ops/test_assemble.py
tests/st/runtime/framework_and_models/test_jit.py::TestJITExecution::test_cache_hit_reuses_compiled_program
tests/st/runtime/framework_and_models/test_jit.py::TestJITDynamicBatch::test_one_artifact_serves_multiple_batches
tests/st/runtime/framework_and_models/test_compiled_program.py::TestManualWorkerExtraction::test_block_dim_override_runs
"${dyn_valid_shape}"
"${cross_core}"
)
run_pypto_pytest "${platform}" core_ptoas "${pypto_smoke_tests[@]}"
}

run_pypto_fa_ptoas_smoke() {
local platform="$1"
local pypto_fa_tests=(
tests/st/runtime/ops/test_cast.py::TestCast::test_tile_cast_col_major_narrow
tests/st/runtime/framework_and_models/test_paged_attention.py::TestPagedAttentionKernels::test_qk_matmul_ptoas[16-128-128]
tests/st/runtime/framework_and_models/test_paged_attention.py::TestPagedAttentionKernels::test_softmax_prepare_ptoas[16-128]
tests/st/runtime/framework_and_models/test_paged_attention.py::TestPagedAttentionKernels::test_softmax_prepare_unaligned_ptoas[16-128-100]
tests/st/runtime/framework_and_models/test_paged_attention.py::TestPagedAttentionKernels::test_pv_matmul_ptoas[16-128-128]
tests/st/runtime/framework_and_models/test_paged_attention.py::TestPagedAttentionKernels::test_online_update_ptoas[16-128-0-1]
)
run_pypto_pytest "${platform}" fa_ptoas "${pypto_fa_tests[@]}"
}

run_pypto_int8_codegen_smoke() {
local platform="$1"
local pypto_int8_tests=(
tests/st/codegen/dsl/test_batch_matmul_pipeline.py::test_no_mat_to_mat_tmov
)
run_pypto_pytest "${platform}" int8_ptoas_codegen "${pypto_int8_tests[@]}"
}

run_pypto_core_smoke a5sim
run_pypto_fa_ptoas_smoke a5sim
run_pypto_core_smoke a2a3sim
run_pypto_fa_ptoas_smoke a2a3sim
run_pypto_int8_codegen_smoke a2a3sim

- name: Run VPTO SIM validation
if: ${{ true }}
shell: bash
Expand All @@ -313,6 +471,8 @@ jobs:
run: |
set -euo pipefail
mkdir -p "${TILELANG_DSL_WORKSPACE}"
export LLVM_BUILD_DIR="${LLVM_DIR}"
export MLIR_PYTHON_ROOT="${MLIR_PYTHONPATH}"
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
ASCEND_HOME_PATH="${ASCEND_HOME_PATH}" \
PTOAS_BIN="${PTOAS_BIN}" \
Expand Down Expand Up @@ -374,3 +534,12 @@ jobs:
${{ env.VPTO_SIM_WORKSPACE }}/parallel-runner.log
${{ env.VPTO_SIM_WORKSPACE }}/parallel-summary.tsv
if-no-files-found: warn

- name: Upload PyPTO SIM logs
if: always()
uses: actions/upload-artifact@v4
with:
name: pypto-sim-smoke-${{ github.run_id }}
path: |
${{ env.PYPTO_RUN_WORKSPACE }}/*.log
if-no-files-found: warn
14 changes: 5 additions & 9 deletions include/PTO/IR/PTO.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ class ScopedPTOParserTargetArch {
};


/// Function attribute that marks an explicit PTO kernel entry.
/// Function attributes that mark an explicit PTO kernel entry.
inline constexpr llvm::StringLiteral kPTOEntryAttrName = "pto.entry";
inline constexpr llvm::StringLiteral kLegacyHACCEntryAttrName = "hacc.entry";
inline constexpr llvm::StringLiteral kPTOKernelAttrName = "pto.kernel";
Expand All @@ -188,13 +188,9 @@ inline constexpr llvm::StringLiteral kPTOSimtMaxThreadsAttrName =
inline constexpr llvm::StringLiteral kPTOSimtMaxRegistersAttrName =
"pto.simt_max_regs";

/// Return true if the operation carries a PTO kernel marker.
bool hasPTOKernelAttr(Operation *op);

/// Return true if the function is a PTO kernel definition.
bool isPTOKernelFunction(func::FuncOp func);

/// Return true if the function carries an explicit entry marker.
/// Return true if the function carries an explicit entry marker. PTO accepts
/// both the EmitC naming (`pto.entry`) and VPTO naming (`pto.kernel`) as entry
/// aliases; `hacc.entry` and `pto.aicore` are legacy aliases.
bool hasExplicitPTOEntryAttr(func::FuncOp func);

/// Return true if the function should be emitted as an AICORE entry.
Expand All @@ -203,7 +199,7 @@ bool isPTOEntryFunction(func::FuncOp func);
/// Validate module-level PTO entry configuration before EmitC lowering.
LogicalResult validatePTOEntryFunctions(ModuleOp module);

/// Materialize the effective PTO entry selection onto function attributes.
/// Clear internal PTO entry selection markers from function attributes.
void annotatePTOEntryFunctions(ModuleOp module);

} // namespace pto
Expand Down
52 changes: 5 additions & 47 deletions lib/PTO/IR/PTO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2511,47 +2511,20 @@ bool mlir::pto::isScalarPtrOrMemRef(Type type) {
return false;
}

bool mlir::pto::hasPTOKernelAttr(Operation *op) {
return op && (op->hasAttr(kPTOKernelAttrName) ||
op->hasAttr(kLegacyPTOAICoreAttrName));
}

bool mlir::pto::isPTOKernelFunction(func::FuncOp func) {
return func && !func.isDeclaration() && hasPTOKernelAttr(func.getOperation());
}

bool mlir::pto::hasExplicitPTOEntryAttr(func::FuncOp func) {
return func && (func->hasAttrOfType<UnitAttr>(kPTOEntryAttrName) ||
func->hasAttrOfType<UnitAttr>(kLegacyHACCEntryAttrName));
func->hasAttrOfType<UnitAttr>(kLegacyHACCEntryAttrName) ||
func->hasAttrOfType<UnitAttr>(kPTOKernelAttrName) ||
func->hasAttrOfType<UnitAttr>(kLegacyPTOAICoreAttrName));
}

static constexpr StringLiteral kEffectivePTOEntryAttrName =
"pto.internal.entry";

static SmallVector<func::FuncOp> getPTOFunctionDefinitions(ModuleOp module) {
SmallVector<func::FuncOp> defs;
if (!module)
return defs;
for (auto func : module.getOps<func::FuncOp>()) {
if (!func.isDeclaration())
defs.push_back(func);
}
return defs;
}

bool mlir::pto::isPTOEntryFunction(func::FuncOp func) {
if (!func || func.isDeclaration())
return false;
if (auto attr = func->getAttrOfType<BoolAttr>(kEffectivePTOEntryAttrName))
return attr.getValue();
if (hasExplicitPTOEntryAttr(func))
return true;

ModuleOp module = func->getParentOfType<ModuleOp>();
if (!module)
return false;
SmallVector<func::FuncOp> defs = getPTOFunctionDefinitions(module);
return defs.size() == 1 && defs.front() == func;
return hasExplicitPTOEntryAttr(func);
}

LogicalResult mlir::pto::validatePTOEntryFunctions(ModuleOp module) {
Expand All @@ -2569,7 +2542,7 @@ LogicalResult mlir::pto::validatePTOEntryFunctions(ModuleOp module) {
}

for (auto func : module.getOps<func::FuncOp>()) {
if (!isPTOEntryFunction(func))
if (!hasExplicitPTOEntryAttr(func))
continue;
if (func.getFunctionType().getNumResults() != 0) {
return func.emitOpError()
Expand All @@ -2583,23 +2556,8 @@ void mlir::pto::annotatePTOEntryFunctions(ModuleOp module) {
if (!module)
return;

SmallVector<func::FuncOp> defs = getPTOFunctionDefinitions(module);
for (auto func : module.getOps<func::FuncOp>())
func->removeAttr(kEffectivePTOEntryAttrName);

if (defs.empty())
return;
if (defs.size() == 1) {
defs.front()->setAttr(kEffectivePTOEntryAttrName,
BoolAttr::get(module.getContext(), true));
return;
}

for (auto func : defs) {
func->setAttr(kEffectivePTOEntryAttrName,
BoolAttr::get(module.getContext(),
hasExplicitPTOEntryAttr(func)));
}
}

//===----------------------------------------------------------------------===//
Expand Down
6 changes: 2 additions & 4 deletions lib/PTO/Transforms/VPTOCANN900LLVMEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3689,9 +3689,7 @@ static FailureOr<VcvtContract> buildVcvtContract(pto::VcvtOp op) {
}

static bool needsV300CtrlModeForVPTOFunc(func::FuncOp funcOp) {
if ((!pto::isPTOEntryFunction(funcOp) &&
!pto::isPTOKernelFunction(funcOp)) ||
funcOp.getBlocks().empty())
if (!pto::isPTOEntryFunction(funcOp) || funcOp.getBlocks().empty())
return false;

bool needsCtrlSetup = false;
Expand Down Expand Up @@ -9958,7 +9956,7 @@ static LogicalResult renameKernelFunctionsForKernelKind(ModuleOp module,
}

for (func::FuncOp funcOp : module.getOps<func::FuncOp>()) {
if (!pto::hasPTOKernelAttr(funcOp.getOperation()))
if (!pto::hasExplicitPTOEntryAttr(funcOp))
continue;
if (funcOp.getSymName().ends_with(suffix))
continue;
Expand Down
6 changes: 2 additions & 4 deletions lib/PTO/Transforms/VPTOLLVMEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3660,9 +3660,7 @@ static FailureOr<VcvtContract> buildVcvtContract(pto::VcvtOp op) {
}

static bool needsV300CtrlModeForVPTOFunc(func::FuncOp funcOp) {
if ((!pto::isPTOEntryFunction(funcOp) &&
!pto::isPTOKernelFunction(funcOp)) ||
funcOp.getBlocks().empty())
if (!pto::isPTOEntryFunction(funcOp) || funcOp.getBlocks().empty())
return false;

bool needsCtrlSetup = false;
Expand Down Expand Up @@ -9988,7 +9986,7 @@ static LogicalResult renameKernelFunctionsForKernelKind(ModuleOp module,
}

for (func::FuncOp funcOp : module.getOps<func::FuncOp>()) {
if (!pto::hasPTOKernelAttr(funcOp.getOperation()))
if (!pto::hasExplicitPTOEntryAttr(funcOp))
continue;
if (funcOp.getSymName().ends_with(suffix))
continue;
Expand Down
Loading
Loading