Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ptodsl/api/pto.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
TensorType,
TileBufConfig,
TileBufType,
ptr,
__getattr__,
)

Expand All @@ -34,6 +35,7 @@
"int16",
"int32",
"PtrType",
"ptr",
"TensorType",
"SubTensorType",
"TileBufConfig",
Expand Down
23 changes: 21 additions & 2 deletions ptodsl/api/type_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,26 @@ def __getattr__(name):
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")


def PtrType(dtype):
    """Return the PTO pointer type for ``dtype`` via ``_pto.PtrType.get``."""
    # NOTE(review): a two-argument ``PtrType(dtype, memory_space=None)`` also
    # appears further down in this view; this one-argument form looks like the
    # pre-change side of the diff -- confirm which definition is live.
    pointer_type = _pto.PtrType.get(dtype)
    return pointer_type
def _resolve_address_space(memory_space):
    """Turn a memory-space spec into a value usable as an address space.

    Args:
        memory_space: ``None``, a string name, or any other object.

    Returns:
        ``_pto.AddressSpace.GM`` when ``memory_space`` is ``None``; the
        attribute of ``_pto.AddressSpace`` named by the stripped, upper-cased
        string when a string is given; otherwise ``memory_space`` unchanged.

    Raises:
        ValueError: if the normalized string is not an attribute of
            ``_pto.AddressSpace``.
    """
    if memory_space is None:
        return _pto.AddressSpace.GM
    if isinstance(memory_space, str):
        # Names are matched case-insensitively and ignoring outer whitespace.
        member_name = memory_space.strip().upper()
        try:
            return getattr(_pto.AddressSpace, member_name)
        except AttributeError as exc:
            raise ValueError(f"Unsupported memory_space '{memory_space}'.") from exc
    # Non-string, non-None values are passed through untouched.
    return memory_space


def PtrType(dtype, memory_space=None):
    """Return the PTO pointer type for ``dtype``, optionally in a memory space.

    Args:
        dtype: element type forwarded to ``_pto.PtrType.get``.
        memory_space: ``None`` to use the one-argument ``_pto.PtrType.get``
            call, or a spec that ``_resolve_address_space`` converts before it
            is passed as the second argument.

    Raises:
        ValueError: propagated from ``_resolve_address_space`` for an
            unsupported string name.
    """
    if memory_space is not None:
        return _pto.PtrType.get(dtype, _resolve_address_space(memory_space))
    # No memory space requested: keep the original one-argument call.
    return _pto.PtrType.get(dtype)


def ptr(dtype, *, space=None):
    """Shorthand for ``PtrType`` that takes the memory space as keyword ``space``."""
    pointer_type = PtrType(dtype, memory_space=space)
    return pointer_type


def TensorType(*, rank, dtype):
Expand Down Expand Up @@ -99,6 +117,7 @@ def TileBufType(*, shape, dtype, memory_space, valid_shape=None, config=None):

__all__ = [
"PtrType",
"ptr",
"TensorType",
"SubTensorType",
"TileBufConfig",
Expand Down
22 changes: 20 additions & 2 deletions ptodsl/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,26 @@ def __getattr__(name):
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")


def PtrType(dtype):
    """Return the PTO pointer type for ``dtype`` via ``pto.PtrType.get``."""
    # NOTE(review): a two-argument ``PtrType(dtype, memory_space=None)`` also
    # appears further down in this view; this one-argument form looks like the
    # pre-change side of the diff -- confirm which definition is live.
    pointer_type = pto.PtrType.get(dtype)
    return pointer_type
def _resolve_address_space(memory_space):
    """Turn a memory-space spec into a value usable as an address space.

    Args:
        memory_space: ``None``, a string name, or any other object.

    Returns:
        ``pto.AddressSpace.GM`` when ``memory_space`` is ``None``; the
        attribute of ``pto.AddressSpace`` named by the stripped, upper-cased
        string when a string is given; otherwise ``memory_space`` unchanged.

    Raises:
        ValueError: if the normalized string is not an attribute of
            ``pto.AddressSpace``.
    """
    if memory_space is None:
        return pto.AddressSpace.GM
    if isinstance(memory_space, str):
        # Names are matched case-insensitively and ignoring outer whitespace.
        member_name = memory_space.strip().upper()
        try:
            return getattr(pto.AddressSpace, member_name)
        except AttributeError as exc:
            raise ValueError(f"Unsupported memory_space '{memory_space}'.") from exc
    # Non-string, non-None values are passed through untouched.
    return memory_space


def PtrType(dtype, memory_space=None):
    """Return the PTO pointer type for ``dtype``, optionally in a memory space.

    Args:
        dtype: element type forwarded to ``pto.PtrType.get``.
        memory_space: ``None`` to use the one-argument ``pto.PtrType.get``
            call, or a spec that ``_resolve_address_space`` converts before it
            is passed as the second argument.

    Raises:
        ValueError: propagated from ``_resolve_address_space`` for an
            unsupported string name.
    """
    if memory_space is not None:
        return pto.PtrType.get(dtype, _resolve_address_space(memory_space))
    # No memory space requested: keep the original one-argument call.
    return pto.PtrType.get(dtype)


def ptr(dtype, *, space=None):
    """Shorthand for ``PtrType`` that takes the memory space as keyword ``space``."""
    pointer_type = PtrType(dtype, memory_space=space)
    return pointer_type


def TensorType(*, rank, dtype):
Expand Down
127 changes: 127 additions & 0 deletions ptodsl/lib/a5/A5_HEADER_COVERAGE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# A5 Header Coverage

- Total A5 headers tracked: `116`
- Implemented: `49`
- Partial: `2`
- Native only: `11`
- Pending: `35`
- Blocked/meta: `19`

| header | status | helper | note |
| --- | --- | --- | --- |
| `MGather` | `pending` | `-` | Memory gather helper is not yet represented in the PTODSL A5 layer. |
| `MScatter` | `pending` | `-` | Memory scatter helper is not yet represented in the PTODSL A5 layer. |
| `TAdd` | `implemented` | `tadd` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TAddS` | `implemented` | `tadds` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TAlias` | `blocked` | `-` | C++ helper/meta header, not a tile micro-instruction kernel surface. |
| `TAnd` | `implemented` | `tand` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TAndS` | `implemented` | `tands` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TAssign` | `blocked` | `-` | C++ helper/meta header, not a tile micro-instruction kernel surface. |
| `TAxpy` | `partial` | `taxpy` | Same-dtype vector-micro path is implemented via vmula; the C++ mixed f32<-f16 variant is still missing. |
| `TBinOp` | `implemented` | `tbinary._binary_tile_vop` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TBinSOp` | `implemented` | `tscalar._scalar_tile_vop` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TCmp` | `native` | `compare` | Still forwarded to the native PTO tile builder; packed predicate tile lowering is not rewritten yet. |
| `TCmps` | `native` | `compare` | Still forwarded to the native PTO tile builder; scalar compare packing is not rewritten yet. |
| `TColArgMax` | `pending` | `-` | Arg-reduction micro lowering is not implemented yet. |
| `TColArgMin` | `pending` | `-` | Arg-reduction micro lowering is not implemented yet. |
| `TColExpand` | `implemented` | `tcol_expand` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColExpandAdd` | `implemented` | `tcol_expand_add` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColExpandBinOp` | `pending` | `-` | Generic binary broadcast frontend is not exposed yet. |
| `TColExpandDiv` | `implemented` | `tcol_expand_div` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColExpandExpdif` | `pending` | `-` | Specialized exp-diff broadcast lowering is not implemented yet. |
| `TColExpandMax` | `implemented` | `tcol_expand_max` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColExpandMin` | `implemented` | `tcol_expand_min` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColExpandMul` | `implemented` | `tcol_expand_mul` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColExpandSub` | `implemented` | `tcol_expand_sub` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColMax` | `implemented` | `tcol_max` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColMin` | `implemented` | `tcol_min` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColProd` | `blocked` | `-` | No column-product micro lowering is wired yet. |
| `TColReduceIdx` | `pending` | `-` | Indexed column reduction is not implemented yet. |
| `TColReduceOps` | `implemented` | `treduce._tcol_reduce` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TColSum` | `implemented` | `tcol_sum` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TConcat` | `native` | `concat` | Still forwarded to the native PTO tile builder, not rewritten to micro ops yet. |
| `TCvt` | `pending` | `-` | Tile conversion helper is not implemented in the A5 micro layer yet. |
| `TDeQuant` | `pending` | `-` | Quantization/dequantization path is not implemented yet. |
| `TDiv` | `implemented` | `tdiv` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TDivS` | `implemented` | `tdivs` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TExpandS` | `pending` | `-` | Scalar expand helper is not implemented yet. |
| `TExtract` | `native` | `extract` | Still forwarded to the native PTO tile builder. |
| `TFMod` | `pending` | `-` | Fmod lowering is not implemented yet. |
| `TFModS` | `pending` | `-` | Scalar fmod lowering is not implemented yet. |
| `TFillPad` | `pending` | `-` | Pad/fill helper is not implemented yet. |
| `TGather` | `partial` | `tgather` | Indexed gather is implemented via vgather2; mask-pattern gather still requires vsqz-style micro support, which is not available yet. |
| `TGatherB` | `pending` | `-` | GatherB lowering is not implemented yet, even though vgatherb exists in the micro surface. |
| `TGetScaleAddr` | `pending` | `-` | Scale-address helper is not represented in the PTODSL A5 layer. |
| `THistogram` | `pending` | `-` | Histogram lowering is not implemented yet. |
| `TImg2col` | `blocked` | `-` | Hardware layout/state programming path, not a straightforward vector-micro rewrite target. |
| `TInsert` | `native` | `insert` | Still forwarded to the native PTO tile builder. |
| `TLRelu` | `implemented` | `tlrelu` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TLoad` | `native` | `load_tile` | Structural staging helper, not a compute rewrite target. |
| `TMatmul` | `blocked` | `-` | Cube/L0 path is not a pure vector-micro rewrite target. |
| `TMax` | `implemented` | `tmax` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TMaxs` | `implemented` | `tmaxs` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TMin` | `implemented` | `tmin` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TMins` | `implemented` | `tmins` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TMov` | `implemented` | `tmov` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TMrgSort` | `implemented` | `tmrgsort` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TMul` | `implemented` | `tmul` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TMulS` | `implemented` | `tmuls` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TOr` | `implemented` | `tor_` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TOrS` | `implemented` | `tors` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TPack` | `pending` | `-` | Pack lowering is not implemented yet. |
| `TPartAdd` | `pending` | `-` | Part-op lowering is not implemented yet. |
| `TPartBinOps` | `pending` | `-` | Part-op lowering is not implemented yet. |
| `TPartMax` | `pending` | `-` | Part-op lowering is not implemented yet. |
| `TPartMin` | `pending` | `-` | Part-op lowering is not implemented yet. |
| `TPartMul` | `pending` | `-` | Part-op lowering is not implemented yet. |
| `TPop` | `blocked` | `-` | Runtime buffer stack/state helper, not a direct vector tile rewrite target. |
| `TPrefetch` | `blocked` | `-` | Prefetch/runtime helper, not a direct vector tile rewrite target. |
| `TPrelu` | `pending` | `-` | PReLU lowering is not implemented yet. |
| `TPrint` | `native` | `native print` | Still forwarded to the native PTO tile builder. |
| `TPush` | `blocked` | `-` | Runtime buffer stack/state helper, not a direct vector tile rewrite target. |
| `TQuant` | `pending` | `-` | Quantization path is not implemented yet. |
| `TRandom` | `pending` | `-` | Random-number helper is not implemented yet. |
| `TRem` | `pending` | `-` | Remainder lowering is not implemented yet. |
| `TRemS` | `pending` | `-` | Scalar remainder lowering is not implemented yet. |
| `TReshape` | `native` | `native reshape` | View/layout helper, not rewritten in the A5 micro layer. |
| `TRowExpand` | `implemented` | `trow_expand` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowExpandAdd` | `implemented` | `trow_expand_add` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowExpandBinOp` | `pending` | `-` | Generic row-broadcast binary frontend is not exposed yet. |
| `TRowExpandDiv` | `implemented` | `trow_expand_div` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowExpandExpdif` | `pending` | `-` | Specialized exp-diff row-broadcast lowering is not implemented yet. |
| `TRowExpandMax` | `implemented` | `trow_expand_max` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowExpandMin` | `implemented` | `trow_expand_min` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowExpandMul` | `implemented` | `trow_expand_mul` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowExpandSub` | `implemented` | `trow_expand_sub` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowProd` | `blocked` | `-` | No row-product micro lowering is wired yet. |
| `TRowReduce` | `implemented` | `treduce._trow_reduce` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TRowReduceIdx` | `pending` | `-` | Indexed row reduction is not implemented yet. |
| `TRsqrt` | `implemented` | `trsqrt` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TScatter` | `pending` | `-` | Scatter lowering is not implemented yet, even though vscatter exists in the micro surface. |
| `TSel` | `pending` | `-` | Packed-mask select lowering is not implemented yet. |
| `TSels` | `pending` | `-` | Scalar/mask select lowering is not implemented yet. |
| `TSetFmatrix` | `blocked` | `-` | Hardware state setup header, not a straightforward vector-micro rewrite target. |
| `TSetImg2colPadding` | `blocked` | `-` | Hardware state setup header, not a straightforward vector-micro rewrite target. |
| `TSetImg2colRpt` | `blocked` | `-` | Hardware state setup header, not a straightforward vector-micro rewrite target. |
| `TShl` | `implemented` | `tshl` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TShlS` | `implemented` | `tshls` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TShr` | `implemented` | `tshr` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TShrS` | `implemented` | `tshrs` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TSort32` | `implemented` | `tsort32` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TStore` | `native` | `store_tile` | Structural staging helper, not a compute rewrite target. |
| `TSub` | `implemented` | `tsub` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TSubS` | `implemented` | `tsubs` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TSubView` | `blocked` | `-` | View helper, not a tile compute rewrite target. |
| `TSync` | `pending` | `-` | Synchronization helper is not represented in the A5 library layer yet. |
| `TTrans` | `native` | `trans` | Still forwarded to the native PTO tile builder. |
| `TTri` | `pending` | `-` | Triangular helper is not implemented yet. |
| `TUnaryOp` | `implemented` | `tunary._unary_tile_vop` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TXor` | `implemented` | `txor` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `TXorS` | `implemented` | `txors` | Rewritten with PTO micro instructions in the PTODSL A5 layer. |
| `Tci` | `native` | `native tci` | Still forwarded to the native PTO tile builder. |
| `common` | `blocked` | `-` | A5 shared infrastructure header. |
| `custom/Div754` | `blocked` | `-` | Custom implementation helper header. |
| `custom/TSyncCVID` | `blocked` | `-` | Custom sync helper header. |
| `custom/TSync_Custom` | `blocked` | `-` | Custom sync helper header. |
| `datatype` | `blocked` | `-` | A5 shared datatype infrastructure header. |
| `utils` | `blocked` | `-` | A5 shared utility infrastructure header. |
33 changes: 26 additions & 7 deletions ptodsl/lib/a5/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,29 @@ The scope of this layout is:

- Small, readable files that show how a tile helper is written from PTO micro
opcodes such as `pto.vlds`, `pto.vadd`, and `pto.vsts`
- A5-flavored aliases such as `TLoad`, `TAdd`, `TMatmul`, and `TStore`
- Canonical tile helper names such as `tadd`, `trow_sum`, and `tgather`, plus
matching A5 header aliases such as `TAdd` where parity with `pto-isa` matters
- Example builder kernels that emit `.pto` through PTODSL
- A checked-in generation flow for reproducible `.pto` artifacts
- A checked-in generation flow for reproducible `.pto` artifacts and HIVM LLVM
sidecars for pure micro kernels

Entry points:

- [`tbinary.py`](./tbinary.py): tile binary helpers such as `tadd`, `tsub`, `tmul`,
`tdiv`, and `tor_`, written with PTO vector micro ops
`tdiv`, `tmax`, `tmin`, and `tor_`, written with PTO vector micro ops
- [`tscalar.py`](./tscalar.py): scalar tile helpers such as `tadds`, `tmaxs`,
`tlrelu`, and `taxpy`
- [`tunary.py`](./tunary.py): tile unary helpers such as `texp`, `tlog`, `trelu`,
`tsqrt`, `trsqrt`, and `trecip`
- [`texpand.py`](./texpand.py): row and column broadcast helpers
- [`texpand.py`](./texpand.py): row and column broadcast helpers, including add/sub/div/mul/max/min variants
- [`treduce.py`](./treduce.py): row and column reduction helpers
- [`tsort.py`](./tsort.py): gather and sort helpers
- [`native.py`](./native.py): helpers that still map directly to tile/cube ops
- [`ops.py`](./ops.py): the public A5 surface that re-exports the split helpers
- [`kernels.py`](./kernels.py): translated example kernels
- [`a5_header_coverage.py`](./a5_header_coverage.py): tracked status for the wider A5 header inventory
- [`kernels.py`](./kernels.py): translated example kernels, including the
no-section `build_hivm_vadd_demo()` flow that lowers through PTOAS VPTO into
`llvm.hivm.*` intrinsics
- [`generated`](./generated): emitted `.pto` artifacts from `scripts/generate_a5_pto.py`

Regenerate the current artifacts with:
Expand All @@ -33,5 +40,17 @@ PYTHONPATH=/Users/zhoubot/github/.llvm-19.1.7/build-mlir-py312/tools/mlir/python
/Users/zhoubot/github/.venv-ptoas-src312/bin/python scripts/generate_a5_pto.py
```

`--emit-cpp` is best-effort: the tile-based kernels lower through local `ptoas`,
while the direct micro-only kernel currently remains `.pto`-only in this environment.
To also emit HIVM LLVM for the pure micro kernels (adjust the machine-specific paths to your local MLIR and PTOAS builds):

```bash
PYTHONPATH=/Users/zhoubot/github/.llvm-19.1.7/build-mlir-py312/tools/mlir/python_packages/mlir_core:/Users/zhoubot/github/pto-org/PTOAS/install-src312:/Users/zhoubot/github/pto-org/PTOAS/build-src312/python \
/Users/zhoubot/github/.venv-ptoas-src312/bin/python scripts/generate_a5_pto.py --emit-hivm-llvm
```

`--emit-cpp` and `--emit-hivm-llvm` are intentionally asymmetric:

- pure micro kernels such as `a5_hivm_vadd_demo` now lower end-to-end through
  PTOAS VPTO into `llvm.hivm.*` intrinsics
- `--emit-hivm-llvm` writes `.ll` sidecars only for the kernels listed in
  `a5.HIVM_LLVM_KERNELS`
- tensor-view and tile-buffer frontend examples remain useful PTODSL coverage,
  but they are not yet the canonical PTOAS VPTO/HIVM path
10 changes: 9 additions & 1 deletion ptodsl/lib/a5/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from . import native, ops, tbinary, texpand, treduce, tsort, tunary
from . import native, ops, tbinary, texpand, treduce, tscalar, tsort, tunary
from .a5_header_coverage import A5_HEADER_COVERAGE, a5_header_coverage_markdown
from .kernels import (
HIVM_LLVM_KERNELS,
KERNEL_BUILDERS,
build_cube_matmul,
build_elementwise_add,
build_hivm_vadd_demo,
build_mxfp8_matmul,
build_templated_elementwise_add,
build_vector_copy,
Expand All @@ -15,10 +18,14 @@
)

__all__ = list(ops.__all__) + [
"A5_HEADER_COVERAGE",
"HIVM_LLVM_KERNELS",
"KERNEL_BUILDERS",
"TILE_MICRO_COVERAGE",
"a5_header_coverage_markdown",
"build_cube_matmul",
"build_elementwise_add",
"build_hivm_vadd_demo",
"build_mxfp8_matmul",
"build_templated_elementwise_add",
"build_vector_copy",
Expand All @@ -28,6 +35,7 @@
"tbinary",
"texpand",
"treduce",
"tscalar",
"tsort",
"tunary",
]
Loading
Loading