Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
711 changes: 711 additions & 0 deletions docs/designs/ptoas-graph-sync-solver-buf-id-design.md

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions include/PTO/IR/PTOSyncUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ PIPE mapSyncOpTypeToPipe(SyncOpType opType);
/// Returns true when `pipe` is a concrete endpoint pipe, i.e. it is neither
/// PIPE::PIPE_UNASSIGNED nor PIPE::PIPE_ALL. Every other PIPE value is
/// reported as concrete.
bool isConcreteSyncPipe(PIPE pipe);

/// Pick a canonical SyncOpType endpoint for a concrete PIPE. This is the
/// inverse of mapSyncOpTypeToPipe up to the fact that mapSyncOpTypeToPipe is
/// many-to-one: e.g. both TVEC and TMOV_M2V map to PIPE_V. The canonical
/// SyncOpType returned here is sufficient for the EmitC pattern matchers,
/// which only care that the high-level attr maps back to the same concrete
/// PIPE.
///
/// Canonical choices made by the implementation:
///   PIPE_MTE2 -> TLOAD        PIPE_MTE3 -> TSTORE_VEC
///   PIPE_FIX  -> TSTORE_ACC   PIPE_MTE1 -> TMOV_M2L
///   PIPE_V    -> TVEC         PIPE_M    -> TMATMUL
///
/// Returns failure() for every pipe not listed above. That covers the
/// non-concrete pipes (PIPE_ALL/UNASSIGNED) and also any other PIPE value
/// that has no entry in the table, so callers must check the result.
FailureOr<SyncOpType> mapPipeToCanonicalSyncOpType(PIPE pipe);

} // namespace pto
} // namespace mlir

Expand Down
47 changes: 47 additions & 0 deletions include/PTO/Transforms/GraphSyncSolver/SyncSolverIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ enum struct OpType {
SET_FLAG_OP,
WAIT_FLAG_OP,
SW_FLAG_OP_END,
BUF_OP,
GET_BUF_OP,
RLS_BUF_OP,
BUF_OP_END,
SYNC_OP_END,
RW_OPERATION,
MMAD_OPERATION,
Expand Down Expand Up @@ -503,6 +507,49 @@ class BarrierOp : public SyncOp {
std::string str(int indent, bool recursive) const override;
};

// A5-only buffer-id bracket op (get_buf / rls_buf). Unlike SetWaitOp which
// names both src and dst pipes, a BufOp lives on a single concrete pipe — the
// shared bufId on producer and consumer brackets is what enforces ordering.
class BufOp : public SyncOp {
public:
pto::PIPE pipe{pto::PIPE::PIPE_UNASSIGNED};
int64_t bufId{-1};

BufOp(OpType opType, Operation *op, OperationBase *parentOp, pto::PIPE pipe,
int64_t bufId)
: SyncOp(opType, op, parentOp), pipe(pipe), bufId(bufId) {}

static bool classof(const OperationBase *e) {
return e->opType >= OpType::BUF_OP && e->opType < OpType::BUF_OP_END;
}
};

// BufOp specialization tagged with OpType::GET_BUF_OP.
class GetBufOp : public BufOp {
public:
  // Builds a BufOp whose opType is fixed to GET_BUF_OP.
  GetBufOp(Operation *op, OperationBase *parentOp, pto::PIPE pipe,
           int64_t bufId)
      : BufOp(OpType::GET_BUF_OP, op, parentOp, pipe, bufId) {}

  // String form of this op; defined out of line.
  std::string str(int indent, bool recursive) const override;

  // True only for nodes whose opType is exactly GET_BUF_OP.
  static bool classof(const OperationBase *e) {
    return OpType::GET_BUF_OP == e->opType;
  }
};

// BufOp specialization tagged with OpType::RLS_BUF_OP.
class RlsBufOp : public BufOp {
public:
  // Builds a BufOp whose opType is fixed to RLS_BUF_OP.
  RlsBufOp(Operation *op, OperationBase *parentOp, pto::PIPE pipe,
           int64_t bufId)
      : BufOp(OpType::RLS_BUF_OP, op, parentOp, pipe, bufId) {}

  // String form of this op; defined out of line.
  std::string str(int indent, bool recursive) const override;

  // True only for nodes whose opType is exactly RLS_BUF_OP.
  static bool classof(const OperationBase *e) {
    return OpType::RLS_BUF_OP == e->opType;
  }
};

// Bool comparator for sync ops ordering (used for containers).
bool operator<(const SyncOp &op1, const SyncOp &op2);
} // namespace mlir::pto::syncsolver
Expand Down
13 changes: 13 additions & 0 deletions include/PTO/Transforms/GraphSyncSolver/Utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,14 @@ enum SyncMode {
TEST_CROSS_CORE_MODE,
};

// Emission shape for the sync solver output. SET_WAIT is the legacy
// pto.set_flag / pto.wait_flag pairing. BUF_ID is the A5-only buffer-id
// bracketing model (pto.get_buf / pto.rls_buf around each anchor op).
enum class SyncEmitStyle {
// Legacy pairing: emit pto.set_flag / pto.wait_flag.
SET_WAIT,
// A5-only: emit pto.get_buf / pto.rls_buf brackets around each anchor op.
BUF_ID,
};

struct SyncSolverOptions {
// Synchronization mode.
const SyncMode syncMode;
Expand All @@ -130,6 +138,9 @@ struct SyncSolverOptions {
// Architecture is register based (A5).
const bool isRegBasedArch;

// Sync emission style. BUF_ID requires A5 (isRegBasedArch).
SyncEmitStyle emitStyle{SyncEmitStyle::SET_WAIT};

// Decompose MMAD L1 ops into simpler ops for better sync handling.
bool decomposeMmadl1Op{false};

Expand Down Expand Up @@ -184,6 +195,8 @@ struct SyncSolverOptions {
return syncMode == SyncMode::TEST_INTRA_CORE_MODE ||
syncMode == SyncMode::TEST_CROSS_CORE_MODE;
}

bool isBufIdEmit() const { return emitStyle == SyncEmitStyle::BUF_ID; }
};

struct Occurrence;
Expand Down
6 changes: 6 additions & 0 deletions include/PTO/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ def PTOGraphSyncSolver : Pass<"pto-graph-sync-solver", "func::FuncOp"> {
/*default=*/"8",
"Maximum EVENT_ID slots usable by coloring; this caps the hardware "
"available event-id budget used by the graph solver.">,
Option<"syncStyle", "sync-style", "std::string",
/*default=*/"\"set-wait\"",
"Sync emission style: 'set-wait' (default) or 'buf-id' (A5 only). "
"Under 'buf-id' the solver emits pto.get_buf/pto.rls_buf brackets "
"instead of pto.set_flag/pto.wait_flag, and skips pto.barrier "
"since A5 preserves same-pipe order in hardware.">,
];

let dependentDialects = [
Expand Down
19 changes: 19 additions & 0 deletions lib/PTO/IR/PTOSyncUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,22 @@ PIPE mlir::pto::mapSyncOpTypeToPipe(SyncOpType opType) {
// A pipe is concrete unless it is one of the two placeholder values
// PIPE_UNASSIGNED or PIPE_ALL.
bool mlir::pto::isConcreteSyncPipe(PIPE pipe) {
  const bool isPlaceholder =
      pipe == PIPE::PIPE_UNASSIGNED || pipe == PIPE::PIPE_ALL;
  return !isPlaceholder;
}

// Maps a PIPE to the one SyncOpType chosen as its canonical endpoint.
// Pipes without an entry below yield failure().
FailureOr<SyncOpType> mlir::pto::mapPipeToCanonicalSyncOpType(PIPE pipe) {
  if (pipe == PIPE::PIPE_MTE2)
    return SyncOpType::TLOAD;
  if (pipe == PIPE::PIPE_MTE3)
    return SyncOpType::TSTORE_VEC;
  if (pipe == PIPE::PIPE_FIX)
    return SyncOpType::TSTORE_ACC;
  if (pipe == PIPE::PIPE_MTE1)
    return SyncOpType::TMOV_M2L;
  if (pipe == PIPE::PIPE_V)
    return SyncOpType::TVEC;
  if (pipe == PIPE::PIPE_M)
    return SyncOpType::TMATMUL;
  // No canonical endpoint for this pipe.
  return failure();
}
31 changes: 31 additions & 0 deletions lib/PTO/Transforms/GraphSyncSolver/PTOGraphSyncSolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,41 @@ struct PTOGraphSyncSolverPass
// handleBarrierConflict() drop the PIPE_V barrier that A5 hardware
// does not support.
const bool isA5 = pto::isTargetArchA5(func.getOperation());

SyncEmitStyle emitStyle;
if (syncStyle == "set-wait") {
emitStyle = SyncEmitStyle::SET_WAIT;
} else if (syncStyle == "buf-id") {
emitStyle = SyncEmitStyle::BUF_ID;
} else {
func.emitError("--graph-sync-solver-sync-style: unknown value '")
<< syncStyle << "', expected 'set-wait' or 'buf-id'";
return signalPassFailure();
}
if (emitStyle == SyncEmitStyle::BUF_ID && !isA5) {
func.emitError(
"--graph-sync-solver-sync-style=buf-id requires --pto-arch=a5; "
"get_buf/rls_buf are only available on A5");
return signalPassFailure();
}

SyncSolverOptions opts(SyncMode::INTRA_CORE_SYNC,
/*isMemBasedArch=*/!isA5,
/*isRegBasedArch=*/isA5);
opts.eventIdNumMax = eventIdNumMax;
opts.emitStyle = emitStyle;
if (emitStyle == SyncEmitStyle::BUF_ID) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Skip same-pipe barriers in buf-id mode

When buf-id is selected here, same-pipe hazards are still sent through the existing handleBarrierConflict path; that path only suppresses A5 barriers for PIPE_V/PIPE_M, so legal A5 cases such as two TLOAD/MTE2 operations on the same buffer can still produce pto.barrier even though this option and the pass docs promise that buf-id/A5 emits no barriers. This makes the new mode generate unsupported/unwanted sync for same-pipe MTE2/MTE3/FIX conflicts unless the option also disables barrier conflicts for all pipes in buf-id mode.

Useful? React with 👍 / 👎.

// Constraint 4 ("different pipe pairs should not share an id") — in
// buf-id mode we conservatively disable the cross-pipe-pair id reuse
// optimization that the set-wait flow runs by default for intra-core.
opts.reuseSyncPairToSaveEventIds = false;
// buf-id is a sequential programming model; the hw (get_cnt, rel_cnt)
// counters handle loop-carried sync natively, so the set/wait backward-
// sync hoisting / merging optimizations are unnecessary (and would
// break the in-loop bracket form).
opts.considerOuterBackwardSyncPairs = false;
opts.moveOutAndMergeBackwardSyncPairs = false;
}
auto translator = std::make_unique<IRTranslator>(func, opts);

// Trivial / empty function bodies have nothing to solve.
Expand Down
Loading
Loading