Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions integration_test/circt-synth/comb-lowering-compare.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ hw.module @icmp_unsigned_sklanskey(in %lhs: i3, in %rhs: i3, out out_ugt: i1, ou

// RUN: circt-lec %t.mlir %s -c1=icmp_unsigned_kogge_stone -c2=icmp_unsigned_kogge_stone --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ICMP_UNSIGNED_KOGGE_STONE
// COMB_ICMP_UNSIGNED_KOGGE_STONE: c1 == c2
hw.module @icmp_unsigned_kogge_stone(in %lhs: i3, in %rhs: i3, out out_ugt: i1, out out_uge: i1, out out_ult: i1, out out_ule: i1) {
%ugt = comb.icmp ugt %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
%uge = comb.icmp uge %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
%ult = comb.icmp ult %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
%ule = comb.icmp ule %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
// Use a slightly larger, non-power-of-two width (i14) to verify the lazy
// prefix tree logic.
hw.module @icmp_unsigned_kogge_stone(in %lhs: i14, in %rhs: i14, out out_ugt: i1, out out_uge: i1, out out_ult: i1, out out_ule: i1) {
%ugt = comb.icmp ugt %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
%uge = comb.icmp uge %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
%ult = comb.icmp ult %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
%ule = comb.icmp ule %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
hw.output %ugt, %uge, %ult, %ule : i1, i1, i1, i1
}

Expand Down
87 changes: 80 additions & 7 deletions lib/Conversion/CombToSynth/CombToSynth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,67 @@ void lowerBrentKungPrefixTree(OpBuilder &builder, Location loc,
});
}

// Lazily evaluated Kogge-Stone parallel prefix tree.
//
// Nodes of the tree are only built when they are requested, either directly
// through `getFinal` or as a dependency of a requested node. Every node that
// has been built is memoized, so shared nodes are created at most once.
//
// Throughout this class a node is represented as a {propagate, generate}
// pair, in that order.
//
// TODO: Generalize to other parallel prefix trees.
class LazyKoggeStonePrefixTree {
public:
  // Seeds level 0 of the tree with the per-bit inputs: `pPrefix[i]` and
  // `gPrefix[i]` are the propagate and generate values of bit `i` for every
  // `i` in [0, width). Both arrays must hold at least `width` values.
  LazyKoggeStonePrefixTree(OpBuilder &builder, Location loc, int64_t width,
                           ArrayRef<Value> pPrefix, ArrayRef<Value> gPrefix)
      : builder(builder), loc(loc), width(width) {
    assert(width > 0 && "width must be positive");
    assert(pPrefix.size() >= static_cast<size_t>(width) &&
           gPrefix.size() >= static_cast<size_t>(width) &&
           "pPrefix and gPrefix must provide at least `width` values");
    // Use the key's own index type to avoid mixing signed and unsigned values.
    for (int64_t i = 0; i < width; ++i)
      prefixCache[{0, i}] = {pPrefix[i], gPrefix[i]};
  }

  // Returns the final {propagate, generate} pair for bit `i`, i.e. the node
  // of bit `i` at the last level of the tree.
  std::pair<Value, Value> getFinal(int64_t i) {
    assert(i >= 0 && i < width && "i out of bounds");
    // Final level is ceil(log2(width)) in Kogge-Stone.
    return getGroupAndPropagate(llvm::Log2_64_Ceil(width), i);
  }

private:
  // Recursively gets the {propagate, generate} pair for bit `i` at level
  // `level`.
  // Level 0 is the initial level with the input propagate and generate values.
  // Level n (n > 0) combines values of level n-1 with a stride of 2^(n-1).
  // Uses memoization to cache intermediate results.
  std::pair<Value, Value> getGroupAndPropagate(int64_t level, int64_t i);

  OpBuilder &builder;
  Location loc;
  int64_t width;
  // Maps (level, bit index) to the {propagate, generate} pair of that node.
  DenseMap<std::pair<int64_t, int64_t>, std::pair<Value, Value>> prefixCache;
};

// Returns the {propagate, generate} pair of bit `i` at `level`. On a cache
// miss the pair is derived from the nodes of `level - 1` (creating
// comb::AndOp / comb::OrOp operations when two nodes have to be combined) and
// stored in `prefixCache` before it is returned.
std::pair<Value, Value>
LazyKoggeStonePrefixTree::getGroupAndPropagate(int64_t level, int64_t i) {
assert(i < static_cast<int64_t>(width) && "i out of bounds");
// Fast path: reuse a node that was already computed or seeded at level 0.
auto key = std::make_pair(level, i);
auto it = prefixCache.find(key);
if (it != prefixCache.end())
return it->second;

// Cache miss: the node has to be computed from level - 1. The constructor
// seeds level 0 for every bit in [0, width), so for a valid `i` a miss can
// only happen at level >= 1.
assert(level > 0 && "level must be positive");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: perhaps a comment to justify that we're now moving to compute the values and we can only do that after level 0 would help - took me a minute to get that

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely, sorry for the confusion.


// Distance between bit `i` and the bit it is combined with: 2^(level - 1).
int64_t previousStride = 1ULL << (level - 1);
if (i < previousStride) {
// No dependency at this level: forward the value of the previous level
// unchanged and cache it under this level's key.
auto [propagateI, generateI] = getGroupAndPropagate(level - 1, i);
prefixCache[key] = {propagateI, generateI};
return prefixCache[key];
}
// Get the dependency index.
int64_t j = i - previousStride;
// Both operands are taken from the previous level (computed on demand).
auto [propagateI, generateI] = getGroupAndPropagate(level - 1, i);
auto [propagateJ, generateJ] = getGroupAndPropagate(level - 1, j);
// Group generate: g_i OR (p_i AND g_j)
Value andPG = comb::AndOp::create(builder, loc, propagateI, generateJ);
Value newGenerate = comb::OrOp::create(builder, loc, generateI, andPG);
// Group propagate: p_i AND p_j
Value newPropagate =
comb::AndOp::create(builder, loc, propagateI, propagateJ);
// Memoize as {propagate, generate}, the same order used for level 0.
prefixCache[key] = {newPropagate, newGenerate};
return prefixCache[key];
}

template <bool lowerToMIG>
struct CombAddOpConversion : OpConversionPattern<AddOp> {
using OpConversionPattern<AddOp>::OpConversionPattern;
Expand Down Expand Up @@ -1080,37 +1141,49 @@ struct CombICmpOpConversion : OpConversionPattern<ICmpOp> {
// need the final result. Optimizing this to skip intermediate computations
// is non-trivial because each iteration depends on results from previous
// iterations. We rely on DCE passes to remove unused operations.
// TODO: Lazily compute only the required prefix values.
// TODO: Lazily compute only the required prefix values. Kogge-Stone is
// already implemented in a lazy manner below, but other architectures can
// also be optimized.
static Value computePrefixComparison(ConversionPatternRewriter &rewriter,
Location loc, SmallVector<Value> pPrefix,
SmallVector<Value> gPrefix,
bool includeEq, AdderArchitecture arch) {
auto width = pPrefix.size();
// Group generate / group propagate of the most significant bit
// (index width - 1); set by whichever architecture is selected below.
Value finalGroup, finalPropagate;
// Apply the appropriate prefix tree algorithm
switch (arch) {
case AdderArchitecture::RippleCarry:
llvm_unreachable("Ripple-Carry should be handled separately");
break;
case AdderArchitecture::Sklanskey:
case AdderArchitecture::Sklanskey: {
// NOTE(review): the results are read back from pPrefix/gPrefix, so the
// lowering presumably updates them in place - confirm.
lowerSklanskeyPrefixTree(rewriter, loc, pPrefix, gPrefix);
finalGroup = gPrefix[width - 1];
finalPropagate = pPrefix[width - 1];
break;
}
case AdderArchitecture::KoggeStone:
lowerKoggeStonePrefixTree(rewriter, loc, pPrefix, gPrefix);
// Use lazy Kogge-Stone implementation to avoid computing all
// intermediate prefix values. getFinal returns {propagate, generate},
// hence the order of the std::tie operands.
std::tie(finalPropagate, finalGroup) =
LazyKoggeStonePrefixTree(rewriter, loc, width, pPrefix, gPrefix)
.getFinal(width - 1);
break;
case AdderArchitecture::BrentKung:
case AdderArchitecture::BrentKung: {
lowerBrentKungPrefixTree(rewriter, loc, pPrefix, gPrefix);
finalGroup = gPrefix[width - 1];
finalPropagate = pPrefix[width - 1];
break;
}
}

// Final result: finalGroup (the group generate of bit width-1) gives us
// "a < b"
if (includeEq) {
// a <= b iff (a < b) OR (a == b)
// a == b iff finalPropagate (all bits are equal)
return comb::OrOp::create(rewriter, loc, gPrefix[width - 1],
pPrefix[width - 1]);
return comb::OrOp::create(rewriter, loc, finalGroup, finalPropagate);
}
// a < b iff finalGroup
return gPrefix[width - 1];
return finalGroup;
}

// Construct an unsigned comparator using either ripple-carry or
Expand Down