Skip to content

Commit a7acda3

Browse files
committed
[CombToSynth] Compute Kogge-Stone prefix tree lazily in unsigned comparison lowering
Add a LazyKoggeStonePrefixTree class for on-demand computation of prefix values, and use lazy evaluation in the ICmpOp conversion to avoid computing all intermediate prefix values.
1 parent e589add commit a7acda3

File tree

2 files changed

+86
-12
lines changed

2 files changed

+86
-12
lines changed

integration_test/circt-synth/comb-lowering-compare.mlir

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,12 @@ hw.module @icmp_unsigned_sklanskey(in %lhs: i3, in %rhs: i3, out out_ugt: i1, ou
2525

2626
// RUN: circt-lec %t.mlir %s -c1=icmp_unsigned_kogge_stone -c2=icmp_unsigned_kogge_stone --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ICMP_UNSIGNED_KOGGE_STONE
2727
// COMB_ICMP_UNSIGNED_KOGGE_STONE: c1 == c2
28-
hw.module @icmp_unsigned_kogge_stone(in %lhs: i3, in %rhs: i3, out out_ugt: i1, out out_uge: i1, out out_ult: i1, out out_ule: i1) {
29-
%ugt = comb.icmp ugt %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
30-
%uge = comb.icmp uge %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
31-
%ult = comb.icmp ult %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
32-
%ule = comb.icmp ule %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
28+
// Use a slightly larger width to verify the lazy prefix tree logic.
29+
// Test input: the four unsigned comparison predicates (ugt, uge, ult, ule)
// on 14-bit operands, each tagged with synth.test.arch = "KOGGE-STONE".
hw.module @icmp_unsigned_kogge_stone(in %lhs: i14, in %rhs: i14, out out_ugt: i1, out out_uge: i1, out out_ult: i1, out out_ule: i1) {
30+
%ugt = comb.icmp ugt %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
31+
%uge = comb.icmp uge %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
32+
%ult = comb.icmp ult %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
33+
%ule = comb.icmp ule %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i14
3334
hw.output %ugt, %uge, %ult, %ule : i1, i1, i1, i1
3435
}
3536

lib/Conversion/CombToSynth/CombToSynth.cpp

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,67 @@ void lowerBrentKungPrefixTree(OpBuilder &builder, Location loc,
700700
});
701701
}
702702

703+
// TODO: Generalize to other parallel prefix trees.
704+
class LazyKoggeStonePrefixTree {
705+
public:
706+
LazyKoggeStonePrefixTree(OpBuilder &builder, Location loc, int64_t width,
707+
ArrayRef<Value> pPrefix, ArrayRef<Value> gPrefix)
708+
: builder(builder), loc(loc), width(width) {
709+
assert(width > 0 && "width must be positive");
710+
for (size_t i = 0; i < static_cast<size_t>(width); ++i)
711+
prefixCache[{0, i}] = {pPrefix[i], gPrefix[i]};
712+
}
713+
714+
// Get the final group and propagate values for bit i.
715+
std::pair<Value, Value> getFinal(int64_t i) {
716+
assert(i >= 0 && i < width && "i out of bounds");
717+
// Final level is ceil(log2(width)) in Kogge-Stone.
718+
return getGroupAndPropagate(llvm::Log2_64_Ceil(width), i);
719+
}
720+
721+
private:
722+
// Recursively get the group and propagate values for bit i at level `level`.
723+
// Level 0 is the initial level with the input propagate and generate values.
724+
// Level n computes the group and propagate values for a stride of 2^(n-1).
725+
// Uses memoization to cache intermediate results.
726+
std::pair<Value, Value> getGroupAndPropagate(int64_t level, int64_t i);
727+
OpBuilder &builder;
728+
Location loc;
729+
int64_t width;
730+
DenseMap<std::pair<int64_t, int64_t>, std::pair<Value, Value>> prefixCache;
731+
};
732+
733+
std::pair<Value, Value>
734+
LazyKoggeStonePrefixTree::getGroupAndPropagate(int64_t level, int64_t i) {
735+
assert(i < static_cast<int64_t>(width) && "i out of bounds");
736+
auto key = std::make_pair(level, i);
737+
auto it = prefixCache.find(key);
738+
if (it != prefixCache.end())
739+
return it->second;
740+
741+
assert(level > 0 && "level must be positive");
742+
743+
int64_t previousStride = 1ULL << (level - 1);
744+
if (i < previousStride) {
745+
// No dependency, just copy from the previous level.
746+
auto [propagateI, generateI] = getGroupAndPropagate(level - 1, i);
747+
prefixCache[key] = {propagateI, generateI};
748+
return prefixCache[key];
749+
}
750+
// Get the dependency index.
751+
int64_t j = i - previousStride;
752+
auto [propagateI, generateI] = getGroupAndPropagate(level - 1, i);
753+
auto [propagateJ, generateJ] = getGroupAndPropagate(level - 1, j);
754+
// Group generate: g_i OR (p_i AND g_j)
755+
Value andPG = comb::AndOp::create(builder, loc, propagateI, generateJ);
756+
Value newGenerate = comb::OrOp::create(builder, loc, generateI, andPG);
757+
// Group propagate: p_i AND p_j
758+
Value newPropagate =
759+
comb::AndOp::create(builder, loc, propagateI, propagateJ);
760+
prefixCache[key] = {newPropagate, newGenerate};
761+
return prefixCache[key];
762+
}
763+
703764
template <bool lowerToMIG>
704765
struct CombAddOpConversion : OpConversionPattern<AddOp> {
705766
using OpConversionPattern<AddOp>::OpConversionPattern;
@@ -1080,37 +1141,49 @@ struct CombICmpOpConversion : OpConversionPattern<ICmpOp> {
10801141
// need the final result. Optimizing this to skip intermediate computations
10811142
// is non-trivial because each iteration depends on results from previous
10821143
// iterations. We rely on DCE passes to remove unused operations.
1083-
// TODO: Lazily compute only the required prefix values.
1144+
// TODO: Lazily compute only the required prefix values. Kogge-Stone is
1145+
// already implemented in a lazy manner below, but other architectures can
1146+
// also be optimized.
10841147
// Runs the prefix tree selected by `arch` over the per-bit propagate/generate
// inputs and returns the comparison result: `finalGroup` alone, or
// `finalGroup OR finalPropagate` when `includeEq` is set.
// pPrefix/gPrefix are taken by value; the Sklanskey and Brent-Kung cases pass
// these local copies to their lowering helpers and then read the last element.
// NOTE(review): `width` is the unsigned result of pPrefix.size(), so
// `width - 1` wraps around for an empty input - presumably callers never pass
// zero-width operands; confirm.
static Value computePrefixComparison(ConversionPatternRewriter &rewriter,
10851148
Location loc, SmallVector<Value> pPrefix,
10861149
SmallVector<Value> gPrefix,
10871150
bool includeEq, AdderArchitecture arch) {
10881151
auto width = pPrefix.size();
1152+
// Set by whichever architecture case runs below.
Value finalGroup, finalPropagate;
10891153
// Apply the appropriate prefix tree algorithm
10901154
switch (arch) {
10911155
case AdderArchitecture::RippleCarry:
10921156
llvm_unreachable("Ripple-Carry should be handled separately");
10931157
break;
1094-
case AdderArchitecture::Sklanskey:
1158+
case AdderArchitecture::Sklanskey: {
10951159
lowerSklanskeyPrefixTree(rewriter, loc, pPrefix, gPrefix);
1160+
finalGroup = gPrefix[width - 1];
1161+
finalPropagate = pPrefix[width - 1];
10961162
break;
1163+
}
10971164
case AdderArchitecture::KoggeStone:
1098-
lowerKoggeStonePrefixTree(rewriter, loc, pPrefix, gPrefix);
1165+
// Use lazy Kogge-Stone implementation to avoid computing all
1166+
// intermediate prefix values.
1167+
// getFinal returns the pair ordered {propagate, generate}, hence the
// order of the std::tie targets.
std::tie(finalPropagate, finalGroup) =
1168+
LazyKoggeStonePrefixTree(rewriter, loc, width, pPrefix, gPrefix)
1169+
.getFinal(width - 1);
10991170
break;
1100-
case AdderArchitecture::BrentKung:
1171+
case AdderArchitecture::BrentKung: {
11011172
lowerBrentKungPrefixTree(rewriter, loc, pPrefix, gPrefix);
1173+
finalGroup = gPrefix[width - 1];
1174+
finalPropagate = pPrefix[width - 1];
11021175
break;
11031176
}
1177+
}
11041178

11051179
// Final result: finalGroup (the generate value of the top bit) gives us "a < b"
11061180
if (includeEq) {
11071181
// a <= b iff (a < b) OR (a == b)
11081182
// a == b iff finalPropagate (all bits are equal)
1109-
return comb::OrOp::create(rewriter, loc, gPrefix[width - 1],
1110-
pPrefix[width - 1]);
1183+
return comb::OrOp::create(rewriter, loc, finalGroup, finalPropagate);
11111184
}
11121185
// a < b iff finalGroup
1113-
return gPrefix[width - 1];
1186+
return finalGroup;
11141187
}
11151188

11161189
// Construct an unsigned comparator using either ripple-carry or

0 commit comments

Comments
 (0)