@@ -700,6 +700,67 @@ void lowerBrentKungPrefixTree(OpBuilder &builder, Location loc,
700700  });
701701}
702702
703+ //  TODO: Generalize to other parallel prefix trees.
704+ class  LazyKoggeStonePrefixTree  {
705+ public: 
706+   LazyKoggeStonePrefixTree (OpBuilder &builder, Location loc, int64_t  width,
707+                            ArrayRef<Value> pPrefix, ArrayRef<Value> gPrefix )
708+       : builder(builder), loc(loc), width(width) {
709+     assert (width > 0  && " width must be positive" 
710+     for  (size_t  i = 0 ; i < static_cast <size_t >(width); ++i)
711+       prefixCache[{0 , i}] = {pPrefix[i], gPrefix [i]};
712+   }
713+ 
714+   //  Get the final group and propagate values for bit i.
715+   std::pair<Value, Value> getFinal (int64_t  i) {
716+     assert (i >= 0  && i < width && " i out of bounds" 
717+     //  Final level is ceil(log2(width)) in Kogge-Stone.
718+     return  getGroupAndPropagate (llvm::Log2_64_Ceil (width), i);
719+   }
720+ 
721+ private: 
722+   //  Recursively get the group and propagate values for bit i at level `level`.
723+   //  Level 0 is the initial level with the input propagate and generate values.
724+   //  Level n computes the group and propagate values for a stride of 2^(n-1).
725+   //  Uses memoization to cache intermediate results.
726+   std::pair<Value, Value> getGroupAndPropagate (int64_t  level, int64_t  i);
727+   OpBuilder &builder;
728+   Location loc;
729+   int64_t  width;
730+   DenseMap<std::pair<int64_t , int64_t >, std::pair<Value, Value>> prefixCache;
731+ };
732+ 
733+ std::pair<Value, Value>
734+ LazyKoggeStonePrefixTree::getGroupAndPropagate (int64_t  level, int64_t  i) {
735+   assert (i < static_cast <int64_t >(width) && " i out of bounds" 
736+   auto  key = std::make_pair (level, i);
737+   auto  it = prefixCache.find (key);
738+   if  (it != prefixCache.end ())
739+     return  it->second ;
740+ 
741+   assert (level > 0  && " level must be positive" 
742+ 
743+   int64_t  previousStride = 1ULL  << (level - 1 );
744+   if  (i < previousStride) {
745+     //  No dependency, just copy from the previous level.
746+     auto  [propagateI, generateI] = getGroupAndPropagate (level - 1 , i);
747+     prefixCache[key] = {propagateI, generateI};
748+     return  prefixCache[key];
749+   }
750+   //  Get the dependency index.
751+   int64_t  j = i - previousStride;
752+   auto  [propagateI, generateI] = getGroupAndPropagate (level - 1 , i);
753+   auto  [propagateJ, generateJ] = getGroupAndPropagate (level - 1 , j);
754+   //  Group generate: g_i OR (p_i AND g_j)
755+   Value andPG = comb::AndOp::create (builder, loc, propagateI, generateJ);
756+   Value newGenerate = comb::OrOp::create (builder, loc, generateI, andPG);
757+   //  Group propagate: p_i AND p_j
758+   Value newPropagate =
759+       comb::AndOp::create (builder, loc, propagateI, propagateJ);
760+   prefixCache[key] = {newPropagate, newGenerate};
761+   return  prefixCache[key];
762+ }
763+ 
703764template  <bool  lowerToMIG>
704765struct  CombAddOpConversion  : OpConversionPattern<AddOp> {
705766  using  OpConversionPattern<AddOp>::OpConversionPattern;
@@ -1080,37 +1141,49 @@ struct CombICmpOpConversion : OpConversionPattern<ICmpOp> {
10801141  //  need the final result. Optimizing this to skip intermediate computations
10811142  //  is non-trivial because each iteration depends on results from previous
10821143  //  iterations. We rely on DCE passes to remove unused operations.
1083-   //  TODO: Lazily compute only the required prefix values.
1144+   //  TODO: Lazily compute only the required prefix values. Kogge-Stone is
1145+   //  already implemented in a lazy manner below, but other architectures can
1146+   //  also be optimized.
10841147  static  Value computePrefixComparison (ConversionPatternRewriter &rewriter,
10851148                                       Location loc, SmallVector<Value> pPrefix,
10861149                                       SmallVector<Value> gPrefix ,
10871150                                       bool  includeEq, AdderArchitecture arch) {
10881151    auto  width = pPrefix.size ();
1152+     Value finalGroup, finalPropagate;
10891153    //  Apply the appropriate prefix tree algorithm
10901154    switch  (arch) {
10911155    case  AdderArchitecture::RippleCarry:
10921156      llvm_unreachable (" Ripple-Carry should be handled separately" 
10931157      break ;
1094-     case  AdderArchitecture::Sklanskey:
1158+     case  AdderArchitecture::Sklanskey: { 
10951159      lowerSklanskeyPrefixTree (rewriter, loc, pPrefix, gPrefix );
1160+       finalGroup = gPrefix [width - 1 ];
1161+       finalPropagate = pPrefix[width - 1 ];
10961162      break ;
1163+     }
10971164    case  AdderArchitecture::KoggeStone:
1098-       lowerKoggeStonePrefixTree (rewriter, loc, pPrefix, gPrefix );
1165+       //  Use lazy Kogge-Stone implementation to avoid computing all
1166+       //  intermediate prefix values.
1167+       std::tie (finalPropagate, finalGroup) =
1168+           LazyKoggeStonePrefixTree (rewriter, loc, width, pPrefix, gPrefix )
1169+               .getFinal (width - 1 );
10991170      break ;
1100-     case  AdderArchitecture::BrentKung:
1171+     case  AdderArchitecture::BrentKung: { 
11011172      lowerBrentKungPrefixTree (rewriter, loc, pPrefix, gPrefix );
1173+       finalGroup = gPrefix [width - 1 ];
1174+       finalPropagate = pPrefix[width - 1 ];
11021175      break ;
11031176    }
1177+     }
11041178
11051179    //  Final result: gPrefix[width-1] gives us "a < b"
11061180    if  (includeEq) {
11071181      //  a <= b iff (a < b) OR (a == b)
11081182      //  a == b iff pPrefix[width-1] (all bits are equal)
1109-       return  comb::OrOp::create (rewriter, loc, gPrefix [width - 1 ],
1110-                                 pPrefix[width - 1 ]);
1183+       return  comb::OrOp::create (rewriter, loc, finalGroup, finalPropagate);
11111184    }
11121185    //  a < b iff gPrefix[width-1]
1113-     return  gPrefix [width -  1 ] ;
1186+     return  finalGroup ;
11141187  }
11151188
11161189  //  Construct an unsigned comparator using either ripple-carry or
0 commit comments