@@ -220,6 +220,8 @@ void CloneAddressArithmetic::computeFlow(llvm::Instruction* I) {
220220 BFSQ.push (I);
221221 unsigned int NumOfUses = Uses[I];
222222
223+ std::unordered_set<llvm::Instruction *> Explored;
224+
223225 while (!BFSQ.empty ()) {
224226
225227 llvm::Instruction *CurrI = BFSQ.front ();
@@ -233,11 +235,14 @@ void CloneAddressArithmetic::computeFlow(llvm::Instruction* I) {
233235 bool NotConstant = !llvm::isa<llvm::Constant>(Op);
234236 bool NotUniform = IGC_IS_FLAG_ENABLED (RematRespectUniformity) ? !WI->isUniform (Op) : true ;
235237 bool AddressArithmetic = isAddressArithmetic (Op);
238+ bool NotExplored = !Explored.count (Op);
236239
237- if (NotConstant && NotPHI && AddressArithmetic && NotUniform) {
238- FlowMap[Op] = FlowMap[Op] + NumOfUses;
239- BFSQ.push (Op);
240- }
240+ bool Skip = !(NotConstant && NotPHI && AddressArithmetic && NotUniform && NotExplored);
241+ if (Skip) continue ;
242+
243+ FlowMap[Op] = FlowMap[Op] + NumOfUses;
244+ Explored.insert (Op);
245+ BFSQ.push (Op);
241246 }
242247 }
243248}
@@ -254,6 +259,7 @@ CloneAddressArithmetic::collectRematChain(llvm::Instruction* I, unsigned int Num
254259 PRINT_LOG (" Collect chain for: " ); PRINT_INST (I); PRINT_LOG_NL (" " );
255260
256261 llvm::SmallVector<unsigned int , 4 > StateVector;
262+ std::unordered_set<llvm::Instruction *> Explored;
257263
258264 // we are traversing ssa-chain for address arithmetic
259265 while (!BFSQ.empty ()) {
@@ -264,8 +270,7 @@ CloneAddressArithmetic::collectRematChain(llvm::Instruction* I, unsigned int Num
264270 for (unsigned int i = 0 ; i < CurrI->getNumOperands (); ++i) {
265271
266272 Instruction *Op = llvm::dyn_cast<Instruction>(CurrI->getOperand (i));
267- if ( !Op)
268- continue ;
273+ if (!Op) continue ;
269274
270275 PRINT_LOG (" Candidate: [" << FlowMap[Op] << " ] " ); PRINT_INST (Op);
271276
@@ -274,16 +279,21 @@ CloneAddressArithmetic::collectRematChain(llvm::Instruction* I, unsigned int Num
274279 bool SameBB = IGC_IS_FLAG_ENABLED (RematSameBBScope) ? Op->getParent () == I->getParent () : true ;
275280 bool NotUniform = IGC_IS_FLAG_ENABLED (RematRespectUniformity) ? !WI->isUniform (Op) : true ;
276281 bool AddressArithmetic = isAddressArithmetic (Op);
277-
278282 bool NotTooManyUses = FlowMap[Op] <= NumOfUsesLimit;
283+ bool NotExplored = !Explored.count (Op);
279284
280- if (SameBB && NotConstant && NotPHI && NotTooManyUses && AddressArithmetic && NotUniform) {
281- BFSQ. push (Op );
282- RematVector. push_back (Op);
283- PRINT_LOG_NL (" \t\t --> Accepted " );
285+ PRINT_LOG ( " \t\t " << " BB: " << SameBB << " Uses: " << NotTooManyUses << " Ar: " << AddressArithmetic << " Un: " << NotUniform);
286+ bool Skip = !(SameBB && NotConstant && NotPHI && NotTooManyUses && AddressArithmetic && NotUniform && NotExplored );
287+ if (Skip) {
288+ PRINT_LOG_NL (" \t\t --> Rejected " );
284289 continue ;
285290 }
286- PRINT_LOG_NL (" \t\t --> Rejected: " << " BB:" << SameBB << " Uses:" << NotTooManyUses << " Ar:" << AddressArithmetic << " Un:" << NotUniform);
291+
292+ BFSQ.push (Op);
293+ Explored.insert (Op);
294+ RematVector.push_back (Op);
295+
296+ PRINT_LOG_NL (" \t\t --> Accepted" );
287297 }
288298 }
289299
@@ -540,8 +550,10 @@ unsigned int CloneAddressArithmetic::collectFlow(RematSet& ToProcess, Function&
540550 float Coefficient = 0 .01f *(float )Base;
541551 unsigned int Result = (unsigned int )((float )FlowBudget*Coefficient);
542552
543- for (auto el : ToProcess)
553+ for (auto el : ToProcess) {
554+ PRINT_LOG (" Start to compute flow: " ); PRINT_INST_NL (el);
544555 computeFlow ((Instruction*)el);
556+ }
545557
546558 if (DEBUG) {
547559 for (const auto &el : FlowMap) {
@@ -555,25 +567,26 @@ unsigned int CloneAddressArithmetic::collectFlow(RematSet& ToProcess, Function&
555567
556568bool CloneAddressArithmetic::greedyRemat (Function &F) {
557569
558- bool Result = false ;
559570 if (isRegPressureLow (F))
560- return Result ;
571+ return false ;
561572
562573 initializeLogFile (F);
563574 countUses (F);
575+
564576 RematSet ToProcess;
565577 collectInstToProcess (ToProcess, F);
566578
567579 unsigned int FlowThreshold = collectFlow (ToProcess, F);
568580 writeLog ();
569- speculateWholeChain (ToProcess, FlowThreshold);
570581
582+ speculateWholeChain (ToProcess, FlowThreshold);
571583 writeLog ();
584+
572585 rematerialize (ToProcess, FlowThreshold);
573586 writeLog ();
574587
575588 FlowMap.clear ();
576- return Result ;
589+ return true ;
577590}
578591
579592
0 commit comments