@@ -2048,10 +2048,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20482048
20492049 // need extra move for dst
20502050 if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2051- !builder.isOpndAligned (origDst, 32 ))
2051+ !builder.isOpndAligned (origDst, getGRFSize () ))
20522052 {
20532053 // macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2054- G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb);
2054+ G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb, GRFALIGN );
20552055 mulInst->setDest (tmpDst);
20562056 }
20572057 }
@@ -2090,10 +2090,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20902090 machIter = bb->insertBefore (++machIter, maclInst);
20912091
20922092 if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2093- !builder.isOpndAligned (origDst, 32 ))
2093+ !builder.isOpndAligned (origDst, getGRFSize () ))
20942094 {
20952095 // macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2096- G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb);
2096+ G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb, GRFALIGN );
20972097 maclInst->setDest (tmpDst);
20982098 }
20992099 }
@@ -2460,10 +2460,9 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
24602460// Translate MULH into
24612461// MUL acc src0 src1
24622462// MACH dst src0 src1
2463- bool HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
2463+ void HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
24642464{
24652465 G4_INST* inst = *i;
2466- INST_LIST_ITER iter = i;
24672466 G4_ExecSize execSize = inst->getExecSize ();
24682467
24692468 int inst_opt = inst->getOption ();
@@ -2530,23 +2529,20 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25302529 execSize > 1 ? builder.getRegionStride2 () : builder.getRegionScalar (),
25312530 dst->getType ());
25322531
2533- ++iter;
2534-
25352532 G4_INST* tmpMov = builder.createMov (execSize, dst, tmpSrc, inst->getOption (), false );
25362533 tmpMov->setPredicate (builder.duplicateOperand (inst->getPredicate ()));
25372534
2538- bb->insertBefore (iter, tmpMov);
2539- // it will decrement back to mov
2540- i = iter;
2535+ bb->insertAfter (i, tmpMov);
25412536
2542- /*
2543- Need to remove dst from uses list of mulh, and add them to movInst useList
2544- add movInst to uselist of mulh.
2545- Add mulh to def instruction list of movInst
2546- */
2537+ // Check the newly inserted mov inst
2538+ i++;
2539+
2540+ // Need to remove dst from uses list of mulh, and add them to movInst useList
2541+ // add movInst to uselist of mulh.
2542+ // Add mulh to def instruction list of movInst
25472543 inst->transferUse (tmpMov);
25482544 inst->addDefUse (tmpMov, Opnd_src0);
2549- return true ;
2545+ return ;
25502546 }
25512547
25522548 // src1 does not support modifier
@@ -2575,8 +2571,6 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25752571 // Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Mul->Mul + Macl expanding will
25762572 // be done in expandMulPostSchedule pass.
25772573
2578- bool newInstInserted = false ;
2579-
25802574 // sat cannot be used at all in the macro sequence
25812575 // this effectively means sat is broken for mul D D D
25822576 inst->setSaturate (g4::NOSAT);
@@ -2595,32 +2589,30 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25952589 }
25962590
25972591 INST_LIST_ITER end_iter = i;
2598- // check if the ACC source is aligned to mach dst
2599- // ToDo: this should be checked by fixAcc?
2592+ // This mul will be expanded into mul+macl in the expandMulPostSchedule pass. Since the expanded macl
2593+ // must be grf-aligned, the mul needs to be made grf-aligned as well.
26002594 G4_DstRegRegion* dst = inst->getDst ();
26012595 if (inst->getSaturate () ||
26022596 dst->getExecTypeSize () > TypeSize (Type_D) ||
2603- isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
2597+ isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
2598+ !builder.isOpndAligned (dst, getGRFSize ()))
26042599 {
26052600 // add a tmp mov
2606- inst->setDest (insertMovAfter (i, dst, dst->getType (), bb));
2601+ inst->setDest (insertMovAfter (i, dst, dst->getType (), bb, GRFALIGN ));
26072602 end_iter++;
2608- newInstInserted = true ;
26092603 }
26102604
26112605 if (execSize > builder.getNativeExecSize ())
26122606 {
26132607 auto start_iter = i;
2614- splitDWMULInst (i, end_iter, bb);
2615- newInstInserted = true ;
2608+ splitDWMULInst (start_iter, end_iter, bb);
2609+ // start_iter points to the first half of mulh. Re-check this newly inserted mulh to see whether it needs to be split again
2610+ i = start_iter;
26162611 }
2617-
2618- if (newInstInserted)
2612+ else
26192613 {
2620- // it will decrease back to mulh
26212614 i++;
26222615 }
2623- return newInstInserted;
26242616 }
26252617 else
26262618 {
@@ -2634,7 +2626,7 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26342626 G4_INST* newMul = builder.createBinOp (G4_mul, execSize,
26352627 acc_dst_opnd, builder.duplicateOperand (src0), builder.duplicateOperand (src1), inst_opt, false );
26362628
2637- bb->insertBefore (iter , newMul);
2629+ bb->insertBefore (i , newMul);
26382630 inst->copyDefsTo (newMul, false );
26392631
26402632 fixMulSrc1 (std::prev (i), bb);
@@ -2681,10 +2673,16 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26812673 {
26822674 auto start_iter = std::prev (i);
26832675 splitDWMULInst (start_iter, end_iter, bb);
2684- i = end_iter;
2676+ // start_iter points to the first half of mul. Need to check the newly inserted mul/mach instructions
2677+ i = start_iter;
2678+ }
2679+ else
2680+ {
2681+ // i points to mach; the newly inserted mul before mach also needs to be checked
2682+ i = std::prev (i);
26852683 }
2686- return true ;
26872684 }
2685+ return ;
26882686}
26892687
26902688//
@@ -3569,6 +3567,11 @@ void HWConformity::splitDWMULInst(INST_LIST_ITER& start, INST_LIST_ITER& end, G4
35693567 evenlySplitInst (iter, bb);
35703568 G4_INST* expand_sec_half_op = *iter;
35713569 bb->insertBefore (last_iter, expand_sec_half_op);
3570+ // Handle the case where only one instruction needs to be split, i.e. start equals end
3571+ if (start == end)
3572+ {
3573+ start--;
3574+ }
35723575 end--;
35733576 bb->erase (iter);
35743577 }
@@ -5271,14 +5274,9 @@ void HWConformity::conformBB(G4_BB* bb)
52715274
52725275 if (inst->opcode () == G4_mulh)
52735276 {
5274- if (fixMULHInst (i, bb))
5275- {
5276- // inserted mul before
5277- // check the newly added MUL inst
5278- i--;
5279- next_iter = i;
5280- continue ;
5281- }
5277+ fixMULHInst (i, bb);
5278+ next_iter = i;
5279+ continue ;
52825280 }
52835281
52845282#ifdef _DEBUG
@@ -7172,4 +7170,4 @@ void HWConformity::fixSrc1Region(INST_LIST_ITER it, G4_BB* bb)
71727170 G4_Operand* new_src1 = insertMovBefore (it, 1 , src1->getType (), bb);
71737171 inst->setSrc (new_src1, 1 );
71747172 }
7175- }
7173+ }
0 commit comments