@@ -6911,7 +6911,7 @@ void GlobalRA::stackCallProlog()
69116911 G4_DstRegRegion* postDst = builder.createNullDst (Type_UD);
69126912 G4_INST* store = nullptr ;
69136913 {
6914- store = builder.createSpill (postDst, payloadSrc, G4_ExecSize (execSize), 1 , 0 , builder.getBESP (), InstOpt_WriteEnable);
6914+ store = builder.createSpill (postDst, payloadSrc, G4_ExecSize (execSize), 1 , 0 , builder.getBESP (), InstOpt_WriteEnable, false );
69156915 }
69166916 builder.setFDSpillInst (store);
69176917 G4_BB* entryBB = builder.kernel .fg .getEntryBB ();
@@ -6953,7 +6953,8 @@ void GlobalRA::saveRegs(
69536953 builder.getRegionStride1 (), Type_UD);
69546954 G4_DstRegRegion* dst = builder.createNullDst ((execSize > 8 ) ? Type_UW : Type_UD);
69556955 G4_INST* spillIntrinsic = nullptr ;
6956- spillIntrinsic = builder.createSpill (dst, sendSrc2, execSize, messageLength, frameOwordOffset/2 , framePtr, InstOpt_WriteEnable);
6956+ spillIntrinsic = builder.createSpill (dst, sendSrc2, execSize, messageLength, frameOwordOffset/2 , framePtr, InstOpt_WriteEnable, false );
6957+ spillIntrinsic->inheritDIFrom (*insertIt);
69576958 bb->insertBefore (insertIt, spillIntrinsic);
69586959 group.insert (spillIntrinsic);
69596960 }
@@ -7036,7 +7037,8 @@ void GlobalRA::restoreRegs(
70367037 dstDcl->getRegVar ()->setPhyReg (regPool.getGreg (startReg), 0 );
70377038 G4_DstRegRegion* dstRgn = builder.createDst (dstDcl->getRegVar (), 0 , 0 , 1 , (execSize > 8 ) ? Type_UW : Type_UD);
70387039 G4_INST* fillIntrinsic = nullptr ;
7039- fillIntrinsic = builder.createFill (dstRgn, execSize, responseLength, frameOwordOffset / 2 , framePtr, InstOpt_WriteEnable);
7040+ fillIntrinsic = builder.createFill (dstRgn, execSize, responseLength, frameOwordOffset / 2 , framePtr, InstOpt_WriteEnable, false );
7041+ fillIntrinsic->inheritDIFrom (*insertIt);
70407042 bb->insertBefore (insertIt, fillIntrinsic);
70417043 group.insert (fillIntrinsic);
70427044 }
@@ -7795,6 +7797,7 @@ void GlobalRA::addCallerSavePseudoCode()
77957797 G4_DstRegRegion* dst = builder.createDst (pseudoVCADcl->getRegVar (), 0 , 0 , 1 , Type_UD);
77967798 G4_INST* saveInst = builder.createInternalIntrinsicInst (
77977799 nullptr , Intrinsic::CallerSave, g4::SIMD1, dst, nullptr , nullptr , nullptr , InstOpt_WriteEnable);
7800+ saveInst->inheritDIFrom (fcallInst);
77987801 INST_LIST_ITER callBBIt = bb->end ();
77997802 bb->insertBefore (--callBBIt, saveInst);
78007803
@@ -7819,6 +7822,7 @@ void GlobalRA::addCallerSavePseudoCode()
78197822 G4_INST* restoreInst =
78207823 builder.createInternalIntrinsicInst (
78217824 nullptr , Intrinsic::CallerRestore, g4::SIMD1, nullptr , src, nullptr , nullptr , InstOpt_WriteEnable);
7825+ restoreInst->inheritDIFrom (fcallInst);
78227826 retBB->insertBefore (retBBIt, restoreInst);
78237827 }
78247828 }
@@ -9735,8 +9739,8 @@ int GlobalRA::coloringRegAlloc()
97359739
97369740 bool disableSpillCoalecse = builder.getOption (vISA_DisableSpillCoalescing) ||
97379741 builder.getOption (vISA_FastSpill) || fastCompile || builder.getOption (vISA_Debug) ||
9738- (!useScratchMsgForSpill
9739- );
9742+ // spill cleanup is not supported when we use oword msg for spill/fill for non-stack calls.
9743+ (!useScratchMsgForSpill && !hasStackCall );
97409744
97419745 if (!reserveSpillReg && !disableSpillCoalecse && builder.useSends ())
97429746 {
@@ -12451,6 +12455,57 @@ unsigned GraphColor::edgeWeightARF(const LiveRange* lr1, const LiveRange* lr2)
1245112455 return 0 ;
1245212456}
1245312457
12458+ void GlobalRA::fixSrc0IndirFcall ()
12459+ {
12460+ // Indirect calls look like:
12461+ // mov (1|NM) V10 0x123456:ud
12462+ // fcall (1) dst V10 <-- V10 which is src0 contains %ip to jump to
12463+ //
12464+ // In this function, we want to set V10 to r125.0 which is same as dst of fcall
12465+ // as per ABI. This way, when inserting save/restore code around fcall, no
12466+ // special checks are needed to handle V10.
12467+ //
12468+ // But this works only if V10 is a local. If it not a local we create a mov
12469+ // that copies V10 in to a new temp variable. And then we map this temp
12470+ // variable to r125.0. Hopefully V10 being global would be a rare occurence.
12471+ for (auto bb : kernel.fg )
12472+ {
12473+ if (bb->isEndWithFCall ())
12474+ {
12475+ auto fcall = bb->back ()->asCFInst ();
12476+ if (!fcall->getSrc (0 ) ||
12477+ !fcall->getSrc (0 )->isSrcRegRegion ())
12478+ continue ;
12479+
12480+ auto src0Rgn = fcall->getSrc (0 )->asSrcRegRegion ();
12481+ auto src0Dcl = src0Rgn->getBase ()->asRegVar ()->getDeclare ();
12482+ auto src0TopDcl = src0Rgn->getTopDcl ();
12483+
12484+ if (src0Dcl != src0TopDcl ||
12485+ !isBlockLocal (src0TopDcl) ||
12486+ src0TopDcl->getNumElems () > 1 )
12487+ {
12488+ // create a copy
12489+ auto tmpDcl = kernel.fg .builder ->createHardwiredDeclare (1 , src0Rgn->getType (), kernel.getFPSPGRF (),
12490+ IR_Builder::SubRegs_Stackcall::Ret_IP);
12491+ auto dst = kernel.fg .builder ->createDst (tmpDcl->getRegVar (), src0Rgn->getType ());
12492+ auto src = kernel.fg .builder ->duplicateOperand (src0Rgn);
12493+ auto copy = kernel.fg .builder ->createMov (g4::SIMD1, dst, src, InstOpt_WriteEnable, false );
12494+ auto iter = std::find_if (bb->begin (), bb->end (), [](G4_INST* inst) { return inst->isFCall (); });
12495+ bb->insertBefore (iter, copy);
12496+ auto newSrc = kernel.fg .builder ->createSrc (tmpDcl->getRegVar (), 0 , 0 , kernel.fg .builder ->getRegionScalar (),
12497+ src0Rgn->getType ());
12498+ fcall->setSrc (newSrc, 0 );
12499+ }
12500+ else
12501+ {
12502+ src0TopDcl->getRegVar ()->setPhyReg (fcall->getDst ()->getBase ()->asRegVar ()->getPhyReg (),
12503+ fcall->getDst ()->getBase ()->asRegVar ()->getPhyRegOff ());
12504+ }
12505+ }
12506+ }
12507+ }
12508+
1245412509bool dump (const char * s, LiveRange** lrs, unsigned size)
1245512510{
1245612511 // Utility function to dump lr from name.
0 commit comments