@@ -887,16 +887,16 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
887887 IGC_ASSERT_MESSAGE (IEI_ix < nelts, " ICE: IEI's index out of bound!" );
888888 SVecInsEltInfo &InsEltInfo = AllIEIs[IEI_ix];
889889 if (InsEltInfo.IEI ) {
890- // This element is inserted more than once, skip.
890+ // One element is inserted more than once, skip.
891891 return false ;
892892 }
893893 InsEltInfo.IEI = I;
894894 InsEltInfo.Elt = E;
895895 InsEltInfo.FromVec = V;
896896 InsEltInfo.FromVec_eltIx = V_ix;
897-
898- // So far, E is never nullptr (could be in the future)
899- InsEltInfo. EEI = dyn_cast_or_null<ExtractElementInst>(E);
897+ if (E) {
898+ InsEltInfo. EEI = dyn_cast<ExtractElementInst>(E);
899+ }
900900
901901 if (!I->hasOneUse ()) {
902902 break ;
@@ -923,24 +923,19 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
923923 if (tV == nullptr )
924924 return false ;
925925
926- // Expect all IEIs are in the same DeSSA CC (DeSSA special-handles IEIs)
926+ // Expect node values for all IEIs are identical. In general, if they
927+ // are in the same DeSSA CC, that would be fine.
927928 Value *tV_nv = m_DeSSA->getNodeValue (tV);
928929 if (V_root != getRootValue (tV_nv))
929930 return false ;
930931
931932 Value *E = AllIEIs[i].Elt ;
932- if (!E || isa<Constant>(E)) {
933- // constant is okay for either non-uniform or uniform.
934- continue ;
935- }
936933 Value *FromVec = AllIEIs[i].FromVec ;
937- if (FromVec) {
938- Value *FromVec_nv = m_DeSSA->getNodeValue (FromVec);
939- // check if FromVec has been coalesced with IEI already by DeSSA.
940- // (Wouldn't happen under current DeSSA, but might happen in future)
941- if (V_root == getRootValue (FromVec_nv))
942- return false ;
943- }
934+ Value *FromVec_nv = m_DeSSA->getNodeValue (FromVec);
935+ // check if FromVec has been coalesced with IEI already by DeSSA.
936+ // (Wouldn't happen under current DeSSA, but might happen in future)
937+ if (V_root == getRootValue (FromVec_nv))
938+ return false ;
944939
945940 // Make sure FromVec or E have the same uniformness as V.
946941 if ((E && V_dep != m_WIA->whichDepend (E)) || (FromVec && V_dep != m_WIA->whichDepend (FromVec)))
@@ -974,13 +969,17 @@ Value *VariableReuseAnalysis::traceAliasValue(Value *V) {
974969}
975970
976971//
977- // Returns true if there is the following pattern; otherwise return false.
972+ // Returns true if the following is true
978973// IEI = insertElement <vectorType> Vec, S, <constant IEI_ix>
979- // 1. S is from another vector V.
980- // S = extractElement <vectorType> V, <constant V_ix>
981- // In this case, S is the element denoted by (V, V_ix)
982- // 2. otherwise, V=nullptr, V_ix=0.
983- // S is a candidate and could be alias to the vector.
974+ // Return false, otherwise.
975+ //
976+ // When the above condition is true, V and V_ix are used for the
977+ // following cases:
978+ // 1. S is from another vector V.
979+ // S = extractElement <vectorType> V, <constant V_ix>
980+ // S is the element denoted by (V, V_ix)
981+ // 2. otherwise, V=nullptr, V_ix=0.
982+ // S is an inserted candidate and could be an alias to the vector.
984983//
985984// Input: IEI
986985// Output: IEI_ix, S, V, V_ix
@@ -1000,9 +999,9 @@ bool VariableReuseAnalysis::getElementValue(InsertElementInst *IEI, int &IEI_ix,
1000999 IEI_ix = (int )CI->getZExtValue ();
10011000
10021001 Value *elem0 = IEI->getOperand (1 );
1003- if (hasBeenPayloadCoalesced (elem0) || isOrCoalescedWithArg (elem0)) {
1004- // If elem0 has been payload-coalesced or it has been aliased to
1005- // an argument, skip it.
1002+ if (hasBeenPayloadCoalesced (elem0) || isa<Constant>(elem0) || isOrCoalescedWithArg (elem0)) {
1003+ // If elem0 has been payload-coalesced, is constant,
1004+ // or it has been aliased to an argument, skip it.
10061005 return false ;
10071006 }
10081007
@@ -1047,10 +1046,11 @@ void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
10471046
10481047 // IGC Key VectorAlias controls vectorAlias optimization.
10491048 //
1050- // VectorAlias (also from m_pCtx->getVectorCoalescingControl())
1051- // 0x0: disable vector aliasing
1052- // 0x1: subvec aliasing for isolated values (getRootValue()=null)
1053- // 0x2: subvec aliasing for both isolated and non-isolated value)
1049+ // Do it if VectorAlias != 0.
1050+ // VectorAlias=0x1: subvec aliasing for isolated values
1051+ // (getRootValue()=null)
1052+ // =0x2: subvec aliasing for both isolated and non-isolated
1053+ // values
10541054 const auto control = (m_pCtx->getVectorCoalescingControl () & 0x3 );
10551055 // To avoid increasing GRF pressure, skip if F is too large or not an entry
10561056 const int32_t NumBBThreshold = IGC_GET_FLAG_VALUE (VectorAliasBBThreshold);
@@ -1253,7 +1253,6 @@ bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &All
12531253 isSubCandidate = false ;
12541254 }
12551255
1256- // So far, Elt is never nullptr (could be in the future)
12571256 if (Elt && Sub == nullptr && skipScalarAliaser (BB, Elt)) {
12581257 // Skip scalar coalescing
12591258 isSubCandidate = false ;
@@ -1434,11 +1433,8 @@ VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateUse(Value
14341433 }
14351434 } else if (StoreInst *SI = dyn_cast<StoreInst>(Val)) {
14361435 retSt = AState::TARGET;
1437- } else if (CallInst *CallI = dyn_cast<CallInst>(Val)) {
1438- if (CallI->isInlineAsm ())
1439- retSt = AState::TARGET;
1440- else
1441- return AState::SKIP;
1436+ } else if (isa<CallInst>(Val)) {
1437+ return AState::SKIP;
14421438 }
14431439 }
14441440 return retSt;
@@ -1464,17 +1460,15 @@ VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateDef(Value
14641460 }
14651461 } else if (LoadInst *SI = dyn_cast<LoadInst>(Val)) {
14661462 return AState::TARGET;
1467- } else if (CallInst *CallI = dyn_cast<CallInst>(Val)) {
1468- if (CallI->isInlineAsm ())
1469- return AState::TARGET;
1463+ } else if (isa<CallInst>(Val)) {
14701464 return AState::SKIP;
14711465 }
14721466 return AState::OK;
14731467}
14741468
14751469// Vector alias disables extractMask optimization. This function
14761470// checks if extractMask optimization can be applied. And the caller
1477- // will decide whether to favor extractMask optimization or not .
1471+ // will decide whether to favor extractMask optimization.
14781472bool VariableReuseAnalysis::isExtractMaskCandidate (Value *V) const {
14791473 auto BIT = [](int n) { return (uint32_t )(1 << n); };
14801474
0 commit comments