@@ -130,6 +130,7 @@ class VectorCombine {
130
130
bool foldShuffleOfIntrinsics (Instruction &I);
131
131
bool foldShuffleToIdentity (Instruction &I);
132
132
bool foldShuffleFromReductions (Instruction &I);
133
+ bool foldShuffleChainsToReduce (Instruction &I);
133
134
bool foldCastFromReductions (Instruction &I);
134
135
bool foldSelectShuffle (Instruction &I, bool FromReduction = false );
135
136
bool foldInterleaveIntrinsics (Instruction &I);
@@ -2988,6 +2989,179 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
2988
2989
return foldSelectShuffle (*Shuffle, true );
2989
2990
}
2990
2991
2992
+ bool VectorCombine::foldShuffleChainsToReduce (Instruction &I) {
2993
+ auto *EEI = dyn_cast<ExtractElementInst>(&I);
2994
+ if (!EEI)
2995
+ return false ;
2996
+
2997
+ std::queue<Value *> InstWorklist;
2998
+ Value *InitEEV = nullptr ;
2999
+ Intrinsic::ID CommonOp = 0 ;
3000
+
3001
+ bool IsFirstCallInst = true ;
3002
+ bool ShouldBeCallInst = true ;
3003
+
3004
+ SmallVector<Value *, 3 > PrevVecV (3 , nullptr );
3005
+ int64_t ShuffleMaskHalf = -1 , ExpectedShuffleMaskHalf = 1 ;
3006
+ int64_t VecSize = -1 ;
3007
+
3008
+ Value *VecOp;
3009
+ if (!match (&I, m_ExtractElt (m_Value (VecOp), m_Zero ())))
3010
+ return false ;
3011
+
3012
+ auto *FVT = dyn_cast<FixedVectorType>(VecOp->getType ());
3013
+ if (!FVT)
3014
+ return false ;
3015
+
3016
+ VecSize = FVT->getNumElements ();
3017
+ if (VecSize < 2 || (VecSize % 2 ) != 0 )
3018
+ return false ;
3019
+
3020
+ ShuffleMaskHalf = 1 ;
3021
+ PrevVecV[2 ] = VecOp;
3022
+ InitEEV = EEI;
3023
+
3024
+ InstWorklist.push (PrevVecV[2 ]);
3025
+
3026
+ while (!InstWorklist.empty ()) {
3027
+ Value *V = InstWorklist.front ();
3028
+ InstWorklist.pop ();
3029
+
3030
+ auto *CI = dyn_cast<Instruction>(V);
3031
+ if (!CI)
3032
+ return false ;
3033
+
3034
+ if (auto *CallI = dyn_cast<CallInst>(CI)) {
3035
+ if (!ShouldBeCallInst || !PrevVecV[2 ])
3036
+ return false ;
3037
+
3038
+ if (!IsFirstCallInst &&
3039
+ any_of (PrevVecV, [](Value *VecV) { return VecV == nullptr ; }))
3040
+ return false ;
3041
+
3042
+ if (CallI != (IsFirstCallInst ? PrevVecV[2 ] : PrevVecV[0 ]))
3043
+ return false ;
3044
+ IsFirstCallInst = false ;
3045
+
3046
+ auto *II = dyn_cast<IntrinsicInst>(CallI);
3047
+ if (!II)
3048
+ return false ;
3049
+
3050
+ if (!CommonOp)
3051
+ CommonOp = II->getIntrinsicID ();
3052
+ if (II->getIntrinsicID () != CommonOp)
3053
+ return false ;
3054
+
3055
+ switch (II->getIntrinsicID ()) {
3056
+ case Intrinsic::umin:
3057
+ case Intrinsic::umax:
3058
+ case Intrinsic::smin:
3059
+ case Intrinsic::smax: {
3060
+ auto *Op0 = CallI->getOperand (0 );
3061
+ auto *Op1 = CallI->getOperand (1 );
3062
+ PrevVecV[0 ] = Op0;
3063
+ PrevVecV[1 ] = Op1;
3064
+ break ;
3065
+ }
3066
+ default :
3067
+ return false ;
3068
+ }
3069
+ ShouldBeCallInst ^= 1 ;
3070
+
3071
+ if (!isa<ShuffleVectorInst>(PrevVecV[1 ]))
3072
+ std::swap (PrevVecV[0 ], PrevVecV[1 ]);
3073
+ InstWorklist.push (PrevVecV[1 ]);
3074
+ InstWorklist.push (PrevVecV[0 ]);
3075
+ } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
3076
+ if (ShouldBeCallInst ||
3077
+ any_of (PrevVecV, [](Value *VecV) { return VecV == nullptr ; }))
3078
+ return false ;
3079
+
3080
+ if (SVInst != PrevVecV[1 ])
3081
+ return false ;
3082
+
3083
+ auto *ShuffleVec = SVInst->getOperand (0 );
3084
+ if (!ShuffleVec || ShuffleVec != PrevVecV[0 ])
3085
+ return false ;
3086
+
3087
+ SmallVector<int > CurMask;
3088
+ SVInst->getShuffleMask (CurMask);
3089
+
3090
+ if (ShuffleMaskHalf != ExpectedShuffleMaskHalf)
3091
+ return false ;
3092
+ ExpectedShuffleMaskHalf *= 2 ;
3093
+
3094
+ for (int Mask = 0 , MaskSize = CurMask.size (); Mask != MaskSize; ++Mask) {
3095
+ if (Mask < ShuffleMaskHalf && CurMask[Mask] != ShuffleMaskHalf + Mask)
3096
+ return false ;
3097
+ if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1 )
3098
+ return false ;
3099
+ }
3100
+ ShuffleMaskHalf *= 2 ;
3101
+ if (ExpectedShuffleMaskHalf == VecSize)
3102
+ break ;
3103
+ ShouldBeCallInst ^= 1 ;
3104
+ } else {
3105
+ return false ;
3106
+ }
3107
+ }
3108
+
3109
+ if (ShouldBeCallInst)
3110
+ return false ;
3111
+
3112
+ assert (VecSize != -1 && ExpectedShuffleMaskHalf == VecSize &&
3113
+ " Expected Match for Vector Size and Mask Half" );
3114
+
3115
+ Value *FinalVecV = PrevVecV[0 ];
3116
+ auto *FinalVecVTy = dyn_cast<FixedVectorType>(FinalVecV->getType ());
3117
+
3118
+ if (!InitEEV || !FinalVecV)
3119
+ return false ;
3120
+
3121
+ assert (FinalVecVTy && " Expected non-null value for Vector Type" );
3122
+
3123
+ Intrinsic::ID ReducedOp = 0 ;
3124
+ switch (CommonOp) {
3125
+ case Intrinsic::umin:
3126
+ ReducedOp = Intrinsic::vector_reduce_umin;
3127
+ break ;
3128
+ case Intrinsic::umax:
3129
+ ReducedOp = Intrinsic::vector_reduce_umax;
3130
+ break ;
3131
+ case Intrinsic::smin:
3132
+ ReducedOp = Intrinsic::vector_reduce_smin;
3133
+ break ;
3134
+ case Intrinsic::smax:
3135
+ ReducedOp = Intrinsic::vector_reduce_smax;
3136
+ break ;
3137
+ default :
3138
+ return false ;
3139
+ }
3140
+
3141
+ InstructionCost OrigCost = 0 ;
3142
+ unsigned int NumLevels = Log2_64 (VecSize);
3143
+
3144
+ for (unsigned int Level = 0 ; Level < NumLevels; ++Level) {
3145
+ OrigCost += TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc,
3146
+ FinalVecVTy, FinalVecVTy);
3147
+ OrigCost += TTI.getArithmeticInstrCost (Instruction::ICmp, FinalVecVTy);
3148
+ }
3149
+ OrigCost += TTI.getVectorInstrCost (Instruction::ExtractElement, FinalVecVTy,
3150
+ CostKind, 0 );
3151
+
3152
+ IntrinsicCostAttributes ICA (ReducedOp, FinalVecVTy, {FinalVecV});
3153
+ InstructionCost NewCost = TTI.getIntrinsicInstrCost (ICA, CostKind);
3154
+
3155
+ if (NewCost >= OrigCost)
3156
+ return false ;
3157
+
3158
+ auto *ReducedResult =
3159
+ Builder.CreateIntrinsic (ReducedOp, {FinalVecV->getType ()}, {FinalVecV});
3160
+ replaceValue (*InitEEV, *ReducedResult);
3161
+
3162
+ return true ;
3163
+ }
3164
+
2991
3165
/// Determine if it's more efficient to fold:
2992
3166
// / reduce(trunc(x)) -> trunc(reduce(x)).
2993
3167
// / reduce(sext(x)) -> sext(reduce(x)).
@@ -3705,6 +3879,9 @@ bool VectorCombine::run() {
3705
3879
MadeChange |= foldShuffleFromReductions (I);
3706
3880
MadeChange |= foldCastFromReductions (I);
3707
3881
break ;
3882
+ case Instruction::ExtractElement:
3883
+ MadeChange |= foldShuffleChainsToReduce (I);
3884
+ break ;
3708
3885
case Instruction::ICmp:
3709
3886
case Instruction::FCmp:
3710
3887
MadeChange |= foldExtractExtract (I);
0 commit comments