@@ -129,6 +129,7 @@ class VectorCombine {
129
129
bool foldShuffleOfIntrinsics (Instruction &I);
130
130
bool foldShuffleToIdentity (Instruction &I);
131
131
bool foldShuffleFromReductions (Instruction &I);
132
+ bool foldShuffleChainsToReduce (Instruction &I);
132
133
bool foldCastFromReductions (Instruction &I);
133
134
bool foldSelectShuffle (Instruction &I, bool FromReduction = false );
134
135
bool foldInterleaveIntrinsics (Instruction &I);
@@ -2910,6 +2911,192 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
2910
2911
return foldSelectShuffle (*Shuffle, true );
2911
2912
}
2912
2913
2914
+ bool VectorCombine::foldShuffleChainsToReduce (Instruction &I) {
2915
+ auto *EEI = dyn_cast<ExtractElementInst>(&I);
2916
+ if (!EEI)
2917
+ return false ;
2918
+
2919
+ std::queue<Value *> InstWorklist;
2920
+ Value *InitEEV = nullptr ;
2921
+ Intrinsic::ID CommonOp = 0 ;
2922
+
2923
+ bool IsFirstEEInst = true , IsFirstCallInst = true ;
2924
+ bool ShouldBeCallInst = true ;
2925
+
2926
+ SmallVector<Value *, 3 > PrevVecV (3 , nullptr );
2927
+ int64_t ShuffleMaskHalf = -1 , ExpectedShuffleMaskHalf = 1 ;
2928
+ int64_t VecSize = -1 ;
2929
+
2930
+ InstWorklist.push (EEI);
2931
+
2932
+ while (!InstWorklist.empty ()) {
2933
+ Value *V = InstWorklist.front ();
2934
+ InstWorklist.pop ();
2935
+
2936
+ auto *CI = dyn_cast<Instruction>(V);
2937
+ if (!CI)
2938
+ return false ;
2939
+
2940
+ if (auto *EEInst = dyn_cast<ExtractElementInst>(CI)) {
2941
+ if (!IsFirstEEInst)
2942
+ return false ;
2943
+ IsFirstEEInst = false ;
2944
+
2945
+ auto *VecOp = EEInst->getVectorOperand ();
2946
+ if (!VecOp)
2947
+ return false ;
2948
+
2949
+ auto *FVT = dyn_cast<FixedVectorType>(VecOp->getType ());
2950
+ if (!FVT)
2951
+ return false ;
2952
+
2953
+ VecSize = FVT->getNumElements ();
2954
+ if (VecSize < 2 || (VecSize % 2 ) != 0 )
2955
+ return false ;
2956
+
2957
+ auto *IndexOp = EEInst->getIndexOperand ();
2958
+ if (!IndexOp)
2959
+ return false ;
2960
+
2961
+ auto *ConstIndex = dyn_cast<ConstantInt>(IndexOp);
2962
+ if (ConstIndex && ConstIndex->getValue () != 0 )
2963
+ return false ;
2964
+
2965
+ ShuffleMaskHalf = 1 ;
2966
+ PrevVecV[2 ] = VecOp;
2967
+ InitEEV = EEInst;
2968
+ InstWorklist.push (PrevVecV[2 ]);
2969
+ } else if (auto *CallI = dyn_cast<CallInst>(CI)) {
2970
+ if (IsFirstEEInst || !ShouldBeCallInst || !PrevVecV[2 ])
2971
+ return false ;
2972
+
2973
+ if (!IsFirstCallInst &&
2974
+ any_of (PrevVecV, [](Value *VecV) { return VecV == nullptr ; }))
2975
+ return false ;
2976
+
2977
+ if (CallI != (IsFirstCallInst ? PrevVecV[2 ] : PrevVecV[0 ]))
2978
+ return false ;
2979
+ IsFirstCallInst = false ;
2980
+
2981
+ auto *II = dyn_cast<IntrinsicInst>(CallI);
2982
+ if (!II)
2983
+ return false ;
2984
+
2985
+ if (!CommonOp)
2986
+ CommonOp = II->getIntrinsicID ();
2987
+ if (II->getIntrinsicID () != CommonOp)
2988
+ return false ;
2989
+
2990
+ switch (II->getIntrinsicID ()) {
2991
+ case Intrinsic::umin:
2992
+ case Intrinsic::umax:
2993
+ case Intrinsic::smin:
2994
+ case Intrinsic::smax: {
2995
+ auto *Op0 = CallI->getOperand (0 );
2996
+ auto *Op1 = CallI->getOperand (1 );
2997
+ PrevVecV[0 ] = Op0;
2998
+ PrevVecV[1 ] = Op1;
2999
+ break ;
3000
+ }
3001
+ default :
3002
+ return false ;
3003
+ }
3004
+ ShouldBeCallInst ^= 1 ;
3005
+
3006
+ if (!isa<ShuffleVectorInst>(PrevVecV[1 ]))
3007
+ std::swap (PrevVecV[0 ], PrevVecV[1 ]);
3008
+ InstWorklist.push (PrevVecV[1 ]);
3009
+ InstWorklist.push (PrevVecV[0 ]);
3010
+ } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
3011
+ if (IsFirstEEInst || ShouldBeCallInst ||
3012
+ any_of (PrevVecV, [](Value *VecV) { return VecV == nullptr ; }))
3013
+ return false ;
3014
+
3015
+ if (SVInst != PrevVecV[1 ])
3016
+ return false ;
3017
+
3018
+ auto *ShuffleVec = SVInst->getOperand (0 );
3019
+ if (!ShuffleVec || ShuffleVec != PrevVecV[0 ])
3020
+ return false ;
3021
+
3022
+ SmallVector<int > CurMask;
3023
+ SVInst->getShuffleMask (CurMask);
3024
+
3025
+ if (ShuffleMaskHalf != ExpectedShuffleMaskHalf)
3026
+ return false ;
3027
+ ExpectedShuffleMaskHalf *= 2 ;
3028
+
3029
+ for (int Mask = 0 , MaskSize = CurMask.size (); Mask != MaskSize; ++Mask) {
3030
+ if (Mask < ShuffleMaskHalf && CurMask[Mask] != ShuffleMaskHalf + Mask)
3031
+ return false ;
3032
+ if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1 )
3033
+ return false ;
3034
+ }
3035
+ ShuffleMaskHalf *= 2 ;
3036
+ if (ExpectedShuffleMaskHalf == VecSize)
3037
+ break ;
3038
+ ShouldBeCallInst ^= 1 ;
3039
+ } else {
3040
+ return false ;
3041
+ }
3042
+ }
3043
+
3044
+ if (IsFirstEEInst || ShouldBeCallInst)
3045
+ return false ;
3046
+
3047
+ assert (VecSize != -1 && ExpectedShuffleMaskHalf == VecSize &&
3048
+ " Expected Match for Vector Size and Mask Half" );
3049
+
3050
+ Value *FinalVecV = PrevVecV[0 ];
3051
+ auto *FinalVecVTy = dyn_cast<FixedVectorType>(FinalVecV->getType ());
3052
+
3053
+ if (!InitEEV || !FinalVecV)
3054
+ return false ;
3055
+
3056
+ assert (FinalVecVTy && " Expected non-null value for Vector Type" );
3057
+
3058
+ Intrinsic::ID ReducedOp = 0 ;
3059
+ switch (CommonOp) {
3060
+ case Intrinsic::umin:
3061
+ ReducedOp = Intrinsic::vector_reduce_umin;
3062
+ break ;
3063
+ case Intrinsic::umax:
3064
+ ReducedOp = Intrinsic::vector_reduce_umax;
3065
+ break ;
3066
+ case Intrinsic::smin:
3067
+ ReducedOp = Intrinsic::vector_reduce_smin;
3068
+ break ;
3069
+ case Intrinsic::smax:
3070
+ ReducedOp = Intrinsic::vector_reduce_smax;
3071
+ break ;
3072
+ default :
3073
+ return false ;
3074
+ }
3075
+
3076
+ InstructionCost OrigCost = 0 ;
3077
+ unsigned int NumLevels = Log2_64 (VecSize);
3078
+
3079
+ for (unsigned int Level = 0 ; Level < NumLevels; ++Level) {
3080
+ OrigCost += TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc,
3081
+ FinalVecVTy);
3082
+ OrigCost += TTI.getArithmeticInstrCost (Instruction::ICmp, FinalVecVTy);
3083
+ }
3084
+ OrigCost += TTI.getVectorInstrCost (Instruction::ExtractElement, FinalVecVTy,
3085
+ CostKind, 0 );
3086
+
3087
+ IntrinsicCostAttributes ICA (ReducedOp, FinalVecVTy, {FinalVecV});
3088
+ InstructionCost NewCost = TTI.getIntrinsicInstrCost (ICA, CostKind);
3089
+
3090
+ if (NewCost >= OrigCost)
3091
+ return false ;
3092
+
3093
+ auto *ReducedResult =
3094
+ Builder.CreateIntrinsic (ReducedOp, {FinalVecV->getType ()}, {FinalVecV});
3095
+ replaceValue (*InitEEV, *ReducedResult);
3096
+
3097
+ return true ;
3098
+ }
3099
+
2913
3100
/// Determine if it's more efficient to fold:
2914
3101
// / reduce(trunc(x)) -> trunc(reduce(x)).
2915
3102
// / reduce(sext(x)) -> sext(reduce(x)).
@@ -3621,6 +3808,9 @@ bool VectorCombine::run() {
3621
3808
MadeChange |= foldShuffleFromReductions (I);
3622
3809
MadeChange |= foldCastFromReductions (I);
3623
3810
break ;
3811
+ case Instruction::ExtractElement:
3812
+ MadeChange |= foldShuffleChainsToReduce (I);
3813
+ break ;
3624
3814
case Instruction::ICmp:
3625
3815
case Instruction::FCmp:
3626
3816
MadeChange |= foldExtractExtract (I);
0 commit comments