@@ -39,7 +39,8 @@ static bool regSortCompareAfterRA(regMapBRA map1, regMapBRA map2) {
39
39
return false ;
40
40
}
41
41
42
- void changeToIndirectSend (G4_INST *inst, G4_Declare *s0Var, int totalRegs,
42
+ void changeToIndirectSend (G4_INST *inst, G4_Declare *s0Var,
43
+ unsigned short S0Sub, int totalRegs,
43
44
IR_Builder &builder, bool isLargeGRF) {
44
45
// Change the send instruction to sendi
45
46
G4_InstSend *Send = inst->asSendInst ();
@@ -56,7 +57,7 @@ void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var, int totalRegs,
56
57
// Replace source 0 with scalar register
57
58
G4_SrcRegRegion *headerOpnd =
58
59
builder.createSrcRegRegion (Mod_src_undef, IndirGRF, s0Var->getRegVar (), 0 ,
59
- 0 , builder.getRegionScalar (), Type_UB);
60
+ S0Sub * 8 , builder.getRegionScalar (), Type_UB);
60
61
// Replace source 1 with null.
61
62
G4_SrcRegRegion *payloadToUse = builder.createNullSrc (Type_UD);
62
63
@@ -286,7 +287,7 @@ bool SRSubPass::replaceWithSendi(G4_BB *bb, INST_LIST_ITER instIter,
286
287
bb->insertBefore (instIter, movInst);
287
288
}
288
289
289
- changeToIndirectSend (inst, s0Var, totalRegs, builder, false );
290
+ changeToIndirectSend (inst, s0Var, 0 , totalRegs, builder, false );
290
291
291
292
return true ;
292
293
}
@@ -762,6 +763,54 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
762
763
}
763
764
764
765
766
+ unsigned short SRSubPassAfterRA::allocateS0 (unsigned short UQNum) {
767
+ unsigned short freeSRSub = S0Index;
768
+ bool find = false ;
769
+ for (; freeSRSub < S0SubRegNum;
770
+ freeSRSub++) {
771
+ if (UQNum > 1 && (freeSRSub + 1 ) < S0SubRegNum) {
772
+ if (!UsedS0SubReg.isSet (freeSRSub) &&
773
+ !UsedS0SubReg.isSet (freeSRSub + 1 )) {
774
+ find = true ;
775
+ S0Index += 2 ;
776
+ break ;
777
+ }
778
+ } else if ((UQNum == 1 ) && !UsedS0SubReg.isSet (freeSRSub)) {
779
+ find = true ;
780
+ S0Index++;
781
+ break ;
782
+ }
783
+ S0Index++;
784
+ }
785
+
786
+ if (find) {
787
+ return freeSRSub;
788
+ }
789
+
790
+ for (freeSRSub = 0 ; freeSRSub < S0Index; freeSRSub++) {
791
+ if (UQNum > 1 && (freeSRSub + 1 ) < S0SubRegNum) {
792
+ if (!UsedS0SubReg.isSet (freeSRSub) &&
793
+ !UsedS0SubReg.isSet (freeSRSub + 1 )) {
794
+ find = true ;
795
+ S0Index += 2 ;
796
+ break ;
797
+ }
798
+ } else if ((UQNum == 1 ) && !UsedS0SubReg.isSet (freeSRSub)) {
799
+ find = true ;
800
+ S0Index++;
801
+ break ;
802
+ }
803
+ S0Index++;
804
+ }
805
+
806
+ if (S0Index >= S0SubRegNum) {
807
+ S0Index = 0 ;
808
+ }
809
+ // At most 2 Qwords required, since we reserved two QWords, should always find
810
+ assert (find);
811
+ return freeSRSub;
812
+ }
813
+
765
814
// Replace the send instruction with the payload of
766
815
// Insert the scalar register initialization mov instructions.
767
816
bool SRSubPassAfterRA::replaceWithSendiAfterRA (G4_BB *bb,
@@ -873,10 +922,11 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
873
922
if (dstSrcRegs.isLargeGRF ) {
874
923
UQNum = totalRegs > (TypeSize (Type_UQ) / TypeSize (Type_UW)) ? 2 : 1 ;
875
924
}
925
+ unsigned short S0Sub = allocateS0 (UQNum);
876
926
G4_Declare *s0Var = builder.createTempScalar (UQNum, " S0_" );
877
927
s0Var->getRegVar ()->setPhyReg (builder.phyregpool .getScalarReg (), 0 );
878
928
G4_DstRegRegion *dst =
879
- builder.createDst (s0Var->getRegVar (), 0 , 0 , 1 , Type_UQ);
929
+ builder.createDst (s0Var->getRegVar (), 0 , S0Sub , 1 , Type_UQ);
880
930
G4_INST *movInst = nullptr ;
881
931
if (!dstSrcRegs.isLargeGRF ) {
882
932
movInst = builder.createIntrinsicAddrMovInst (
@@ -891,7 +941,7 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
891
941
892
942
if (UQNum > 1 ) {
893
943
G4_DstRegRegion *dst1 =
894
- builder.createDst (s0Var->getRegVar (), 0 , 1 , 1 , Type_UQ);
944
+ builder.createDst (s0Var->getRegVar (), 0 , S0Sub + 1 , 1 , Type_UQ);
895
945
G4_INST *movInst1 = nullptr ;
896
946
if (!dstSrcRegs.isLargeGRF ) {
897
947
movInst1 = builder.createIntrinsicAddrMovInst (
@@ -916,7 +966,8 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
916
966
}
917
967
}
918
968
919
- changeToIndirectSend (inst, s0Var, totalRegs, builder, dstSrcRegs.isLargeGRF );
969
+ changeToIndirectSend (inst, s0Var, S0Sub, totalRegs, builder,
970
+ dstSrcRegs.isLargeGRF );
920
971
921
972
return true ;
922
973
}
@@ -933,12 +984,15 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
933
984
std::map<G4_INST *, regCandidatesBRA> candidates;
934
985
std::map<G4_INST *, regCandidatesBRA>::iterator candidatesIt;
935
986
987
+ S0SubRegNum = builder.getScalarRegisterSizeInBytes () / 8 ;
988
+ UsedS0SubReg.resize (S0SubRegNum);
989
+ S0Index = 0 ;
990
+
936
991
INST_LIST_ITER ii = bb->begin (), iend (bb->end ());
937
992
unsigned candidateStart = builder.getuint32Option (vISA_IndirectInstStart);
938
993
unsigned candidateEnd = builder.getuint32Option (vISA_IndirectInstEnd);
939
994
while (ii != iend) {
940
995
G4_INST *inst = *ii;
941
-
942
996
regCandidatesBRA dstSrcRegs;
943
997
if (!isSRCandidateAfterRA (inst, dstSrcRegs)) {
944
998
ii++;
0 commit comments