Skip to content

Commit 5b342d0

Browse files
bcheng0127igcbot
authored andcommitted
Updated the scalar register assignment in indirect send
Use round robin to avoid WAR dependence
1 parent b668c76 commit 5b342d0

File tree

4 files changed

+65
-29
lines changed

4 files changed

+65
-29
lines changed

visa/BuildIR.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -758,10 +758,6 @@ class IR_Builder {
758758
G4_Declare *getBuiltinHWTID() const { return builtinHWTID; }
759759
G4_Declare *getBuiltinSR0Dot1() const { return builtinSR0Dot1; }
760760

761-
// The first part of s0 is reserved for Xe3+ Gather Send (indirect send)
762-
// This tests if an operand refers to Gather Send or something else
763-
bool isBuiltinSendIndirectS0(G4_Operand *op) const;
764-
765761
G4_Declare *getBuiltinT252() const { return builtinT252; }
766762
G4_Declare *getBuiltinBindlessSampler() const {
767763
return builtinBindlessSampler;

visa/BuildIRImpl.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,24 +1908,6 @@ G4_INST *IR_Builder::createInternalBfnInst(
19081908
return ii;
19091909
}
19101910

1911-
// Tells whether `op` refers to the leading part of s0, which is reserved for
// Xe3+ Gather Send (indirect send), rather than to some other use of s0.
//
// Returns false when the operand has no top declare, when that declare is not
// in the scalar register file, or when the operand is neither a destination
// nor a source region. Otherwise returns true iff the region's right bound
// lies below 8 * FIRST_SURFACE_S0_QW.
// NOTE(review): the factor 8 presumably converts QWords to bytes - confirm
// against the definition of FIRST_SURFACE_S0_QW.
bool IR_Builder::isBuiltinSendIndirectS0(G4_Operand *op) const
{
  const G4_Declare *topDcl = op->getTopDcl();
  const bool inScalarFile =
      topDcl != nullptr && topDcl->getRegFile() == G4_SCALAR;
  if (!inScalarFile)
    return false;

  // Only region operands carry a right bound to compare against.
  unsigned regionEnd = 0;
  if (op->isDstRegRegion())
    regionEnd = op->asDstRegRegion()->getRightBound();
  else if (op->isSrcRegRegion())
    regionEnd = op->asSrcRegRegion()->getRightBound();
  else
    return false;

  return regionEnd < 8 * FIRST_SURFACE_S0_QW;
}
1928-
19291911
// scratch surfaces, write the content of T251 to extended message descriptor
19301912
// exdesc holds the value of the extended message descriptor for bit [0:11]
19311913
// add (1) a0.2<1>:ud T251<1>:ud exDesc:ud {NoMask}

visa/Passes/SRSubstitution.cpp

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ static bool regSortCompareAfterRA(regMapBRA map1, regMapBRA map2) {
3939
return false;
4040
}
4141

42-
void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var, int totalRegs,
42+
void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var,
43+
unsigned short S0Sub, int totalRegs,
4344
IR_Builder &builder, bool isLargeGRF) {
4445
// Change the send instruction to sendi
4546
G4_InstSend *Send = inst->asSendInst();
@@ -56,7 +57,7 @@ void changeToIndirectSend(G4_INST *inst, G4_Declare *s0Var, int totalRegs,
5657
// Replace source 0 with scalar register
5758
G4_SrcRegRegion *headerOpnd =
5859
builder.createSrcRegRegion(Mod_src_undef, IndirGRF, s0Var->getRegVar(), 0,
59-
0, builder.getRegionScalar(), Type_UB);
60+
S0Sub * 8, builder.getRegionScalar(), Type_UB);
6061
// Replace source 1 with null.
6162
G4_SrcRegRegion *payloadToUse = builder.createNullSrc(Type_UD);
6263

@@ -286,7 +287,7 @@ bool SRSubPass::replaceWithSendi(G4_BB *bb, INST_LIST_ITER instIter,
286287
bb->insertBefore(instIter, movInst);
287288
}
288289

289-
changeToIndirectSend(inst, s0Var, totalRegs, builder, false);
290+
changeToIndirectSend(inst, s0Var, 0, totalRegs, builder, false);
290291

291292
return true;
292293
}
@@ -762,6 +763,54 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
762763
}
763764

764765

766+
unsigned short SRSubPassAfterRA::allocateS0(unsigned short UQNum) {
767+
unsigned short freeSRSub = S0Index;
768+
bool find = false;
769+
for (; freeSRSub < S0SubRegNum;
770+
freeSRSub++) {
771+
if (UQNum > 1 && (freeSRSub + 1) < S0SubRegNum) {
772+
if (!UsedS0SubReg.isSet(freeSRSub) &&
773+
!UsedS0SubReg.isSet(freeSRSub + 1)) {
774+
find = true;
775+
S0Index += 2;
776+
break;
777+
}
778+
} else if ((UQNum == 1) && !UsedS0SubReg.isSet(freeSRSub)) {
779+
find = true;
780+
S0Index++;
781+
break;
782+
}
783+
S0Index++;
784+
}
785+
786+
if (find) {
787+
return freeSRSub;
788+
}
789+
790+
for (freeSRSub = 0; freeSRSub < S0Index; freeSRSub++) {
791+
if (UQNum > 1 && (freeSRSub + 1) < S0SubRegNum) {
792+
if (!UsedS0SubReg.isSet(freeSRSub) &&
793+
!UsedS0SubReg.isSet(freeSRSub + 1)) {
794+
find = true;
795+
S0Index += 2;
796+
break;
797+
}
798+
} else if ((UQNum == 1) && !UsedS0SubReg.isSet(freeSRSub)) {
799+
find = true;
800+
S0Index++;
801+
break;
802+
}
803+
S0Index++;
804+
}
805+
806+
if (S0Index >= S0SubRegNum) {
807+
S0Index = 0;
808+
}
809+
// At most 2 Qwords required, since we reserved two QWords, should always find
810+
assert(find);
811+
return freeSRSub;
812+
}
813+
765814
// Replace the send instruction with the payload of
766815
// Insert the scalar register initialization mov instructions.
767816
bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
@@ -873,10 +922,11 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
873922
if (dstSrcRegs.isLargeGRF) {
874923
UQNum = totalRegs > (TypeSize(Type_UQ) / TypeSize(Type_UW)) ? 2 : 1;
875924
}
925+
unsigned short S0Sub = allocateS0(UQNum);
876926
G4_Declare *s0Var = builder.createTempScalar(UQNum, "S0_");
877927
s0Var->getRegVar()->setPhyReg(builder.phyregpool.getScalarReg(), 0);
878928
G4_DstRegRegion *dst =
879-
builder.createDst(s0Var->getRegVar(), 0, 0, 1, Type_UQ);
929+
builder.createDst(s0Var->getRegVar(), 0, S0Sub, 1, Type_UQ);
880930
G4_INST *movInst = nullptr;
881931
if (!dstSrcRegs.isLargeGRF) {
882932
movInst = builder.createIntrinsicAddrMovInst(
@@ -891,7 +941,7 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
891941

892942
if (UQNum > 1) {
893943
G4_DstRegRegion *dst1 =
894-
builder.createDst(s0Var->getRegVar(), 0, 1, 1, Type_UQ);
944+
builder.createDst(s0Var->getRegVar(), 0, S0Sub + 1, 1, Type_UQ);
895945
G4_INST *movInst1 = nullptr;
896946
if (!dstSrcRegs.isLargeGRF) {
897947
movInst1 = builder.createIntrinsicAddrMovInst(
@@ -916,7 +966,8 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
916966
}
917967
}
918968

919-
changeToIndirectSend(inst, s0Var, totalRegs, builder, dstSrcRegs.isLargeGRF);
969+
changeToIndirectSend(inst, s0Var, S0Sub, totalRegs, builder,
970+
dstSrcRegs.isLargeGRF);
920971

921972
return true;
922973
}
@@ -933,12 +984,15 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
933984
std::map<G4_INST *, regCandidatesBRA> candidates;
934985
std::map<G4_INST *, regCandidatesBRA>::iterator candidatesIt;
935986

987+
S0SubRegNum = builder.getScalarRegisterSizeInBytes() / 8;
988+
UsedS0SubReg.resize(S0SubRegNum);
989+
S0Index = 0;
990+
936991
INST_LIST_ITER ii = bb->begin(), iend(bb->end());
937992
unsigned candidateStart = builder.getuint32Option(vISA_IndirectInstStart);
938993
unsigned candidateEnd = builder.getuint32Option(vISA_IndirectInstEnd);
939994
while (ii != iend) {
940995
G4_INST *inst = *ii;
941-
942996
regCandidatesBRA dstSrcRegs;
943997
if (!isSRCandidateAfterRA(inst, dstSrcRegs)) {
944998
ii++;

visa/Passes/SRSubstitution.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ class SRSubPass {
9494
class SRSubPassAfterRA {
9595
IR_Builder &builder;
9696
G4_Kernel &kernel;
97+
BitSet UsedS0SubReg;
98+
unsigned short S0SubRegNum = 0;
99+
unsigned short S0Index = 0;
97100
unsigned candidateID = 0;
98101

99102
public:
@@ -110,6 +113,7 @@ class SRSubPassAfterRA {
110113
}
111114
}
112115
bool isSRCandidateAfterRA(G4_INST *inst, regCandidatesBRA &dstSrcRegs);
116+
unsigned short allocateS0(unsigned short UQNum);
113117
bool replaceWithSendiAfterRA(G4_BB *bb, INST_LIST_ITER instIter,
114118
regCandidatesBRA &dstSrcRegs);
115119
void SRSubAfterRA(G4_BB *bb);

0 commit comments

Comments
 (0)