Skip to content

Commit cc4ecd8

Browse files
committed
add VALU->SGPR<-SALU case
1 parent 3a30594 commit cc4ecd8

12 files changed

+185
-279
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,12 @@ class AMDGPUInsertDelayAlu {
4949

5050
static bool instructionWaitsForSGPRWrites(const MachineInstr &MI) {
5151
// These instruction types wait for VA_SDST==0 before issuing.
52-
const uint64_t VA_SDST_0 = SIInstrFlags::SALU | SIInstrFlags::SMRD;
53-
if (MI.getDesc().TSFlags & VA_SDST_0) {
54-
for (auto &Op : MI.uses())
52+
uint64_t MIFlags = MI.getDesc().TSFlags;
53+
if (MIFlags & SIInstrFlags::SMRD)
54+
return true;
55+
56+
if (MIFlags & SIInstrFlags::SALU) {
57+
for (auto &Op : MI.operands())
5558
if (Op.isReg())
5659
return true;
5760
}

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 4 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
223223
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
224224
; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
225225
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
226-
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
227226
; GFX1164-NEXT: v_mad_u32_u24 v0, v0, 5, s2
228227
; GFX1164-NEXT: s_mov_b32 s2, -1
229228
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -257,7 +256,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
257256
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
258257
; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
259258
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
260-
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
261259
; GFX1132-NEXT: v_mad_u32_u24 v0, v0, 5, s2
262260
; GFX1132-NEXT: s_mov_b32 s2, -1
263261
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -293,7 +291,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
293291
; GFX1264-NEXT: s_wait_kmcnt 0x0
294292
; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
295293
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
296-
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
297294
; GFX1264-NEXT: v_mad_u32_u24 v0, v0, 5, s2
298295
; GFX1264-NEXT: s_mov_b32 s2, -1
299296
; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -326,7 +323,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
326323
; GFX1232-NEXT: s_wait_kmcnt 0x0
327324
; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
328325
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
329-
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
330326
; GFX1232-NEXT: v_mad_u32_u24 v0, v0, 5, s2
331327
; GFX1232-NEXT: s_mov_b32 s2, -1
332328
; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -937,7 +933,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
937933
; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
938934
; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
939935
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
940-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
941936
; GFX1164_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
942937
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1
943938
; GFX1164_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -985,7 +980,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
985980
; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
986981
; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
987982
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
988-
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
989983
; GFX1132_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
990984
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1
991985
; GFX1132_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -1034,7 +1028,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
10341028
; GFX1264_ITERATIVE-NEXT: s_wait_kmcnt 0x0
10351029
; GFX1264_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
10361030
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
1037-
; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
10381031
; GFX1264_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
10391032
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s2, -1
10401033
; GFX1264_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -1082,7 +1075,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
10821075
; GFX1232_ITERATIVE-NEXT: s_wait_kmcnt 0x0
10831076
; GFX1232_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
10841077
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
1085-
; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
10861078
; GFX1232_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
10871079
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s2, -1
10881080
; GFX1232_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -2159,12 +2151,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
21592151
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
21602152
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
21612153
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
2162-
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2154+
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
21632155
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
21642156
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
21652157
; GFX1164-NEXT: s_mov_b32 s2, -1
21662158
; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2167-
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
21682159
; GFX1164-NEXT: v_mov_b32_e32 v1, v3
21692160
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
21702161
; GFX1164-NEXT: s_endpgm
@@ -2203,12 +2194,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22032194
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
22042195
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
22052196
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
2206-
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2197+
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
22072198
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
22082199
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
22092200
; GFX1132-NEXT: s_mov_b32 s2, -1
22102201
; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2211-
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
22122202
; GFX1132-NEXT: v_mov_b32_e32 v1, v3
22132203
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
22142204
; GFX1132-NEXT: s_endpgm
@@ -2246,7 +2236,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22462236
; GFX1264-NEXT: s_wait_kmcnt 0x0
22472237
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
22482238
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
2249-
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2239+
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
22502240
; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
22512241
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
22522242
; GFX1264-NEXT: s_mov_b32 s2, -1
@@ -2286,7 +2276,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22862276
; GFX1232-NEXT: s_wait_kmcnt 0x0
22872277
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
22882278
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
2289-
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2279+
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
22902280
; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
22912281
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
22922282
; GFX1232-NEXT: s_mov_b32 s2, -1
@@ -4084,7 +4074,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
40844074
; GFX1164-NEXT: v_mul_lo_u32 v0, s8, v0
40854075
; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
40864076
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
4087-
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
40884077
; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v0
40894078
; GFX1164-NEXT: s_mov_b32 s2, -1
40904079
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4121,7 +4110,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41214110
; GFX1132-NEXT: v_mul_lo_u32 v0, s4, v0
41224111
; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
41234112
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
4124-
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
41254113
; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v0
41264114
; GFX1132-NEXT: s_mov_b32 s2, -1
41274115
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4160,7 +4148,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41604148
; GFX1264-NEXT: v_mul_lo_u32 v0, s8, v0
41614149
; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
41624150
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
4163-
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
41644151
; GFX1264-NEXT: v_sub_nc_u32_e32 v0, s2, v0
41654152
; GFX1264-NEXT: s_mov_b32 s2, -1
41664153
; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4198,7 +4185,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41984185
; GFX1232-NEXT: v_mul_lo_u32 v0, s4, v0
41994186
; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
42004187
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
4201-
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
42024188
; GFX1232-NEXT: v_sub_nc_u32_e32 v0, s2, v0
42034189
; GFX1232-NEXT: s_mov_b32 s2, -1
42044190
; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4482,7 +4468,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
44824468
; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
44834469
; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
44844470
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4485-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
44864471
; GFX1164_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
44874472
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1
44884473
; GFX1164_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4530,7 +4515,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
45304515
; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
45314516
; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
45324517
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4533-
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
45344518
; GFX1132_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
45354519
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1
45364520
; GFX1132_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4579,7 +4563,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
45794563
; GFX1264_ITERATIVE-NEXT: s_wait_kmcnt 0x0
45804564
; GFX1264_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
45814565
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4582-
; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
45834566
; GFX1264_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
45844567
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s2, -1
45854568
; GFX1264_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4627,7 +4610,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
46274610
; GFX1232_ITERATIVE-NEXT: s_wait_kmcnt 0x0
46284611
; GFX1232_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
46294612
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4630-
; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
46314613
; GFX1232_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
46324614
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s2, -1
46334615
; GFX1232_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -5734,7 +5716,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
57345716
; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
57355717
; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3
57365718
; GFX1164-NEXT: s_mov_b32 s2, -1
5737-
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_2)
57385719
; GFX1164-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc
57395720
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
57405721
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5779,7 +5760,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
57795760
; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
57805761
; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
57815762
; GFX1132-NEXT: s_mov_b32 s2, -1
5782-
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
57835763
; GFX1132-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc_lo
57845764
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
57855765
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5823,7 +5803,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
58235803
; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
58245804
; GFX1264-NEXT: v_sub_co_u32 v0, vcc, s2, v3
58255805
; GFX1264-NEXT: s_mov_b32 s2, -1
5826-
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_2)
58275806
; GFX1264-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc
58285807
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
58295808
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -5866,7 +5845,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
58665845
; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
58675846
; GFX1232-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
58685847
; GFX1232-NEXT: s_mov_b32 s2, -1
5869-
; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_2)
58705848
; GFX1232-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc_lo
58715849
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
58725850
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null

0 commit comments

Comments
 (0)