@@ -223,7 +223,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
223
223
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
224
224
; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
225
225
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
226
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
227
226
; GFX1164-NEXT: v_mad_u32_u24 v0, v0, 5, s2
228
227
; GFX1164-NEXT: s_mov_b32 s2, -1
229
228
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -257,7 +256,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
257
256
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
258
257
; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
259
258
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
260
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
261
259
; GFX1132-NEXT: v_mad_u32_u24 v0, v0, 5, s2
262
260
; GFX1132-NEXT: s_mov_b32 s2, -1
263
261
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -293,7 +291,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
293
291
; GFX1264-NEXT: s_wait_kmcnt 0x0
294
292
; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
295
293
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
296
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
297
294
; GFX1264-NEXT: v_mad_u32_u24 v0, v0, 5, s2
298
295
; GFX1264-NEXT: s_mov_b32 s2, -1
299
296
; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -326,7 +323,6 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace
326
323
; GFX1232-NEXT: s_wait_kmcnt 0x0
327
324
; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
328
325
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
329
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
330
326
; GFX1232-NEXT: v_mad_u32_u24 v0, v0, 5, s2
331
327
; GFX1232-NEXT: s_mov_b32 s2, -1
332
328
; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -937,7 +933,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
937
933
; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
938
934
; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
939
935
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
940
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
941
936
; GFX1164_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
942
937
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1
943
938
; GFX1164_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -985,7 +980,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
985
980
; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
986
981
; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
987
982
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
988
- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
989
983
; GFX1132_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
990
984
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1
991
985
; GFX1132_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -1034,7 +1028,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
1034
1028
; GFX1264_ITERATIVE-NEXT: s_wait_kmcnt 0x0
1035
1029
; GFX1264_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
1036
1030
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
1037
- ; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
1038
1031
; GFX1264_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
1039
1032
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s2, -1
1040
1033
; GFX1264_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -1082,7 +1075,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
1082
1075
; GFX1232_ITERATIVE-NEXT: s_wait_kmcnt 0x0
1083
1076
; GFX1232_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
1084
1077
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
1085
- ; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
1086
1078
; GFX1232_ITERATIVE-NEXT: v_add_nc_u32_e32 v0, s2, v0
1087
1079
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s2, -1
1088
1080
; GFX1232_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -2159,12 +2151,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2159
2151
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
2160
2152
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
2161
2153
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
2162
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2154
+ ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
2163
2155
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
2164
2156
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
2165
2157
; GFX1164-NEXT: s_mov_b32 s2, -1
2166
2158
; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2167
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
2168
2159
; GFX1164-NEXT: v_mov_b32_e32 v1, v3
2169
2160
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
2170
2161
; GFX1164-NEXT: s_endpgm
@@ -2203,12 +2194,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2203
2194
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
2204
2195
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
2205
2196
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
2206
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2197
+ ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
2207
2198
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
2208
2199
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
2209
2200
; GFX1132-NEXT: s_mov_b32 s2, -1
2210
2201
; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2211
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
2212
2202
; GFX1132-NEXT: v_mov_b32_e32 v1, v3
2213
2203
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
2214
2204
; GFX1132-NEXT: s_endpgm
@@ -2246,7 +2236,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2246
2236
; GFX1264-NEXT: s_wait_kmcnt 0x0
2247
2237
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
2248
2238
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
2249
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2239
+ ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
2250
2240
; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
2251
2241
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
2252
2242
; GFX1264-NEXT: s_mov_b32 s2, -1
@@ -2286,7 +2276,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2286
2276
; GFX1232-NEXT: s_wait_kmcnt 0x0
2287
2277
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
2288
2278
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
2289
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2279
+ ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
2290
2280
; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
2291
2281
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
2292
2282
; GFX1232-NEXT: s_mov_b32 s2, -1
@@ -4084,7 +4074,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4084
4074
; GFX1164-NEXT: v_mul_lo_u32 v0, s8, v0
4085
4075
; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
4086
4076
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
4087
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
4088
4077
; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4089
4078
; GFX1164-NEXT: s_mov_b32 s2, -1
4090
4079
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4121,7 +4110,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4121
4110
; GFX1132-NEXT: v_mul_lo_u32 v0, s4, v0
4122
4111
; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
4123
4112
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
4124
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
4125
4113
; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4126
4114
; GFX1132-NEXT: s_mov_b32 s2, -1
4127
4115
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4160,7 +4148,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4160
4148
; GFX1264-NEXT: v_mul_lo_u32 v0, s8, v0
4161
4149
; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
4162
4150
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
4163
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
4164
4151
; GFX1264-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4165
4152
; GFX1264-NEXT: s_mov_b32 s2, -1
4166
4153
; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4198,7 +4185,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4198
4185
; GFX1232-NEXT: v_mul_lo_u32 v0, s4, v0
4199
4186
; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
4200
4187
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
4201
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
4202
4188
; GFX1232-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4203
4189
; GFX1232-NEXT: s_mov_b32 s2, -1
4204
4190
; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4482,7 +4468,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4482
4468
; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
4483
4469
; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
4484
4470
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4485
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
4486
4471
; GFX1164_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4487
4472
; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1
4488
4473
; GFX1164_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4530,7 +4515,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4530
4515
; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0)
4531
4516
; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
4532
4517
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4533
- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
4534
4518
; GFX1132_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4535
4519
; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1
4536
4520
; GFX1132_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4579,7 +4563,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4579
4563
; GFX1264_ITERATIVE-NEXT: s_wait_kmcnt 0x0
4580
4564
; GFX1264_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
4581
4565
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4582
- ; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
4583
4566
; GFX1264_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4584
4567
; GFX1264_ITERATIVE-NEXT: s_mov_b32 s2, -1
4585
4568
; GFX1264_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4627,7 +4610,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4627
4610
; GFX1232_ITERATIVE-NEXT: s_wait_kmcnt 0x0
4628
4611
; GFX1232_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v1
4629
4612
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000
4630
- ; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1)
4631
4613
; GFX1232_ITERATIVE-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4632
4614
; GFX1232_ITERATIVE-NEXT: s_mov_b32 s2, -1
4633
4615
; GFX1232_ITERATIVE-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -5734,7 +5716,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5734
5716
; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
5735
5717
; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3
5736
5718
; GFX1164-NEXT: s_mov_b32 s2, -1
5737
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_2)
5738
5719
; GFX1164-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc
5739
5720
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
5740
5721
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5779,7 +5760,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5779
5760
; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
5780
5761
; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
5781
5762
; GFX1132-NEXT: s_mov_b32 s2, -1
5782
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_2)
5783
5763
; GFX1132-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v5, vcc_lo
5784
5764
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
5785
5765
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5823,7 +5803,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5823
5803
; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
5824
5804
; GFX1264-NEXT: v_sub_co_u32 v0, vcc, s2, v3
5825
5805
; GFX1264-NEXT: s_mov_b32 s2, -1
5826
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_2)
5827
5806
; GFX1264-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc
5828
5807
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
5829
5808
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -5866,7 +5845,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5866
5845
; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
5867
5846
; GFX1232-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
5868
5847
; GFX1232-NEXT: s_mov_b32 s2, -1
5869
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_2)
5870
5848
; GFX1232-NEXT: v_sub_co_ci_u32_e64 v1, null, s3, v4, vcc_lo
5871
5849
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
5872
5850
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
0 commit comments