@@ -3390,6 +3390,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
3390
3390
; GFX1264_DPP-NEXT: v_readlane_b32 s2, v2, 31
3391
3391
; GFX1264_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
3392
3392
; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
3393
+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
3393
3394
; GFX1264_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
3394
3395
; GFX1264_DPP-NEXT: v_mov_b32_e32 v4, 0
3395
3396
; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -3445,6 +3446,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
3445
3446
; GFX1264_DPP-NEXT: v_mov_b32_e32 v9, v5
3446
3447
; GFX1264_DPP-NEXT: v_readfirstlane_b32 s3, v7
3447
3448
; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
3449
+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
3448
3450
; GFX1264_DPP-NEXT: v_add_co_u32 v6, vcc, s2, v8
3449
3451
; GFX1264_DPP-NEXT: s_mov_b32 s2, s6
3450
3452
; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -6954,6 +6956,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
6954
6956
; GFX1264_DPP-NEXT: v_readlane_b32 s2, v2, 31
6955
6957
; GFX1264_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
6956
6958
; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
6959
+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
6957
6960
; GFX1264_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
6958
6961
; GFX1264_DPP-NEXT: v_mov_b32_e32 v4, 0
6959
6962
; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -7009,6 +7012,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
7009
7012
; GFX1264_DPP-NEXT: v_mov_b32_e32 v9, v5
7010
7013
; GFX1264_DPP-NEXT: v_readfirstlane_b32 s3, v7
7011
7014
; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
7015
+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
7012
7016
; GFX1264_DPP-NEXT: v_sub_co_u32 v6, vcc, s2, v8
7013
7017
; GFX1264_DPP-NEXT: s_mov_b32 s2, s6
7014
7018
; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -8233,6 +8237,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
8233
8237
; GFX1264-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
8234
8238
; GFX1264-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
8235
8239
; GFX1264-TRUE16-NEXT: s_wait_alu 0xf1ff
8240
+ ; GFX1264-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
8236
8241
; GFX1264-TRUE16-NEXT: v_mad_u16 v0.l, s10, v4.l, s2
8237
8242
; GFX1264-TRUE16-NEXT: s_mov_b32 s2, -1
8238
8243
; GFX1264-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8298,6 +8303,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
8298
8303
; GFX1264-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
8299
8304
; GFX1264-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
8300
8305
; GFX1264-FAKE16-NEXT: s_wait_alu 0xf1ff
8306
+ ; GFX1264-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
8301
8307
; GFX1264-FAKE16-NEXT: v_mad_u16 v0, s10, v4, s2
8302
8308
; GFX1264-FAKE16-NEXT: s_mov_b32 s2, -1
8303
8309
; GFX1264-FAKE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8364,6 +8370,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
8364
8370
; GFX1232-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
8365
8371
; GFX1232-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
8366
8372
; GFX1232-TRUE16-NEXT: s_wait_alu 0xf1ff
8373
+ ; GFX1232-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
8367
8374
; GFX1232-TRUE16-NEXT: v_mad_u16 v0.l, s8, v4.l, s2
8368
8375
; GFX1232-TRUE16-NEXT: s_mov_b32 s2, -1
8369
8376
; GFX1232-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8429,6 +8436,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
8429
8436
; GFX1232-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
8430
8437
; GFX1232-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
8431
8438
; GFX1232-FAKE16-NEXT: s_wait_alu 0xf1ff
8439
+ ; GFX1232-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
8432
8440
; GFX1232-FAKE16-NEXT: v_mad_u16 v0, s8, v4, s2
8433
8441
; GFX1232-FAKE16-NEXT: s_mov_b32 s2, -1
8434
8442
; GFX1232-FAKE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8818,7 +8826,7 @@ define amdgpu_kernel void @uniform_or_i16(ptr addrspace(1) %result, ptr addrspac
8818
8826
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
8819
8827
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
8820
8828
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
8821
- ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
8829
+ ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
8822
8830
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
8823
8831
; GFX7LESS-NEXT: s_cbranch_execz .LBB15_2
8824
8832
; GFX7LESS-NEXT: ; %bb.1:
@@ -9328,7 +9336,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
9328
9336
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
9329
9337
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v0
9330
9338
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
9331
- ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
9339
+ ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
9332
9340
; GFX7LESS-NEXT: s_and_saveexec_b64 s[8:9], vcc
9333
9341
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_4
9334
9342
; GFX7LESS-NEXT: ; %bb.1:
@@ -9931,6 +9939,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
9931
9939
; GFX1264-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
9932
9940
; GFX1264-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
9933
9941
; GFX1264-TRUE16-NEXT: s_wait_alu 0xf1ff
9942
+ ; GFX1264-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
9934
9943
; GFX1264-TRUE16-NEXT: v_mad_u16 v0.l, s10, v4.l, s2
9935
9944
; GFX1264-TRUE16-NEXT: s_mov_b32 s2, -1
9936
9945
; GFX1264-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -9996,6 +10005,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
9996
10005
; GFX1264-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
9997
10006
; GFX1264-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
9998
10007
; GFX1264-FAKE16-NEXT: s_wait_alu 0xf1ff
10008
+ ; GFX1264-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
9999
10009
; GFX1264-FAKE16-NEXT: v_mad_u16 v0, s10, v4, s2
10000
10010
; GFX1264-FAKE16-NEXT: s_mov_b32 s2, -1
10001
10011
; GFX1264-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -10062,6 +10072,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
10062
10072
; GFX1232-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
10063
10073
; GFX1232-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
10064
10074
; GFX1232-TRUE16-NEXT: s_wait_alu 0xf1ff
10075
+ ; GFX1232-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
10065
10076
; GFX1232-TRUE16-NEXT: v_mad_u16 v0.l, s8, v4.l, s2
10066
10077
; GFX1232-TRUE16-NEXT: s_mov_b32 s2, -1
10067
10078
; GFX1232-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -10127,6 +10138,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
10127
10138
; GFX1232-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
10128
10139
; GFX1232-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
10129
10140
; GFX1232-FAKE16-NEXT: s_wait_alu 0xf1ff
10141
+ ; GFX1232-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
10130
10142
; GFX1232-FAKE16-NEXT: v_mad_u16 v0, s8, v4, s2
10131
10143
; GFX1232-FAKE16-NEXT: s_mov_b32 s2, -1
10132
10144
; GFX1232-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -12703,6 +12715,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
12703
12715
; GFX1264-FAKE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
12704
12716
; GFX1264-FAKE16-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0
12705
12717
; GFX1264-FAKE16-NEXT: s_wait_alu 0xfffd
12718
+ ; GFX1264-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
12706
12719
; GFX1264-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
12707
12720
; GFX1264-FAKE16-NEXT: s_wait_alu 0xf1ff
12708
12721
; GFX1264-FAKE16-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[0:1]
@@ -12816,6 +12829,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
12816
12829
; GFX1232-FAKE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
12817
12830
; GFX1232-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v0, v0
12818
12831
; GFX1232-FAKE16-NEXT: s_wait_alu 0xfffd
12832
+ ; GFX1232-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
12819
12833
; GFX1232-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc_lo
12820
12834
; GFX1232-FAKE16-NEXT: s_wait_alu 0xf1ff
12821
12835
; GFX1232-FAKE16-NEXT: v_cndmask_b32_e64 v0, v3, v5, s0
0 commit comments