Skip to content

Commit 2d8d232

Browse files
committed
[AMDGPU][SDAG] Enable ISD::PTRADD for 64-bit AS by default
Also removes the command line option to control this feature.

There seem to be mainly two kinds of test changes:
- Some operands of addition instructions are swapped; that is to be expected since PTRADD is not commutative.
- Improvements in code generation, probably because the legacy lowering enabled some transformations that were sometimes harmful.

For SWDEV-516125.
1 parent f0f708e commit 2d8d232

15 files changed

+311
-310
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,6 @@ static cl::opt<bool> UseDivergentRegisterIndexing(
6363
cl::desc("Use indirect register addressing for divergent indexes"),
6464
cl::init(false));
6565

66-
// TODO: This option should be removed once we switch to always using PTRADD in
67-
// the SelectionDAG.
68-
static cl::opt<bool> UseSelectionDAGPTRADD(
69-
"amdgpu-use-sdag-ptradd", cl::Hidden,
70-
cl::desc("Generate ISD::PTRADD nodes for 64-bit pointer arithmetic in the "
71-
"SelectionDAG ISel"),
72-
cl::init(false));
73-
7466
static bool denormalModeIsFlushAllF32(const MachineFunction &MF) {
7567
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
7668
return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign();
@@ -10599,7 +10591,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1059910591

1060010592
bool SITargetLowering::shouldPreservePtrArith(const Function &F,
1060110593
EVT PtrVT) const {
10602-
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
10594+
return PtrVT == MVT::i64;
1060310595
}
1060410596

1060510597
bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F,

llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll

Lines changed: 212 additions & 142 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ define protected amdgpu_kernel void @InferNothing(i32 %a, ptr %b, double %c) {
1111
; CHECK-NEXT: v_mov_b32_e32 v0, s2
1212
; CHECK-NEXT: v_mov_b32_e32 v1, s3
1313
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
14-
; CHECK-NEXT: s_add_u32 s0, s2, s0
15-
; CHECK-NEXT: s_addc_u32 s1, s3, s1
14+
; CHECK-NEXT: s_add_u32 s0, s0, s2
15+
; CHECK-NEXT: s_addc_u32 s1, s1, s3
1616
; CHECK-NEXT: v_mov_b32_e32 v3, s1
1717
; CHECK-NEXT: v_add_co_u32_e64 v2, vcc, -8, s0
1818
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
@@ -69,13 +69,13 @@ define protected amdgpu_kernel void @InferMixed(i32 %a, ptr addrspace(1) %b, dou
6969
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
7070
; CHECK-NEXT: s_add_u32 s0, s0, s2
7171
; CHECK-NEXT: s_addc_u32 s1, s1, s3
72+
; CHECK-NEXT: s_add_u32 s0, s0, -8
73+
; CHECK-NEXT: s_addc_u32 s1, s1, -1
7274
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
7375
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
7476
; CHECK-NEXT: buffer_wbinvl1_vol
75-
; CHECK-NEXT: v_mov_b32_e32 v1, s1
76-
; CHECK-NEXT: v_add_co_u32_e64 v0, vcc, -7, s0
77-
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
78-
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
77+
; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
78+
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:1
7979
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8080
; CHECK-NEXT: buffer_wbinvl1_vol
8181
; CHECK-NEXT: s_endpgm
@@ -113,7 +113,7 @@ define protected amdgpu_kernel void @InferPHI(i32 %a, ptr addrspace(1) %b, doubl
113113
; CHECK-NEXT: s_addc_u32 s1, s1, s5
114114
; CHECK-NEXT: s_add_u32 s4, s0, -8
115115
; CHECK-NEXT: s_addc_u32 s5, s1, -1
116-
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 9
116+
; CHECK-NEXT: s_cmp_eq_u64 s[4:5], 1
117117
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
118118
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
119119
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0

llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ define void @use_extern_normal() #0 {
4646
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
4747
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4048f5c3
4848
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
49-
; CHECK-NEXT: s_add_u32 s4, s4, s6
50-
; CHECK-NEXT: s_addc_u32 s5, s5, s7
49+
; CHECK-NEXT: s_add_u32 s4, s6, s4
50+
; CHECK-NEXT: s_addc_u32 s5, s7, s5
5151
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
5252
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
5353
; CHECK-NEXT: v_mov_b32_e32 v1, s4
@@ -70,8 +70,8 @@ define void @use_extern_overalign() #0 {
7070
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
7171
; CHECK-NEXT: v_mov_b32_e32 v0, 0x42280000
7272
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
73-
; CHECK-NEXT: s_add_u32 s4, s4, s6
74-
; CHECK-NEXT: s_addc_u32 s5, s5, s7
73+
; CHECK-NEXT: s_add_u32 s4, s6, s4
74+
; CHECK-NEXT: s_addc_u32 s5, s7, s5
7575
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
7676
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
7777
; CHECK-NEXT: v_mov_b32_e32 v1, s4

llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ define void @f2() {
8484
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+4
8585
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+12
8686
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
87-
; GCN-NEXT: s_add_u32 s4, s4, s6
88-
; GCN-NEXT: s_addc_u32 s5, s5, s7
87+
; GCN-NEXT: s_add_u32 s4, s6, s4
88+
; GCN-NEXT: s_addc_u32 s5, s7, s5
8989
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
9090
; GCN-NEXT: s_waitcnt lgkmcnt(0)
9191
; GCN-NEXT: v_mov_b32_e32 v2, s4

llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ define void @f0() {
4949
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+4
5050
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+12
5151
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
52-
; GCN-NEXT: s_add_u32 s4, s4, s6
53-
; GCN-NEXT: s_addc_u32 s5, s5, s7
52+
; GCN-NEXT: s_add_u32 s4, s6, s4
53+
; GCN-NEXT: s_addc_u32 s5, s7, s5
5454
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
5555
; GCN-NEXT: s_waitcnt lgkmcnt(0)
5656
; GCN-NEXT: v_mov_b32_e32 v0, s4
@@ -90,8 +90,8 @@ define void @f1() {
9090
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+8
9191
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+16
9292
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
93-
; GCN-NEXT: s_add_u32 s4, s4, s6
94-
; GCN-NEXT: s_addc_u32 s5, s5, s7
93+
; GCN-NEXT: s_add_u32 s4, s6, s4
94+
; GCN-NEXT: s_addc_u32 s5, s7, s5
9595
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
9696
; GCN-NEXT: s_waitcnt lgkmcnt(0)
9797
; GCN-NEXT: v_mov_b32_e32 v0, s4
@@ -131,8 +131,8 @@ define void @f2() {
131131
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+12
132132
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+20
133133
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
134-
; GCN-NEXT: s_add_u32 s4, s4, s6
135-
; GCN-NEXT: s_addc_u32 s5, s5, s7
134+
; GCN-NEXT: s_add_u32 s4, s6, s4
135+
; GCN-NEXT: s_addc_u32 s5, s7, s5
136136
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
137137
; GCN-NEXT: s_waitcnt lgkmcnt(0)
138138
; GCN-NEXT: v_mov_b32_e32 v2, s4
@@ -172,8 +172,8 @@ define void @f3() {
172172
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+16
173173
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+24
174174
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
175-
; GCN-NEXT: s_add_u32 s4, s4, s6
176-
; GCN-NEXT: s_addc_u32 s5, s5, s7
175+
; GCN-NEXT: s_add_u32 s4, s6, s4
176+
; GCN-NEXT: s_addc_u32 s5, s7, s5
177177
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
178178
; GCN-NEXT: s_waitcnt lgkmcnt(0)
179179
; GCN-NEXT: v_mov_b32_e32 v0, s4

llvm/test/CodeGen/AMDGPU/match-perm-extract-vector-elt-bug.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
1313
; GFX9-NEXT: s_and_b32 s4, s4, 0xffff
1414
; GFX9-NEXT: s_mul_i32 s14, s14, s4
1515
; GFX9-NEXT: s_add_i32 s5, s5, s14
16-
; GFX9-NEXT: v_add_u32_e32 v0, s5, v0
17-
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
18-
; GFX9-NEXT: v_lshlrev_b64 v[4:5], 4, v[0:1]
16+
; GFX9-NEXT: v_add_u32_e32 v1, s5, v0
17+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
18+
; GFX9-NEXT: v_ashrrev_i64 v[4:5], 28, v[0:1]
1919
; GFX9-NEXT: v_mov_b32_e32 v1, s1
2020
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v4
2121
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
@@ -37,12 +37,12 @@ define amdgpu_kernel void @test(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
3737
; GFX10-NEXT: s_load_dword s4, s[8:9], 0x1c
3838
; GFX10-NEXT: s_load_dword s5, s[8:9], 0x38
3939
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
40+
; GFX10-NEXT: v_mov_b32_e32 v1, 0
4041
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
4142
; GFX10-NEXT: s_and_b32 s4, s4, 0xffff
4243
; GFX10-NEXT: s_mul_i32 s14, s14, s4
43-
; GFX10-NEXT: v_add3_u32 v0, s5, s14, v0
44-
; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0
45-
; GFX10-NEXT: v_lshlrev_b64 v[4:5], 4, v[0:1]
44+
; GFX10-NEXT: v_add3_u32 v2, s5, s14, v0
45+
; GFX10-NEXT: v_ashrrev_i64 v[4:5], 28, v[1:2]
4646
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v4
4747
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v5, vcc_lo
4848
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, s2, v4
@@ -62,21 +62,19 @@ define amdgpu_kernel void @test(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
6262
; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x1c
6363
; GFX11-NEXT: s_load_b32 s7, s[4:5], 0x38
6464
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
65-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
65+
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_and_b32 v1, 0x3ff, v0
6666
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6767
; GFX11-NEXT: s_and_b32 s4, s6, 0xffff
6868
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6969
; GFX11-NEXT: s_mul_i32 s13, s13, s4
7070
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
71-
; GFX11-NEXT: v_add3_u32 v0, s7, s13, v0
72-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
73-
; GFX11-NEXT: v_ashrrev_i32_e32 v1, 31, v0
74-
; GFX11-NEXT: v_lshlrev_b64 v[4:5], 4, v[0:1]
71+
; GFX11-NEXT: v_add3_u32 v1, s7, s13, v1
7572
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
73+
; GFX11-NEXT: v_ashrrev_i64 v[4:5], 28, v[0:1]
7674
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, s0, v4
75+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
7776
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v5, vcc_lo
7877
; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, s2, v4
79-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8078
; GFX11-NEXT: v_add_co_ci_u32_e64 v5, null, s3, v5, vcc_lo
8179
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
8280
; GFX11-NEXT: s_waitcnt vmcnt(0)

llvm/test/CodeGen/AMDGPU/memmove-var-size.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,8 @@ define void @memmove_p0_p3(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align
388388
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
389389
; CHECK-NEXT: s_cbranch_execz .LBB2_13
390390
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
391-
; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v0
392-
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v1, s4
391+
; CHECK-NEXT: v_add_co_u32 v9, s4, v0, v3
392+
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v1, v4, s4
393393
; CHECK-NEXT: v_add3_u32 v4, v3, v2, -1
394394
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
395395
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4
@@ -684,8 +684,8 @@ define void @memmove_p0_p5(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align
684684
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
685685
; CHECK-NEXT: s_cbranch_execz .LBB4_13
686686
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
687-
; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v0
688-
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v1, s4
687+
; CHECK-NEXT: v_add_co_u32 v9, s4, v0, v3
688+
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v1, v4, s4
689689
; CHECK-NEXT: v_add3_u32 v4, v3, v2, -1
690690
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
691691
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4
@@ -1411,8 +1411,8 @@ define void @memmove_p3_p0(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align
14111411
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
14121412
; CHECK-NEXT: s_cbranch_execz .LBB10_13
14131413
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
1414-
; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v1
1415-
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v2, s4
1414+
; CHECK-NEXT: v_add_co_u32 v9, s4, v1, v3
1415+
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v2, v4, s4
14161416
; CHECK-NEXT: v_add3_u32 v4, v3, v0, -1
14171417
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
14181418
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4
@@ -1889,8 +1889,8 @@ define void @memmove_p5_p0(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align
18891889
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
18901890
; CHECK-NEXT: s_cbranch_execz .LBB15_13
18911891
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
1892-
; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v1
1893-
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v2, s4
1892+
; CHECK-NEXT: v_add_co_u32 v9, s4, v1, v3
1893+
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v2, v4, s4
18941894
; CHECK-NEXT: v_add3_u32 v4, v3, v0, -1
18951895
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
18961896
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -277,8 +277,7 @@ define amdgpu_kernel void @random_incorrect_offset(ptr addrspace(1) inreg %out)
277277
; GFX942-NEXT: .p2align 8
278278
; GFX942-NEXT: ; %bb.2:
279279
; GFX942-NEXT: .LBB8_0:
280-
; GFX942-NEXT: s_mov_b32 s4, 8
281-
; GFX942-NEXT: s_load_dword s0, s[0:1], s4 offset:0x2
280+
; GFX942-NEXT: s_load_dword s0, s[0:1], 0xa
282281
; GFX942-NEXT: v_mov_b32_e32 v0, 0
283282
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
284283
; GFX942-NEXT: v_mov_b32_e32 v1, s0
@@ -293,8 +292,7 @@ define amdgpu_kernel void @random_incorrect_offset(ptr addrspace(1) inreg %out)
293292
; GFX90a-NEXT: .p2align 8
294293
; GFX90a-NEXT: ; %bb.2:
295294
; GFX90a-NEXT: .LBB8_0:
296-
; GFX90a-NEXT: s_mov_b32 s0, 8
297-
; GFX90a-NEXT: s_load_dword s0, s[4:5], s0 offset:0x2
295+
; GFX90a-NEXT: s_load_dword s0, s[4:5], 0xa
298296
; GFX90a-NEXT: v_mov_b32_e32 v0, 0
299297
; GFX90a-NEXT: s_waitcnt lgkmcnt(0)
300298
; GFX90a-NEXT: v_mov_b32_e32 v1, s0

llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
612612
; GFX10-NEXT: s_movk_i32 s1, 0x7f
613613
; GFX10-NEXT: v_and_b32_e32 v6, 0xfe000000, v1
614614
; GFX10-NEXT: v_lshl_or_b32 v0, v0, 3, v6
615-
; GFX10-NEXT: v_add_co_u32 v0, s0, v0, s34
616-
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s35, s0
615+
; GFX10-NEXT: v_add_co_u32 v0, s0, s34, v0
616+
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s35, 0, s0
617617
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x5000, v0
618618
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
619619
; GFX10-NEXT: .LBB1_1: ; %for.cond.preheader
@@ -830,8 +830,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
830830
; GFX11-NEXT: v_and_b32_e32 v6, 0xfe000000, v1
831831
; GFX11-NEXT: v_lshl_or_b32 v0, v0, 3, v6
832832
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
833-
; GFX11-NEXT: v_add_co_u32 v0, s0, v0, s34
834-
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s35, s0
833+
; GFX11-NEXT: v_add_co_u32 v0, s0, s34, v0
834+
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s35, 0, s0
835835
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
836836
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x5000, v0
837837
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo

llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=GFX6 %s
43

54
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.
65

@@ -34,7 +33,3 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in
3433
store i32 %result, ptr addrspace(1) %out
3534
ret void
3635
}
37-
38-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
39-
; GFX6_LEGACY: {{.*}}
40-
; GFX6_PTRADD: {{.*}}

0 commit comments

Comments
 (0)