Skip to content

Commit 7dde602

Browse files
authored
[DAG] canCreateUndefOrPoison - add handling for ISD::SELECT (#146046)
Followup to #143760, which handled ISD::VSELECT. I've moved ISD::SELECT/VSELECT under the "No poison except from flags (which is handled above)" subgroup to try to remind people that these can have poison-generating FMFs (NINF/NNAN), even though this hasn't been well explained anywhere I can find :( Helps with regressions from #145939.
1 parent 0158ca2 commit 7dde602

File tree

4 files changed

+20
-19
lines changed

4 files changed

+20
-19
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5553,7 +5553,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
55535553
case ISD::BUILD_VECTOR:
55545554
case ISD::BUILD_PAIR:
55555555
case ISD::SPLAT_VECTOR:
5556-
case ISD::VSELECT:
55575556
return false;
55585557

55595558
case ISD::SELECT_CC:
@@ -5577,6 +5576,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
55775576

55785577
case ISD::OR:
55795578
case ISD::ZERO_EXTEND:
5579+
case ISD::SELECT:
5580+
case ISD::VSELECT:
55805581
case ISD::ADD:
55815582
case ISD::SUB:
55825583
case ISD::MUL:

llvm/test/CodeGen/AMDGPU/bf16-conversions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
158158
; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v7
159159
; GFX-950-NEXT: v_cndmask_b32_e64 v2, -1, 1, s[2:3]
160160
; GFX-950-NEXT: v_add_u32_e32 v2, v6, v2
161-
; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
161+
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
162162
; GFX-950-NEXT: v_cvt_f32_f64_e32 v5, v[0:1]
163163
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc
164164
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[2:3], v5
@@ -168,7 +168,7 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
168168
; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v6
169169
; GFX-950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
170170
; GFX-950-NEXT: v_add_u32_e32 v0, v5, v0
171-
; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
171+
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
172172
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
173173
; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
174174
; GFX-950-NEXT: ; return to shader part epilog

llvm/test/CodeGen/RISCV/float-convert.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -643,12 +643,12 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
643643
; RV32IF-NEXT: addi a2, a3, -1
644644
; RV32IF-NEXT: .LBB12_4: # %start
645645
; RV32IF-NEXT: feq.s a3, fs0, fs0
646-
; RV32IF-NEXT: neg a4, s0
647-
; RV32IF-NEXT: neg a5, a1
646+
; RV32IF-NEXT: neg a4, a1
647+
; RV32IF-NEXT: neg a1, s0
648648
; RV32IF-NEXT: neg a3, a3
649-
; RV32IF-NEXT: and a0, a4, a0
649+
; RV32IF-NEXT: and a0, a1, a0
650650
; RV32IF-NEXT: and a1, a3, a2
651-
; RV32IF-NEXT: or a0, a5, a0
651+
; RV32IF-NEXT: or a0, a4, a0
652652
; RV32IF-NEXT: and a0, a3, a0
653653
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
654654
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload

llvm/test/CodeGen/RISCV/rv64-float-convert.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -125,26 +125,26 @@ define i128 @fptosi_sat_f32_to_i128(float %a) nounwind {
125125
; RV64IF-NEXT: fmv.w.x fa5, a0
126126
; RV64IF-NEXT: fle.s s0, fa5, fa0
127127
; RV64IF-NEXT: call __fixsfti
128-
; RV64IF-NEXT: li a3, -1
128+
; RV64IF-NEXT: li a2, -1
129129
; RV64IF-NEXT: bnez s0, .LBB4_2
130130
; RV64IF-NEXT: # %bb.1:
131-
; RV64IF-NEXT: slli a1, a3, 63
131+
; RV64IF-NEXT: slli a1, a2, 63
132132
; RV64IF-NEXT: .LBB4_2:
133-
; RV64IF-NEXT: lui a2, %hi(.LCPI4_0)
134-
; RV64IF-NEXT: flw fa5, %lo(.LCPI4_0)(a2)
135-
; RV64IF-NEXT: flt.s a2, fa5, fs0
136-
; RV64IF-NEXT: beqz a2, .LBB4_4
133+
; RV64IF-NEXT: lui a3, %hi(.LCPI4_0)
134+
; RV64IF-NEXT: flw fa5, %lo(.LCPI4_0)(a3)
135+
; RV64IF-NEXT: flt.s a3, fa5, fs0
136+
; RV64IF-NEXT: beqz a3, .LBB4_4
137137
; RV64IF-NEXT: # %bb.3:
138-
; RV64IF-NEXT: srli a1, a3, 1
138+
; RV64IF-NEXT: srli a1, a2, 1
139139
; RV64IF-NEXT: .LBB4_4:
140-
; RV64IF-NEXT: feq.s a3, fs0, fs0
140+
; RV64IF-NEXT: feq.s a2, fs0, fs0
141+
; RV64IF-NEXT: neg a3, a3
141142
; RV64IF-NEXT: neg a4, s0
142143
; RV64IF-NEXT: neg a2, a2
143-
; RV64IF-NEXT: neg a3, a3
144144
; RV64IF-NEXT: and a0, a4, a0
145-
; RV64IF-NEXT: and a1, a3, a1
146-
; RV64IF-NEXT: or a0, a2, a0
147-
; RV64IF-NEXT: and a0, a3, a0
145+
; RV64IF-NEXT: and a1, a2, a1
146+
; RV64IF-NEXT: or a0, a3, a0
147+
; RV64IF-NEXT: and a0, a2, a0
148148
; RV64IF-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
149149
; RV64IF-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
150150
; RV64IF-NEXT: flw fs0, 12(sp) # 4-byte Folded Reload

0 commit comments

Comments
 (0)