Skip to content

Commit a905b76

Browse files
rampitec and shiltian committed
[AMDGPU] Add the support for v_cvt_f32_bf16 on gfx1250
Co-authored-by: Shilei Tian <[email protected]>
1 parent a76448c commit a905b76

21 files changed

+798 -7 lines changed

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
599599
DecW, Address, CS))
600600
break;
601601

602+
if (isGFX1250() &&
603+
tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
604+
DecW, Address, CS))
605+
break;
606+
602607
if (isGFX12() &&
603608
tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
604609
DecW, Address, CS))
@@ -661,9 +666,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
661666
if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
662667
break;
663668

664-
// FIXME: DecoderTableGFX125064 is not defined yet.
665669
if (isGFX1250() &&
666-
tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS))
670+
tryDecodeInst(DecoderTableGFX125064, MI, QW, Address, CS))
667671
break;
668672

669673
if (isGFX12() &&
@@ -722,10 +726,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
722726
Address, CS))
723727
break;
724728

725-
// FIXME: Should use DecoderTableGFX1250_FAKE1632, but it is not generated
726-
// yet.
727729
if (isGFX1250() &&
728-
tryDecodeInst(DecoderTableGFX125032, MI, DW, Address, CS))
730+
tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
731+
DW, Address, CS))
729732
break;
730733

731734
if (isGFX12() &&

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,12 @@ foreach vt = Reg32Types.types in {
259259
>;
260260
}
261261

262+
let HasOMod = 0, HasClamp = 0 in {
263+
def VOPProfile_CVT_F32_BF16_gfx1250_t16 : VOPProfile_True16 <VOP_F32_BF16>;
264+
let HasOpSel = 1, EmitDstSel = 0 in
265+
def VOPProfile_CVT_F32_BF16_gfx1250_fake16 : VOPProfile_Fake16 <VOP_F32_BF16>;
266+
} // End HasOMod = 0, HasClamp = 0
267+
262268
let isReMaterializable = 1 in {
263269
let SchedRW = [WriteDoubleCvt] in {
264270
// OMod clears exceptions when set in this instruction
@@ -309,8 +315,14 @@ let OtherPredicates = [UseRealTrue16Insts] in
309315
let OtherPredicates = [UseFakeTrue16Insts] in
310316
defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
311317

312-
let SubtargetPredicate = HasBF16ConversionInsts in
313-
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
318+
let SubtargetPredicate = HasGFX950Insts, OtherPredicates = [HasBF16ConversionInsts] in {
319+
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
320+
}
321+
let SubtargetPredicate = isGFX1250Plus, OtherPredicates = [HasBF16ConversionInsts] in {
322+
defm V_CVT_F32_BF16_gfx1250 : VOP1Inst_t16_with_profiles <"v_cvt_f32_bf16_gfx1250", VOP_F32_BF16,
323+
VOPProfile_CVT_F32_BF16_gfx1250_t16,
324+
VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
325+
}
314326

315327
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
316328
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -980,6 +992,13 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
980992
VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
981993
VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
982994

995+
multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
996+
bits<9> op, string asmName = !tolower(NAME), string opName = NAME> {
997+
defm opName#"_t16" :
998+
VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_t16", asmName>;
999+
defm opName#"_fake16":
1000+
VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
1001+
}
9831002

9841003
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
9851004
defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
@@ -1042,6 +1061,8 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
10421061
defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
10431062
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
10441063

1064+
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
1065+
10451066
//===----------------------------------------------------------------------===//
10461067
// GFX10.
10471068
//===----------------------------------------------------------------------===//
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
3+
4+
v_cvt_f32_bf16 v5, v1
5+
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
6+
7+
v_cvt_f32_bf16 v5, v127
8+
// GFX1250: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xe5,0x0a,0x7e]
9+
10+
v_cvt_f32_bf16 v5, s1
11+
// GFX1250: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xe4,0x0a,0x7e]
12+
13+
v_cvt_f32_bf16 v5, s105
14+
// GFX1250: v_cvt_f32_bf16_e32 v5, s105 ; encoding: [0x69,0xe4,0x0a,0x7e]
15+
16+
v_cvt_f32_bf16 v5, vcc_lo
17+
// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xe4,0x0a,0x7e]
18+
19+
v_cvt_f32_bf16 v5, vcc_hi
20+
// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xe4,0x0a,0x7e]
21+
22+
v_cvt_f32_bf16 v5, ttmp15
23+
// GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xe4,0x0a,0x7e]
24+
25+
v_cvt_f32_bf16 v5, m0
26+
// GFX1250: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7d,0xe4,0x0a,0x7e]
27+
28+
v_cvt_f32_bf16 v5, exec_lo
29+
// GFX1250: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xe4,0x0a,0x7e]
30+
31+
v_cvt_f32_bf16 v5, exec_hi
32+
// GFX1250: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xe4,0x0a,0x7e]
33+
34+
v_cvt_f32_bf16 v5, null
35+
// GFX1250: v_cvt_f32_bf16_e32 v5, null ; encoding: [0x7c,0xe4,0x0a,0x7e]
36+
37+
v_cvt_f32_bf16 v5, -1
38+
// GFX1250: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xe4,0x0a,0x7e]
39+
40+
v_cvt_f32_bf16 v5, 0.5
41+
// GFX1250: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xe4,0x0a,0x7e]
42+
43+
v_cvt_f32_bf16 v5, src_scc
44+
// GFX1250: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xe4,0x0a,0x7e]
45+
46+
v_cvt_f32_bf16 v127, 0x8000
47+
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
3+
4+
v_cvt_f32_bf16 v5, v1
5+
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
6+
7+
v_cvt_f32_bf16 v5, v127
8+
// GFX1250: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xe5,0x0a,0x7e]
9+
10+
v_cvt_f32_bf16 v5, s1
11+
// GFX1250: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xe4,0x0a,0x7e]
12+
13+
v_cvt_f32_bf16 v5, s105
14+
// GFX1250: v_cvt_f32_bf16_e32 v5, s105 ; encoding: [0x69,0xe4,0x0a,0x7e]
15+
16+
v_cvt_f32_bf16 v5, vcc_lo
17+
// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xe4,0x0a,0x7e]
18+
19+
v_cvt_f32_bf16 v5, vcc_hi
20+
// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xe4,0x0a,0x7e]
21+
22+
v_cvt_f32_bf16 v5, ttmp15
23+
// GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xe4,0x0a,0x7e]
24+
25+
v_cvt_f32_bf16 v5, m0
26+
// GFX1250: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7d,0xe4,0x0a,0x7e]
27+
28+
v_cvt_f32_bf16 v5, exec_lo
29+
// GFX1250: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xe4,0x0a,0x7e]
30+
31+
v_cvt_f32_bf16 v5, exec_hi
32+
// GFX1250: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xe4,0x0a,0x7e]
33+
34+
v_cvt_f32_bf16 v5, null
35+
// GFX1250: v_cvt_f32_bf16_e32 v5, null ; encoding: [0x7c,0xe4,0x0a,0x7e]
36+
37+
v_cvt_f32_bf16 v5, -1
38+
// GFX1250: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xe4,0x0a,0x7e]
39+
40+
v_cvt_f32_bf16 v5, 0.5
41+
// GFX1250: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xe4,0x0a,0x7e]
42+
43+
v_cvt_f32_bf16 v5, src_scc
44+
// GFX1250: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xe4,0x0a,0x7e]
45+
46+
v_cvt_f32_bf16 v127, 0x8000
47+
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
48+
49+
v_cvt_f32_bf16 v5, v1.h
50+
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
3+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
4+
5+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
6+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
7+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8+
9+
v_cvt_f32_bf16 v5, v1 quad_perm:[0,1,2,3]
10+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
11+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
13+
v_cvt_f32_bf16 v5, v1 row_mirror
14+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff]
15+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
16+
17+
v_cvt_f32_bf16 v5, v1 row_half_mirror
18+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff]
19+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
20+
21+
v_cvt_f32_bf16 v5, v1 row_shl:1
22+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff]
23+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
24+
25+
v_cvt_f32_bf16 v5, v1 row_shl:15
26+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
27+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
28+
29+
v_cvt_f32_bf16 v5, v1 row_shr:1
30+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff]
31+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
32+
33+
v_cvt_f32_bf16 v5, v1 row_shr:15
34+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
35+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
36+
37+
v_cvt_f32_bf16 v5, v1 row_ror:1
38+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff]
39+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
40+
41+
v_cvt_f32_bf16 v5, v1 row_ror:15
42+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
43+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
44+
45+
v_cvt_f32_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
46+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff]
47+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
48+
49+
v_cvt_f32_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
50+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
51+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
52+
53+
v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
54+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13]
55+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
56+
57+
v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
58+
// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
59+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
3+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
4+
5+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
6+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
7+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8+
9+
v_cvt_f32_bf16 v5, v1 quad_perm:[0,1,2,3]
10+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
11+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
13+
v_cvt_f32_bf16 v5, v1 row_mirror
14+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff]
15+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
16+
17+
v_cvt_f32_bf16 v5, v1 row_half_mirror
18+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff]
19+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
20+
21+
v_cvt_f32_bf16 v5, v1 row_shl:1
22+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff]
23+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
24+
25+
v_cvt_f32_bf16 v5, v1 row_shl:15
26+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
27+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
28+
29+
v_cvt_f32_bf16 v5, v1 row_shr:1
30+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff]
31+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
32+
33+
v_cvt_f32_bf16 v5, v1 row_shr:15
34+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
35+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
36+
37+
v_cvt_f32_bf16 v5, v1 row_ror:1
38+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff]
39+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
40+
41+
v_cvt_f32_bf16 v5, v1 row_ror:15
42+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
43+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
44+
45+
v_cvt_f32_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
46+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff]
47+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
48+
49+
v_cvt_f32_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
50+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
51+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
52+
53+
v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
54+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13]
55+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
56+
57+
v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
58+
// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
59+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
60+
61+
v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
62+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
63+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
3+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
4+
5+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
6+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
7+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8+
9+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
10+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
11+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
13+
v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
14+
// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
15+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
3+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
4+
5+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
6+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
7+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8+
9+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
10+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
11+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
13+
v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
14+
// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
15+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
16+
17+
v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0]
18+
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
19+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
2+
3+
v_cvt_f32_bf16 v5, v1 clamp
4+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5+
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 clamp
6+
// GFX1250-ERR-NEXT:{{^}} ^
7+
8+
v_cvt_f32_bf16 v5, v1 mul:2
9+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
10+
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 mul:2
11+
// GFX1250-ERR-NEXT:{{^}} ^
12+
13+
v_cvt_f32_bf16 v5, v1 div:2
14+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
15+
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 div:2
16+
// GFX1250-ERR-NEXT:{{^}} ^

0 commit comments

Comments
 (0)