-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Add the support for v_cvt_f32_bf16
on gfx1250
#145632
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@rampitec For some reason, the instruction encoding works, but the decoder doesn't work. Those dpp decoding tests failed. It looks like we missed something here. |
Check generated lib/Target/AMDGPU/AMDGPUGenDisassemblerTables.inc. Search the instruction name. Go to AMDGPUDisassembler.cpp and add line processing that table. DecoderTableGFX125064? I.e., this?
and this:
|
Note that FAKE16 might not exist yet. I have couple of cases with TODO comments already, because we cannot upstream all at once, and we cannot rollback t16 support already in upstream. |
983f052
to
7484261
Compare
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Shilei Tian (shiltian) ChangesCo-authored-by: Shilei Tian <[email protected]> Patch is 56.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145632.diff 21 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 07a4292ef28bc..59c72fcbff18a 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -599,6 +599,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
DecW, Address, CS))
break;
+ if (isGFX1250() &&
+ tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
+ DecW, Address, CS))
+ break;
+
if (isGFX12() &&
tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
DecW, Address, CS))
@@ -661,6 +666,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
break;
+ if (isGFX1250() &&
+ tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
+ QW, Address, CS))
+ break;
+
if (isGFX12() &&
tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
Address, CS))
@@ -717,10 +727,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Address, CS))
break;
- // FIXME: Should use DecoderTableGFX1250_FAKE1632, but it is not generated
- // yet.
if (isGFX1250() &&
- tryDecodeInst(DecoderTableGFX125032, MI, DW, Address, CS))
+ tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
+ DW, Address, CS))
break;
if (isGFX12() &&
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 02b912bcfb9e0..4ef7a34261b6f 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -259,6 +259,12 @@ foreach vt = Reg32Types.types in {
>;
}
+let HasOMod = 0, HasClamp = 0 in {
+ def VOPProfile_CVT_F32_BF16_gfx1250_t16 : VOPProfile_True16 <VOP_F32_BF16>;
+ let HasOpSel = 1, EmitDstSel = 0 in
+ def VOPProfile_CVT_F32_BF16_gfx1250_fake16 : VOPProfile_Fake16 <VOP_F32_BF16>;
+} // End HasOMod = 0, HasClamp = 0
+
let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
@@ -309,8 +315,14 @@ let OtherPredicates = [UseRealTrue16Insts] in
let OtherPredicates = [UseFakeTrue16Insts] in
defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
-let SubtargetPredicate = HasBF16ConversionInsts in
-defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
+let SubtargetPredicate = HasGFX950Insts, OtherPredicates = [HasBF16ConversionInsts] in {
+ defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
+}
+let SubtargetPredicate = isGFX1250Plus, OtherPredicates = [HasBF16ConversionInsts] in {
+ defm V_CVT_F32_BF16_gfx1250 : VOP1Inst_t16_with_profiles <"v_cvt_f32_bf16_gfx1250", VOP_F32_BF16,
+ VOPProfile_CVT_F32_BF16_gfx1250_t16,
+ VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
+}
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -980,6 +992,13 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
+multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
+ bits<9> op, string asmName = !tolower(NAME), string opName = NAME> {
+ defm opName#"_t16" :
+ VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_t16", asmName>;
+ defm opName#"_fake16":
+ VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
+}
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
@@ -1042,6 +1061,8 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
+defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
new file mode 100644
index 0000000000000..9b2e506d4e043
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+
+v_cvt_f32_bf16 v5, v1
+// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, v127
+// GFX1250: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xe5,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, s1
+// GFX1250: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, s105
+// GFX1250: v_cvt_f32_bf16_e32 v5, s105 ; encoding: [0x69,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, vcc_lo
+// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, vcc_hi
+// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, ttmp15
+// GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, m0
+// GFX1250: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7d,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, exec_lo
+// GFX1250: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, exec_hi
+// GFX1250: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, null
+// GFX1250: v_cvt_f32_bf16_e32 v5, null ; encoding: [0x7c,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, -1
+// GFX1250: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, 0.5
+// GFX1250: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, src_scc
+// GFX1250: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v127, 0x8000
+// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
new file mode 100644
index 0000000000000..1d1badc4f009b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -0,0 +1,50 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+
+v_cvt_f32_bf16 v5, v1
+// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, v127
+// GFX1250: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xe5,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, s1
+// GFX1250: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, s105
+// GFX1250: v_cvt_f32_bf16_e32 v5, s105 ; encoding: [0x69,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, vcc_lo
+// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, vcc_hi
+// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, ttmp15
+// GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, m0
+// GFX1250: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7d,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, exec_lo
+// GFX1250: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, exec_hi
+// GFX1250: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, null
+// GFX1250: v_cvt_f32_bf16_e32 v5, null ; encoding: [0x7c,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, -1
+// GFX1250: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, 0.5
+// GFX1250: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v5, src_scc
+// GFX1250: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xe4,0x0a,0x7e]
+
+v_cvt_f32_bf16 v127, 0x8000
+// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_cvt_f32_bf16 v5, v1.h
+// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
new file mode 100644
index 0000000000000..18b6c91e2fb8c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -0,0 +1,59 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_mirror
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_half_mirror
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shl:1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shl:15
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shr:1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shr:15
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_ror:1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_ror:15
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
new file mode 100644
index 0000000000000..0f71c46eb4725
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -0,0 +1,63 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_mirror
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_half_mirror
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shl:1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shl:15
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shr:1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_shr:15
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_ror:1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_ror:15
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
new file mode 100644
index 0000000000000..c973022dbeca6
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -0,0 +1,15 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
new file mode 100644
index 0000000000000..8a5f3cba2fbc0
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -0,0 +1,19 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
...
[truncated]
|
7484261
to
a905b76
Compare
v_cvt_f32_bf16
on gfx1250v_cvt_f32_bf16
on gfx1250
2e6b53e
to
c653b0c
Compare
Co-authored-by: Shilei Tian <[email protected]>
c653b0c
to
aeadc62
Compare
Co-authored-by: Shilei Tian <[email protected]>
Co-authored-by: Shilei Tian [email protected]