diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index edb3a17ac07c6..94fa3e9b74c46 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -642,5 +642,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16 TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 3709b1ff52f35..3f7a2d8649740 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -10,3 +10,23 @@ void test_setprio_inc_wg() { __builtin_amdgcn_s_setprio_inc_wg(10); } + +// CHECK-LABEL: @test_cvt_pk_f16_fp8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0 +// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void +// +void test_cvt_pk_f16_fp8(global half2* out, short a) +{ + out[0] = 
__builtin_amdgcn_cvt_pk_f16_fp8(a); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e6f0bf6276086..72b0aa01f71aa 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -588,6 +588,10 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; +def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">; + class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index a7b08794fdf1b..50d297cd096a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4541,6 +4541,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_pknorm_u16: case Intrinsic::amdgcn_cvt_pk_i16: case Intrinsic::amdgcn_cvt_pk_u16: + case Intrinsic::amdgcn_cvt_pk_f16_fp8: case Intrinsic::amdgcn_fmed3: case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 4ef7a34261b6f..1f44cd7775ad9 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -729,6 +729,21 @@ let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] } } +// FIXME-TRUE16: True16 versions of these instructions are untested. 
+let HasExtSDWA = 0, HasOpSel = 1, EmitDstSel = 0, HasOMod = 0, HasModifiers = 1 in { +def VOPProfile_CVT_PK_F16_F8 : VOPProfile<[v2f16, i16, untyped, untyped]>; +def VOPProfile_CVT_PK_F16_F8_true16 : VOP3_Profile_True16<VOPProfile_CVT_PK_F16_F8>; +def VOPProfile_CVT_PK_F16_F8_fake16 : VOP3_Profile_Fake16<VOPProfile_CVT_PK_F16_F8>; +} + +let SubtargetPredicate = isGFX1250Plus in { + let mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in { + defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8", + VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16, + int_amdgcn_cvt_pk_f16_fp8>; + } +} // End SubtargetPredicate = isGFX1250Plus + let SubtargetPredicate = isGFX10Plus in { defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT>; @@ -1062,6 +1077,7 @@ defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>; defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>; defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">; +defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>; //===----------------------------------------------------------------------===// // GFX10. 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll new file mode 100644 index 0000000000000..67b5381646e29 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s + +define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l +; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l +; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @test_cvt_pk_f16_fp8_s(i16 inreg %a) { +; 
GFX1250-LABEL: test_cvt_pk_f16_fp8_s: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_pk_f16_fp8 v0, s0 +; GFX1250-NEXT: ; return to shader part epilog + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @test_cvt_pk_f16_fp8_v_hi(<2 x i16> %a) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.h +; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l +; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog + %a.1 = extractelement <2 x i16> %a, i32 1 + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a.1) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index 9b2e506d4e043..94ac6571935f8 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -45,3 +45,12 @@ v_cvt_f32_bf16 v5, 
src_scc v_cvt_f32_bf16 v127, 0x8000 // GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v2 +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, s2 +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, 100 +// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 1d1badc4f009b..d6a0a95b60762 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -48,3 +48,12 @@ v_cvt_f32_bf16 v127, 0x8000 v_cvt_f32_bf16 v5, v1.h // GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e] + +v_cvt_pk_f16_fp8 v1, v2 +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, s2 +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, 100 +// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index 18b6c91e2fb8c..674a4f738d8a1 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -57,3 +57,7 @@ v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; 
encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index 0f71c46eb4725..2ac6e0962fc89 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -61,3 +61,11 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl: v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.h quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s index c973022dbeca6..de01522820134 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -13,3 +13,7 @@ v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s index 8a5f3cba2fbc0..6d11be647001b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s @@ -17,3 +17,11 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s index 3ddf244c39af5..70879ddf0b50f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s @@ -1,5 +1,15 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s +v_cvt_pk_f16_fp8 v1, v2 clamp +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 clamp +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_pk_f16_fp8 v1, v2 mul:2 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 mul:2 +// GFX1250-ERR-NEXT:{{^}} ^ + v_cvt_f32_bf16 v5, v1 clamp // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 clamp diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 6a800558c1a49..6354820f96c16 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -75,3 +75,15 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1] v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1] // GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, s2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index 99e332ec8c8fd..a3596d4332c55 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -78,3 +78,15 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1] v_cvt_f32_bf16_e64 v5, v128.h // GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: 
[0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, s2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s index ed8636fda1321..3fd7d00cd4190 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s @@ -45,3 +45,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15 v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s index 76720fc2ceadc..a2bc2b4310c74 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s @@ -49,3 +49,11 @@ 
v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.h quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s index e5e46750bf358..c53ba6958e47a 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s @@ -5,3 +5,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp 
v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s index 664509c41b287..0bc0e2baa2ce7 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s @@ -9,3 +9,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] v_cvt_f32_bf16_e64_dpp v5, v128.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index 2ad9a106099f4..1d972ea09ef57 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -49,3 +49,13 @@ 0x81,0xe5,0x0a,0x7e # GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e] + +0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] + +0x02,0xea,0x02,0x7e +# GFX1250: v_cvt_pk_f16_fp8 
v1, s2 ; encoding: [0x02,0xea,0x02,0x7e] + +0x02,0xeb,0x02,0x7e +# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt index 9607dc06dbdab..53a5081e58e8f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -46,3 +46,11 @@ 0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff # GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff] + +0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] + +0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt index 29097f93e4471..8226d1b5dc252 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -13,3 +13,11 @@ 0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05 # GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05] + 
+0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index 058878599128a..ed803483a79b1 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -47,3 +47,18 @@ 0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00 # GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] # GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] + +0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] + +0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v150.l ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] + +0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v150.h op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] + +0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: 
v_cvt_pk_f16_fp8 v1, v2.h op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt index 89e8935a06dea..29fc7d8fac5e1 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt @@ -49,3 +49,11 @@ 0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff # GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] # GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v128 op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] + +0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] + +0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt index cc6c67509475b..266732ecf1efb 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt @@ -9,3 +9,11 @@ 0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] # GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v128 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + +0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + +0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]