Skip to content

[AMDGPU] Add support for v_cvt_pk_f16_bf8 on gfx1250 #145753

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")

TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")

#undef BUILTIN
#undef TARGET_BUILTIN
20 changes: 20 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,23 @@ void test_cvt_pk_f16_fp8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_fp8(a);
}

// CHECK-LABEL: @test_cvt_pk_f16_bf8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0
// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 4
// CHECK-NEXT: ret void
//
// Codegen test: passes the short argument `a` to
// __builtin_amdgcn_cvt_pk_f16_bf8 and stores the half2 result in out[0].
// The FileCheck lines preceding this function expect the builtin to be emitted
// as a call to @llvm.amdgcn.cvt.pk.f16.bf8 taking an i16 and yielding <2 x half>.
void test_cvt_pk_f16_bf8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,10 @@ def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;

// Intrinsic producing a <2 x half> (llvm_v2f16_ty) result from one i16 operand.
// It is declared IntrNoMem and IntrSpeculatable and is exposed to Clang as
// __builtin_amdgcn_cvt_pk_f16_bf8.
// NOTE(review): presumably the i16 carries two packed bf8 values that are each
// converted to f16 -- confirm against the gfx1250 ISA documentation.
def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;

class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
[DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4542,6 +4542,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_i16:
case Intrinsic::amdgcn_cvt_pk_u16:
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
case Intrinsic::amdgcn_cvt_pk_f16_bf8:
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,9 @@ let SubtargetPredicate = isGFX1250Plus in {
defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8",
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
int_amdgcn_cvt_pk_f16_fp8>;
defm V_CVT_PK_F16_BF8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_bf8",
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
int_amdgcn_cvt_pk_f16_bf8>;
}
} // End SubtargetPredicate = isGFX1250Plus

Expand Down Expand Up @@ -1078,6 +1081,7 @@ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;

defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;

//===----------------------------------------------------------------------===//
// GFX10.
Expand Down
35 changes: 35 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,41 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s

; Plain (non-inreg) i16 argument case: the expected instruction in the
; assertions below reads the source from v0 (written v0.l in the real-true16
; runs). The <2 x half> intrinsic result is bitcast to float and returned as
; the amdgpu_ps function's return value.
define amdgpu_ps float @test_cvt_pk_f16_bf8_v(i16 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_bf8_v:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_bf8_v:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_bf8_v:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_bf8_v:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a)
%ret = bitcast <2 x half> %cvt to float
ret float %ret
}

; inreg i16 argument case: the expected instruction in the assertions below
; reads the source from s0, and one shared set of assertions covers every run
; configuration. The <2 x half> intrinsic result is bitcast to float and
; returned as the amdgpu_ps function's return value.
define amdgpu_ps float @test_cvt_pk_f16_bf8_s(i16 inreg %a) {
; GFX1250-LABEL: test_cvt_pk_f16_bf8_s:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_cvt_pk_f16_bf8 v0, s0
; GFX1250-NEXT: ; return to shader part epilog
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a)
%ret = bitcast <2 x half> %cvt to float
ret float %ret
}

define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v:
; GFX1250-SDAG-REAL16: ; %bb.0:
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ v_cvt_f32_bf16 v5, src_scc
v_cvt_f32_bf16 v127, 0x8000
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]

v_cvt_pk_f16_bf8 v1, v2
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]

v_cvt_pk_f16_bf8 v1, s2
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]

v_cvt_pk_f16_bf8 v1, 100
// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v2
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]

Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ v_cvt_f32_bf16 v127, 0x8000
v_cvt_f32_bf16 v5, v1.h
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]

v_cvt_pk_f16_bf8 v1, v2
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]

v_cvt_pk_f16_bf8 v1, s2
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]

v_cvt_pk_f16_bf8 v1, 100
// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v2
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]

Expand Down
4 changes: 4 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,7 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v2.h quad_perm:[0,1,2,3]
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xec,0x02,0x7e,0x82,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
10 changes: 10 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s

v_cvt_pk_f16_bf8 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_cvt_pk_f16_bf8 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_cvt_pk_f16_fp8 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 clamp
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1]
v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00]

v_cvt_pk_f16_bf8 v1, v150
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_bf8 v1, v2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00]

v_cvt_pk_f16_bf8 v1, v150 op_sel:[1]
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_bf8 v1, s2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v150
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]

Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
v_cvt_f32_bf16_e64 v5, v128.h
// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00]

v_cvt_pk_f16_bf8 v1, v150
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_bf8 v1, v2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00]

v_cvt_pk_f16_bf8 v1, v150 op_sel:[1]
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_bf8 v1, s2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v150
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]

Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v128 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v128.h quad_perm:[0,1,2,3]
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s

v_cvt_pk_f16_bf8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s

v_cvt_pk_f16_bf8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_bf8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@
0x81,0xe5,0x0a,0x7e
# GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]

0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00
# GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]

0x02,0xec,0x02,0x7e
# GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]

0x02,0xed,0x02,0x7e
# GFX1250-REAL16: v_cvt_pk_f16_bf8 v1, v2.l ; encoding: [0x02,0xed,0x02,0x7e]
# GFX1250-FAKE16: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]

0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00
# GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]

Expand Down
8 changes: 8 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@
0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff
# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]

0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]

0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff]

0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
Expand Down
Loading
Loading