Skip to content

[AMDGPU] Add support for v_cvt_pk_f16_fp8 on gfx1250 #145747

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -642,5 +642,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16

TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")

TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")

#undef BUILTIN
#undef TARGET_BUILTIN
20 changes: 20 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,23 @@
void test_setprio_inc_wg() {
__builtin_amdgcn_s_setprio_inc_wg(10);
}

// CHECK-LABEL: @test_cvt_pk_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0
// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 4
// CHECK-NEXT: ret void
//
void test_cvt_pk_f16_fp8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_fp8(a);
}
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,10 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;

def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;

class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
[DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4541,6 +4541,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pknorm_u16:
case Intrinsic::amdgcn_cvt_pk_i16:
case Intrinsic::amdgcn_cvt_pk_u16:
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,21 @@ let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts]
}
}

// FIXME-TRUE16: True16 versions of these instructions are untested.
let HasExtSDWA = 0, HasOpSel = 1, EmitDstSel = 0, HasOMod = 0, HasModifiers = 1 in {
def VOPProfile_CVT_PK_F16_F8 : VOPProfile<[v2f16, i16, untyped, untyped]>;
def VOPProfile_CVT_PK_F16_F8_true16 : VOP3_Profile_True16<VOPProfile_CVT_PK_F16_F8>;
def VOPProfile_CVT_PK_F16_F8_fake16 : VOP3_Profile_Fake16<VOPProfile_CVT_PK_F16_F8>;
}

let SubtargetPredicate = isGFX1250Plus in {
let mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8",
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
int_amdgcn_cvt_pk_f16_fp8>;
}
} // End SubtargetPredicate = isGFX1250Plus

let SubtargetPredicate = isGFX10Plus in {
defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;

Expand Down Expand Up @@ -1062,6 +1077,7 @@ defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;

defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;

//===----------------------------------------------------------------------===//
// GFX10.
Expand Down
72 changes: 72 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s

define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a)
%ret = bitcast <2 x half> %cvt to float
ret float %ret
}

define amdgpu_ps float @test_cvt_pk_f16_fp8_s(i16 inreg %a) {
; GFX1250-LABEL: test_cvt_pk_f16_fp8_s:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: v_cvt_pk_f16_fp8 v0, s0
; GFX1250-NEXT: ; return to shader part epilog
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a)
%ret = bitcast <2 x half> %cvt to float
ret float %ret
}

define amdgpu_ps float @test_cvt_pk_f16_fp8_v_hi(<2 x i16> %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.h
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
%a.1 = extractelement <2 x i16> %a, i32 1
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a.1)
%ret = bitcast <2 x half> %cvt to float
ret float %ret
}
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,12 @@ v_cvt_f32_bf16 v5, src_scc

v_cvt_f32_bf16 v127, 0x8000
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v2
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]

v_cvt_pk_f16_fp8 v1, s2
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e]

v_cvt_pk_f16_fp8 v1, 100
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
9 changes: 9 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,12 @@ v_cvt_f32_bf16 v127, 0x8000

v_cvt_f32_bf16 v5, v1.h
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]

v_cvt_pk_f16_fp8 v1, v2
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]

v_cvt_pk_f16_fp8 v1, s2
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e]

v_cvt_pk_f16_fp8 v1, 100
// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
4 changes: 4 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@ v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,11 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2.h quad_perm:[0,1,2,3]
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
4 changes: 4 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,11 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
10 changes: 10 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s

v_cvt_pk_f16_fp8 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 clamp
// GFX1250-ERR-NEXT:{{^}} ^

v_cvt_pk_f16_fp8 v1, v2 mul:2
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 mul:2
// GFX1250-ERR-NEXT:{{^}} ^

v_cvt_f32_bf16 v5, v1 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 clamp
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,15 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1]

v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v150
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v150 op_sel:[1]
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_fp8 v1, s2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00]
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,15 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]

v_cvt_f32_bf16_e64 v5, v128.h
// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v150
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00]

v_cvt_pk_f16_fp8 v1, v150 op_sel:[1]
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00]

v_cvt_pk_f16_fp8 v1, s2 op_sel:[1]
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00]
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15
v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128.h quad_perm:[0,1,2,3]
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,11 @@
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8 changes: 8 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,11 @@ v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
v_cvt_f32_bf16_e64_dpp v5, v128.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

v_cvt_pk_f16_fp8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
10 changes: 10 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,13 @@

0x81,0xe5,0x0a,0x7e
# GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]

0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00
# GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]

0x02,0xea,0x02,0x7e
# GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e]

0x02,0xeb,0x02,0x7e
# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e]
# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
8 changes: 8 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,11 @@

0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff
# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]

0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]

0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff]
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff]
8 changes: 8 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@

0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05
# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]

0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]

0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
Loading
Loading