Skip to content

Commit 76ed960

Browse files
committed
[AMDGPU] Add support for v_cvt_pk_f16_bf8 on gfx1250
Co-authored-by: Shilei Tian <[email protected]>
1 parent 5e439f7 commit 76ed960

25 files changed

+230
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16
643643
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
644644

645645
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
646+
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
646647

647648
#undef BUILTIN
648649
#undef TARGET_BUILTIN

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,23 @@ void test_cvt_pk_f16_fp8(global half2* out, short a)
3030
{
3131
out[0] = __builtin_amdgcn_cvt_pk_f16_fp8(a);
3232
}
33+
34+
// CHECK-LABEL: @test_cvt_pk_f16_bf8(
35+
// CHECK-NEXT: entry:
36+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
37+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
38+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
39+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
40+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
41+
// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
42+
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2
43+
// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 [[TMP0]])
44+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
45+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0
46+
// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 4
47+
// CHECK-NEXT: ret void
48+
//
49+
void test_cvt_pk_f16_bf8(global half2* out, short a)
50+
{
51+
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
52+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,10 @@ def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
592592
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
593593
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
594594

595+
def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
596+
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
597+
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
598+
595599
class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
596600
[DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
597601
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4542,6 +4542,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
45424542
case Intrinsic::amdgcn_cvt_pk_i16:
45434543
case Intrinsic::amdgcn_cvt_pk_u16:
45444544
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
4545+
case Intrinsic::amdgcn_cvt_pk_f16_bf8:
45454546
case Intrinsic::amdgcn_fmed3:
45464547
case Intrinsic::amdgcn_cubeid:
45474548
case Intrinsic::amdgcn_cubema:

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,9 @@ let SubtargetPredicate = isGFX1250Plus in {
741741
defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8",
742742
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
743743
int_amdgcn_cvt_pk_f16_fp8>;
744+
defm V_CVT_PK_F16_BF8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_bf8",
745+
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
746+
int_amdgcn_cvt_pk_f16_bf8>;
744747
}
745748
} // End SubtargetPredicate = isGFX1250Plus
746749

@@ -1078,6 +1081,7 @@ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
10781081

10791082
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
10801083
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
1084+
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
10811085

10821086
//===----------------------------------------------------------------------===//
10831087
// GFX10.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,41 @@
44
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s
55
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s
66

7+
define amdgpu_ps float @test_cvt_pk_f16_bf8_v(i16 %a) {
8+
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_bf8_v:
9+
; GFX1250-SDAG-REAL16: ; %bb.0:
10+
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l
11+
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
12+
;
13+
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_bf8_v:
14+
; GFX1250-SDAG-FAKE16: ; %bb.0:
15+
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0
16+
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
17+
;
18+
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_bf8_v:
19+
; GFX1250-GISEL-REAL16: ; %bb.0:
20+
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l
21+
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
22+
;
23+
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_bf8_v:
24+
; GFX1250-GISEL-FAKE16: ; %bb.0:
25+
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0
26+
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
27+
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a)
28+
%ret = bitcast <2 x half> %cvt to float
29+
ret float %ret
30+
}
31+
32+
define amdgpu_ps float @test_cvt_pk_f16_bf8_s(i16 inreg %a) {
33+
; GFX1250-LABEL: test_cvt_pk_f16_bf8_s:
34+
; GFX1250: ; %bb.0:
35+
; GFX1250-NEXT: v_cvt_pk_f16_bf8 v0, s0
36+
; GFX1250-NEXT: ; return to shader part epilog
37+
%cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a)
38+
%ret = bitcast <2 x half> %cvt to float
39+
ret float %ret
40+
}
41+
742
define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) {
843
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v:
944
; GFX1250-SDAG-REAL16: ; %bb.0:

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@ v_cvt_f32_bf16 v5, src_scc
4646
v_cvt_f32_bf16 v127, 0x8000
4747
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
4848

49+
v_cvt_pk_f16_bf8 v1, v2
50+
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]
51+
52+
v_cvt_pk_f16_bf8 v1, s2
53+
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]
54+
55+
v_cvt_pk_f16_bf8 v1, 100
56+
// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]
57+
4958
v_cvt_pk_f16_fp8 v1, v2
5059
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
5160

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ v_cvt_f32_bf16 v127, 0x8000
4949
v_cvt_f32_bf16 v5, v1.h
5050
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
5151

52+
v_cvt_pk_f16_bf8 v1, v2
53+
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]
54+
55+
v_cvt_pk_f16_bf8 v1, s2
56+
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]
57+
58+
v_cvt_pk_f16_bf8 v1, 100
59+
// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]
60+
5261
v_cvt_pk_f16_fp8 v1, v2
5362
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
5463

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,7 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
6161
v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
6262
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
6363
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
64+
65+
v_cvt_pk_f16_bf8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
66+
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
6262
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
6363
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
6464

65+
v_cvt_pk_f16_bf8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
66+
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
67+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
68+
69+
v_cvt_pk_f16_bf8 v1, v2.h quad_perm:[0,1,2,3]
70+
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff]
71+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
72+
6573
v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
6674
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
6775
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,7 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
1717
v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
1818
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
1919
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
20+
21+
v_cvt_pk_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
22+
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05]
23+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,11 @@ v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
2525
v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
2626
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
2727
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
28+
29+
v_cvt_pk_f16_bf8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
30+
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05]
31+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
32+
33+
v_cvt_pk_f16_bf8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
34+
// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xec,0x02,0x7e,0x82,0x77,0x39,0x05]
35+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
22

3+
v_cvt_pk_f16_bf8 v1, v2 clamp
4+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5+
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 clamp
6+
// GFX1250-ERR-NEXT:{{^}} ^
7+
8+
v_cvt_pk_f16_bf8 v1, v2 mul:2
9+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
10+
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 mul:2
11+
// GFX1250-ERR-NEXT:{{^}} ^
12+
313
v_cvt_pk_f16_fp8 v1, v2 clamp
414
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
515
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 clamp

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,18 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1]
7676
v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
7777
// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00]
7878

79+
v_cvt_pk_f16_bf8 v1, v150
80+
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00]
81+
82+
v_cvt_pk_f16_bf8 v1, v2 op_sel:[1]
83+
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00]
84+
85+
v_cvt_pk_f16_bf8 v1, v150 op_sel:[1]
86+
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00]
87+
88+
v_cvt_pk_f16_bf8 v1, s2 op_sel:[1]
89+
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00]
90+
7991
v_cvt_pk_f16_fp8 v1, v150
8092
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]
8193

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,18 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
7979
v_cvt_f32_bf16_e64 v5, v128.h
8080
// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00]
8181

82+
v_cvt_pk_f16_bf8 v1, v150
83+
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00]
84+
85+
v_cvt_pk_f16_bf8 v1, v2 op_sel:[1]
86+
// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00]
87+
88+
v_cvt_pk_f16_bf8 v1, v150 op_sel:[1]
89+
// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00]
90+
91+
v_cvt_pk_f16_bf8 v1, s2 op_sel:[1]
92+
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00]
93+
8294
v_cvt_pk_f16_fp8 v1, v150
8395
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]
8496

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
4646
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
4747
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
4848

49+
v_cvt_pk_f16_bf8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
50+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
51+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
52+
53+
v_cvt_pk_f16_bf8 v1, v128 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
54+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
55+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
56+
4957
v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
5058
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
5159
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0]
5050
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
5151
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
5252

53+
v_cvt_pk_f16_bf8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
54+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
55+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
56+
57+
v_cvt_pk_f16_bf8 v1, v128.h quad_perm:[0,1,2,3]
58+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff]
59+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
60+
5361
v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
5462
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
5563
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
33
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
44

5+
v_cvt_pk_f16_bf8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1
6+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
7+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8+
9+
v_cvt_pk_f16_bf8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0]
10+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
11+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
513
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
614
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
715
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
33
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
44

5+
v_cvt_pk_f16_bf8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1
6+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
7+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8+
9+
v_cvt_pk_f16_bf8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0]
10+
// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
11+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
12+
513
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
614
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
715
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,16 @@
5050
0x81,0xe5,0x0a,0x7e
5151
# GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
5252

53+
0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00
54+
# GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]
55+
56+
0x02,0xec,0x02,0x7e
57+
# GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]
58+
59+
0x02,0xed,0x02,0x7e
60+
# GFX1250-REAL16: v_cvt_pk_f16_bf8 v1, v2.l ; encoding: [0x02,0xed,0x02,0x7e]
61+
# GFX1250-FAKE16: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]
62+
5363
0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00
5464
# GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00]
5565

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@
4747
0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff
4848
# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
4949

50+
0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff
51+
# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
52+
# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
53+
54+
0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff
55+
# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff]
56+
# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff]
57+
5058
0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff
5159
# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
5260
# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]

0 commit comments

Comments
 (0)