Skip to content

Commit

Permalink
[AIE2P] legalizer support for G_FMUL
Browse files Browse the repository at this point in the history
  • Loading branch information
khallouh committed Feb 17, 2025
1 parent 9ef0eb3 commit a8dac06
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 2 deletions.
65 changes: 65 additions & 0 deletions llvm/lib/Target/AIE/AIELegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,71 @@ bool AIELegalizerHelper::legalizeG_FABS(LegalizerHelper &Helper,
return true;
}

/// Custom legalization of a scalar 16-bit (bfloat16) G_FMUL.
///
/// The scalar multiply is rewritten as a vector multiply:
///   1. Each s16 source is inserted at index 0 of an undefined V64BF16 vector.
///   2. Each vector is bitcast to V32FP32 and concatenated with an undefined
///      V32FP32 to form a V64FP32 operand.
///   3. A G_FMUL (same opcode as \p MI) is emitted on the V64FP32 operands.
///   4. The first V32ACC32 half of the product is converted to bfloat16 with
///      the subtarget's fp32->bf16 truncation intrinsic.
///   5. Element 0 is extracted (sign-extended to s32), annotated with
///      G_ASSERT_SEXT 16, and truncated into the original destination.
///
/// \param Helper provides the MachineIRBuilder used to emit the new code.
/// \param MI     the G_FMUL being legalized; it is erased on return.
/// \returns true (this lowering has no failure path).
bool AIELegalizerHelper::legalizeG_FMUL(LegalizerHelper &Helper,
                                        MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  const Register DstReg = MI.getOperand(0).getReg();
  Register SrcLHS = MI.getOperand(1).getReg();
  Register SrcRHS = MI.getOperand(2).getReg();

  assert(MRI.getType(DstReg) == LLT::scalar(16) &&
         "Expected bfloat16 type in custom legalization.");

  // The insertion vector type does not depend on the subtarget: the previous
  // `ST.isAIE2P() ? V64BF16 : V64BF16` had identical arms. V64BF16 is also the
  // only choice whose bit width matches the V32FP32 bitcast below.
  const LLT InsertVecLLT = V64BF16;
  const Register IdxReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
  const Register UndefVec = MIRBuilder.buildUndef(InsertVecLLT).getReg(0);

  // Place a scalar operand in lane 0 of an otherwise undefined vector.
  const unsigned InsertEltOpc =
      ST.getInstrInfo()->getGenericInsertVectorEltOpcode();
  const auto InsertAtLaneZero = [&](Register Scalar) -> Register {
    return MIRBuilder
        .buildInstr(InsertEltOpc, {InsertVecLLT}, {UndefVec, Scalar, IdxReg})
        .getReg(0);
  };
  SrcLHS = InsertAtLaneZero(SrcLHS);
  SrcRHS = InsertAtLaneZero(SrcRHS);

  // Widen each operand to V64FP32 by padding with an undefined upper half.
  const Register UndefVec1 = MIRBuilder.buildUndef(V32FP32).getReg(0);

  const Register ConcatLHS = MRI.createGenericVirtualRegister(V64FP32);
  const Register ConcatRHS = MRI.createGenericVirtualRegister(V64FP32);

  const Register BitcastLHS =
      MIRBuilder.buildBitcast(V32FP32, SrcLHS).getReg(0);
  MIRBuilder.buildConcatVectors(ConcatLHS, {BitcastLHS, UndefVec1});

  const Register BitcastRHS =
      MIRBuilder.buildBitcast(V32FP32, SrcRHS).getReg(0);
  MIRBuilder.buildConcatVectors(ConcatRHS, {BitcastRHS, UndefVec1});

  SrcLHS = ConcatLHS;
  SrcRHS = ConcatRHS;

  // Vector multiply using the same opcode as the instruction being replaced.
  Register Res =
      MIRBuilder.buildInstr(MI.getOpcode(), {V64FP32}, {SrcLHS, SrcRHS})
          .getReg(0);

  // Only the first half of the product carries the lane we care about.
  Res = MIRBuilder.buildUnmerge(V32ACC32, Res).getReg(0);

  // Convert the 32-bit lanes back to bfloat16 via the target intrinsic.
  const int VecSize = MRI.getType(Res).getSizeInBits();
  const LLT DstLLT = ST.isAIE2P() ? V32BF16 : V16BF16;
  Res = MIRBuilder
            .buildIntrinsic(getFpTrunc32ToBF16IntrID(ST, VecSize), {DstLLT},
                            /*HasSideEffects=*/true, /*isConvergent=*/false)
            .addUse(Res)
            .getReg(0);

  // Pull lane 0 back out as a sign-extended s32, record that only 16 bits are
  // significant, then narrow into the original s16 destination.
  const unsigned ExtractEltOpc =
      ST.getInstrInfo()->getGenericExtractVectorEltOpcode(/*SignExt*/ true);
  Res = MIRBuilder.buildInstr(ExtractEltOpc, {S32}, {Res, IdxReg}).getReg(0);
  Res = MIRBuilder.buildAssertInstr(TargetOpcode::G_ASSERT_SEXT, {S32}, Res, 16)
            .getReg(0);
  MIRBuilder.buildTrunc(DstReg, Res);

  MI.eraseFromParent();
  return true;
}

bool AIELegalizerHelper::legalizeG_FADD_G_FSUB(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/AIELegalizerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ class AIELegalizerHelper {
const LLT V32BF16 = LLT::fixed_vector(32, 16);
const LLT V32FP32 = LLT::fixed_vector(32, 32);
const LLT V32ACC32 = LLT::fixed_vector(32, 32);
const LLT V64BF16 = LLT::fixed_vector(64, 16);
const LLT V64FP32 = LLT::fixed_vector(64, 32);
const LLT V128BF16 = LLT::fixed_vector(128, 16);

public:
AIELegalizerHelper(const AIEBaseSubtarget &ST);
Expand Down Expand Up @@ -70,6 +72,7 @@ class AIELegalizerHelper {
bool legalizeG_FPEXT(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeG_FABS(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeG_FADD_G_FSUB(LegalizerHelper &Helper, MachineInstr &MI) const;
// Custom-legalizes a scalar 16-bit (bfloat16) G_FMUL by performing the
// multiply on 64 x 32-bit vectors and extracting element 0 of the bf16
// result. Erases MI and returns true.
bool legalizeG_FMUL(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeG_SELECT(LegalizerHelper &Helper, MachineInstr &MI,
const unsigned MaxBitSize = 512) const;
bool legalizeG_BITCAST(LegalizerHelper &Helper, MachineInstr &MI) const;
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,17 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)

getActionDefinitionsBuilder(G_FABS).customFor({S16, S32, S64}).scalarize(0);

getActionDefinitionsBuilder(G_FMUL)
.legalFor({AccV64S32, AccV32S32})
.customFor({S16})
.libcallFor({S32, S64});

getActionDefinitionsBuilder({G_FADD, G_FSUB})
.legalFor({AccV64S32})
.customFor({S16})
.libcallFor({S32, S64});

getActionDefinitionsBuilder({G_FMUL, G_FDIV, G_FREM})
getActionDefinitionsBuilder({G_FDIV, G_FREM})
.clampScalar(0, S32, S64)
.libcallFor({S32, S64});

Expand Down Expand Up @@ -716,6 +721,8 @@ bool AIE2PLegalizerInfo::legalizeCustom(
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
return AIEHelper.legalizeG_FADD_G_FSUB(Helper, MI);
case TargetOpcode::G_FMUL:
return AIEHelper.legalizeG_FMUL(Helper, MI);
case TargetOpcode::G_BUILD_VECTOR:
return AIEHelper.legalizeG_BUILD_VECTOR(Helper, MI);
case TargetOpcode::G_UNMERGE_VALUES:
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AIE/GlobalISel/legalize-fmul.mir
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

# RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 --check-prefix=COMMON --check-prefix=AIE2 %s
# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p --check-prefix=COMMON --check-prefix=AIE2P %s

---
name: test_fmul_bfloat16
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-fmul.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s

---
# Scalar bf16 multiply. The s16 G_FMUL in the body is expected to be
# custom-legalized into: insert of each operand at index 0 of a <64 x s16>
# vector, bitcast to <32 x s32>, concat with undef to <64 x s32>, a vector
# G_FMUL, unmerge to <32 x s32>, the llvm.aie2p.v32accfloat.to.v32bf16
# intrinsic, and a sign-extending extract of element 0.
name: test_fmul_bfloat16
body: |
bb.0:
liveins: $r1, $r2
; CHECK-LABEL: name: test_fmul_bfloat16
; CHECK: liveins: $r1, $r2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r2
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<64 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<64 x s16>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
; CHECK-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<64 x s16>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[TRUNC1]](s16), [[C]](s32)
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[AIE_INSERT_VECTOR_ELT]](<64 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<32 x s32>), [[DEF1]](<32 x s32>)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[AIE_INSERT_VECTOR_ELT1]](<64 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[BITCAST1]](<32 x s32>), [[DEF1]](<32 x s32>)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<64 x s32>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<32 x s32>), [[UV1:%[0-9]+]]:_(<32 x s32>) = G_UNMERGE_VALUES [[FMUL]](<64 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<32 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v32accfloat.to.v32bf16), [[UV]](<32 x s32>)
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[INT]](<32 x s16>), [[C]](s32)
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
; CHECK-NEXT: $r0 = COPY [[ASSERT_SEXT]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0
%0:_(s32) = COPY $r1
%1:_(s16) = G_TRUNC %0(s32)
%2:_(s32) = COPY $r2
%3:_(s16) = G_TRUNC %2(s32)
%4:_(s16) = G_FMUL %1, %3
%5:_(s32) = G_ANYEXT %4(s16)
$r0 = COPY %5(s32)
PseudoRET implicit $lr, implicit $r0
...

0 comments on commit a8dac06

Please sign in to comment.