Skip to content

Commit

Permalink
fixup! [AIE2P] legalizer support for G_FMUL
Browse files Browse the repository at this point in the history
  • Loading branch information
khallouh committed Feb 19, 2025
1 parent 10084d0 commit f545a2c
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 28 deletions.
50 changes: 25 additions & 25 deletions llvm/lib/Target/AIE/AIELegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/IR/IntrinsicsAIE2.h"
#include "llvm/IR/IntrinsicsAIE2P.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>

namespace llvm {

Expand Down Expand Up @@ -1159,54 +1160,53 @@ bool AIELegalizerHelper::legalizeG_FABS(LegalizerHelper &Helper,

bool AIELegalizerHelper::legalizeG_FMUL(LegalizerHelper &Helper,
MachineInstr &MI) const {
assert(ST.isAIE2P() && "Custom legalization supported for AIE2P only");

MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

const Register DstReg = MI.getOperand(0).getReg();
Register SrcLHS = MI.getOperand(1).getReg();
Register SrcRHS = MI.getOperand(2).getReg();

assert(MRI.getType(DstReg) == LLT::scalar(16) &&
"Expected bfloat16 type in custom legalization.");

const LLT InsertVecLLT = ST.isAIE2P() ? V64BF16 : V64BF16;
const Register IdxReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
const Register UndefVec = MIRBuilder.buildUndef(InsertVecLLT).getReg(0);
Register SrcLHS = MI.getOperand(1).getReg();
Register SrcRHS = MI.getOperand(2).getReg();

const LLT InsertVecLLT = V64BF16;
const unsigned InsertEltOpc =
ST.getInstrInfo()->getGenericInsertVectorEltOpcode();
SrcLHS =
MIRBuilder
.buildInstr(InsertEltOpc, {InsertVecLLT}, {UndefVec, SrcLHS, IdxReg})
.getReg(0);
SrcRHS =
MIRBuilder
.buildInstr(InsertEltOpc, {InsertVecLLT}, {UndefVec, SrcRHS, IdxReg})
.getReg(0);

const Register UndefVec1 = MIRBuilder.buildUndef(V32FP32).getReg(0);
const Register IdxReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
const Register UndefVec512 = MIRBuilder.buildUndef(InsertVecLLT).getReg(0);

SrcLHS = MIRBuilder
.buildInstr(InsertEltOpc, {InsertVecLLT},
{UndefVec512, SrcLHS, IdxReg})
.getReg(0);
SrcRHS = MIRBuilder
.buildInstr(InsertEltOpc, {InsertVecLLT},
{UndefVec512, SrcRHS, IdxReg})
.getReg(0);

const Register UndefVec1024 = MIRBuilder.buildUndef(V32FP32).getReg(0);

SrcLHS = MIRBuilder.buildBitcast(V32FP32, SrcLHS).getReg(0);
SrcRHS = MIRBuilder.buildBitcast(V32FP32, SrcRHS).getReg(0);
const Register ConcatLHS = MRI.createGenericVirtualRegister(V64FP32);
const Register ConcatRHS = MRI.createGenericVirtualRegister(V64FP32);
MIRBuilder.buildConcatVectors(
ConcatLHS,
{MIRBuilder.buildBitcast(V32FP32, SrcLHS).getReg(0), UndefVec1});
MIRBuilder.buildConcatVectors(
ConcatRHS,
{MIRBuilder.buildBitcast(V32FP32, SrcRHS).getReg(0), UndefVec1});
MIRBuilder.buildConcatVectors(ConcatLHS, {SrcLHS, UndefVec1024});
MIRBuilder.buildConcatVectors(ConcatRHS, {SrcRHS, UndefVec1024});
SrcLHS = ConcatLHS;
SrcRHS = ConcatRHS;

Register Res =
MIRBuilder.buildInstr(MI.getOpcode(), {V64FP32}, {SrcLHS, SrcRHS})
.getReg(0);

Res = MIRBuilder.buildUnmerge(V32ACC32, Res).getReg(0);

Res = MIRBuilder.buildUnmerge(V32FP32, Res).getReg(0);
const int VecSize = MRI.getType(Res).getSizeInBits();
const LLT DstLLT = ST.isAIE2P() ? V32BF16 : V16BF16;
Res = MIRBuilder
.buildIntrinsic(getFpTrunc32ToBF16IntrID(ST, VecSize), {DstLLT},
.buildIntrinsic(getFpTrunc32ToBF16IntrID(ST, VecSize), {V32BF16},
true, false)
.addUse(Res)
.getReg(0);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
getActionDefinitionsBuilder(G_FABS).customFor({S16, S32, S64}).scalarize(0);

getActionDefinitionsBuilder(G_FMUL)
.legalFor({AccV64S32, AccV32S32})
.legalFor({AccV64S32, AccV32S32, V16S32})
.customFor({S16})
.libcallFor({S32, S64});

Expand Down
66 changes: 64 additions & 2 deletions llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-fmul.mir
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s

---
name: test_fmul_bfloat16
name: test_fmul_s16
body: |
bb.0:
liveins: $r1, $r2
; CHECK-LABEL: name: test_fmul_bfloat16
; CHECK-LABEL: name: test_fmul_s16
; CHECK: liveins: $r1, $r2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
Expand Down Expand Up @@ -44,3 +44,65 @@ body: |
$r0 = COPY %5(s32)
PseudoRET implicit $lr, implicit $r0
...

---
# A G_FMUL on <64 x s32> operands is expected to pass through the legalizer
# unchanged: the CHECK lines below require the same G_FMUL, fed directly by
# the two COPYs, with no extra instructions in between.
name: test_fmul_vec_2048
body: |
bb.0:
liveins: $dm0, $dm1
; CHECK-LABEL: name: test_fmul_vec_2048
; CHECK: liveins: $dm0, $dm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s32>) = COPY $dm0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s32>) = COPY $dm1
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<64 x s32>) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: $dm0 = COPY [[FMUL]](<64 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0
%0:_(<64 x s32>) = COPY $dm0
%1:_(<64 x s32>) = COPY $dm1
%2:_(<64 x s32>) = G_FMUL %0, %1
$dm0 = COPY %2(<64 x s32>)
PseudoRET implicit $lr, implicit $dm0
...

---
# A G_FMUL on <32 x s32> operands is expected to pass through the legalizer
# unchanged: the CHECK lines below require the same G_FMUL, fed directly by
# the two COPYs, with no extra instructions in between.
# NOTE(review): liveins names $dm0/$dm1 while the instructions read and write
# $cml0/$cml1 -- presumably these are sub-registers of the dm registers;
# confirm, or list the registers that are actually used.
name: test_fmul_vec_1024
body: |
bb.0:
liveins: $dm0, $dm1
; CHECK-LABEL: name: test_fmul_vec_1024
; CHECK: liveins: $dm0, $dm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $cml0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s32>) = COPY $cml1
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<32 x s32>) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: $cml0 = COPY [[FMUL]](<32 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $cml0
%0:_(<32 x s32>) = COPY $cml0
%1:_(<32 x s32>) = COPY $cml1
%2:_(<32 x s32>) = G_FMUL %0, %1
$cml0 = COPY %2(<32 x s32>)
PseudoRET implicit $lr, implicit $cml0
...

---
# A G_FMUL on <16 x s32> operands is expected to pass through the legalizer
# unchanged: the CHECK lines below require the same G_FMUL, fed directly by
# the two COPYs, with no extra instructions in between.
# NOTE(review): liveins names $dm0/$dm1 while the instructions read and write
# $bmll0/$bmll1 -- presumably these are sub-registers of the dm registers;
# confirm, or list the registers that are actually used.
name: test_fmul_vec_512
body: |
bb.0:
liveins: $dm0, $dm1
; CHECK-LABEL: name: test_fmul_vec_512
; CHECK: liveins: $dm0, $dm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $bmll0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $bmll1
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<16 x s32>) = G_FMUL [[COPY]], [[COPY1]]
; CHECK-NEXT: $bmll0 = COPY [[FMUL]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $bmll0
%0:_(<16 x s32>) = COPY $bmll0
%1:_(<16 x s32>) = COPY $bmll1
%2:_(<16 x s32>) = G_FMUL %0, %1
$bmll0 = COPY %2(<16 x s32>)
PseudoRET implicit $lr, implicit $bmll0
...


0 comments on commit f545a2c

Please sign in to comment.