From 73f33f64fb90d23fbc4b7710aa9bed46a9e65eeb Mon Sep 17 00:00:00 2001
From: Andreu Carminati
Date: Wed, 10 Sep 2025 06:10:37 -0600
Subject: [PATCH] [AIE2P] Add pseudo to handle VMAC_f_vmac_bf_vmul_bf_core_X_X

Select int_aie2p_I512_I512_ACC1024_bf_mac_conf to a new pseudo,
VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024, that takes the 1024-bit accumulator
directly instead of wrapping it in a REG_SEQUENCE. The pseudo is expanded in
expandPostRAPseudo into VMAC_f_vmac_bf_vmul_bf_core_X_X operating on the
matching ACC2048 super-register.

---
 llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp  | 38 +++++++++++++++++++
 llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td   |  6 ++++
 .../Target/AIE/aie2p/AIE2PInstrPatterns.td    |  4 +--
 .../aie2p/GlobalIsel/inst-select-vmult.mir    |  5 ++--
 .../CodeGen/AIE/aie2p/postrapseudos/vmac.mir  | 28 +++++++++++++++++++
 5 files changed, 75 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AIE/aie2p/postrapseudos/vmac.mir

diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
index 4e9b76f1a981..40893c243fc4 100644
--- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
+++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
@@ -1226,11 +1226,44 @@ unsigned AIE2PInstrInfo::getCycleSeparatorOpcode() const {
   return AIE2P::CYCLE_SEPARATOR;
 }
 
+/// Expand a VMAC pseudo taking a 1024-bit accumulator input into \p Opcode,
+/// the real instruction, which reads the enclosing 2048-bit accumulator.
+///
+/// \p MI is expected to carry (dst, acc1024, vec1, vec2, conf) as its first
+/// five operands. The replacement is inserted before \p MI, which is then
+/// erased. Kill flags of the four source operands are carried over.
+static void expandVMACACC1024Pseudo(MachineInstr &MI, unsigned Opcode,
+                                    MachineBasicBlock &MBB, const DebugLoc &DL,
+                                    const AIE2PInstrInfo *TII,
+                                    const TargetRegisterInfo *TRI) {
+  const Register Dst = MI.getOperand(0).getReg();
+  const Register AccSrc = MI.getOperand(1).getReg();
+  const Register VecSrc1 = MI.getOperand(2).getReg();
+  const Register VecSrc2 = MI.getOperand(3).getReg();
+  const Register ConfSrc = MI.getOperand(4).getReg();
+
+  // Turn the 1024-bit accumulator source register into its corresponding
+  // 2048-bit super register
+  const Register AccSuperReg = TRI->getMatchingSuperReg(
+      AccSrc, AIE2P::sub_1024_acc_lo, &AIE2P::ACC2048RegClass);
+  // getMatchingSuperReg yields no register when AccSrc is not the low half of
+  // an ACC2048 register; catch that here instead of emitting a bogus operand.
+  assert(AccSuperReg.isValid() &&
+         "Expected an ACC2048 super-register for the 1024-bit accumulator");
+  BuildMI(MBB, MI, DL, TII->get(Opcode), Dst)
+      .addReg(AccSuperReg, getKillRegState(MI.getOperand(1).isKill()))
+      .addReg(VecSrc1, getKillRegState(MI.getOperand(2).isKill()))
+      .addReg(VecSrc2, getKillRegState(MI.getOperand(3).isKill()))
+      .addReg(ConfSrc, getKillRegState(MI.getOperand(4).isKill()));
+  MI.eraseFromParent();
+}
+
 // Note: Some pseudos like spill/reload are already expanded in
 // eliminateFrameIndex.
 bool AIE2PInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   auto DL = MI.getDebugLoc();
   MachineBasicBlock &MBB = *MI.getParent();
+  auto *TRI = MI.getMF()->getSubtarget().getRegisterInfo();
   switch (MI.getOpcode()) {
   case AIE2P::PseudoMove: {
     Register Dst = MI.getOperand(0).getReg();
@@ -1241,6 +1274,11 @@ bool AIE2PInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.eraseFromParent();
     return true;
   }
+  case AIE2P::VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024: {
+    expandVMACACC1024Pseudo(MI, AIE2P::VMAC_f_vmac_bf_vmul_bf_core_X_X, MBB, DL,
+                            this, TRI);
+    return true;
+  }
   }
   return false;
 }
diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td
index 14822e9880b6..0dd17f459341 100644
--- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td
+++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td
@@ -338,5 +338,11 @@ def VLDA_E_SPILL : Pseudo<(outs EXPVEC64:$dst), (ins c12n_step4:$imm), "vlda_e_s
 }
 }
 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let Itinerary = II_VMAC_f_vmac_bf_vmul_bf_core_X_X, Defs = [srFPFlags], Uses = [crFPMask] in
+  def VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024 : Pseudo <(outs eDM:$dst), (ins eCML:$acc1, OP_mXv:$s1, OP_mXw:$s2, eR:$acc), "vmac.f", "$dst, $acc1, $s1, $s2, $acc">;
+}
+
+
 
 include "aie2p/AIE2PInstrPatterns.td"
diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
index 80ca8c4b723c..5fa25da79115 100644
--- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
+++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
@@ -324,9 +324,7 @@ def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_addmac_conf VEC512:$s1,
 
 def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_mac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, eR:$acc),
         (EXTRACT_SUBREG
-            (VMAC_f_vmac_bf_vmul_bf_core_X_X
-                (REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
-                VEC512:$s1, VEC512:$s2, eR:$acc),
+            (VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024 ACC1024:$acc1, VEC512:$s1, VEC512:$s2, eR:$acc),
             sub_1024_acc_lo)>;
 
 def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc),
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vmult.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vmult.mir
index 54ff8e9d5d8e..932ce47d1cca 100644
--- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vmult.mir
+++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vmult.mir
@@ -467,9 +467,8 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x2
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ecml = COPY $cml1
     ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 60
-    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[COPY2]], %subreg.sub_1024_acc_lo
-    ; CHECK-NEXT: [[VMAC_f_vmac_bf_vmul_bf_core_X_X:%[0-9]+]]:edm = VMAC_f_vmac_bf_vmul_bf_core_X_X [[REG_SEQUENCE]], [[COPY]], [[COPY1]], [[MOV_RLC_imm11_pseudo]], implicit-def dead $srfpflags, implicit $crfpmask
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ecml = COPY [[VMAC_f_vmac_bf_vmul_bf_core_X_X]].sub_1024_acc_lo
+    ; CHECK-NEXT: [[VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024_:%[0-9]+]]:edm = VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024 [[COPY2]], [[COPY]], [[COPY1]], [[MOV_RLC_imm11_pseudo]], implicit-def dead $srfpflags, implicit $crfpmask
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ecml = COPY [[VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024_]].sub_1024_acc_lo
     ; CHECK-NEXT: $cml0 = COPY [[COPY3]]
     ; CHECK-NEXT: PseudoRET implicit $lr, implicit $cml0
     %1:vregbank(<32 x s16>) = COPY $x0
diff --git a/llvm/test/CodeGen/AIE/aie2p/postrapseudos/vmac.mir b/llvm/test/CodeGen/AIE/aie2p/postrapseudos/vmac.mir
new file mode 100644
index 000000000000..a6e2f20934fe
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/postrapseudos/vmac.mir
@@ -0,0 +1,28 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+# RUN: llc -verify-machineinstrs --march=aie2p -run-pass=postrapseudos %s -o - | FileCheck %s
+
+...
+---
+name: expand_VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024
+alignment: 1
+body: |
+  bb.0:
+    liveins: $cml1, $x0, $x2
+
+    ; CHECK-LABEL: name: expand_VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024
+    ; CHECK: liveins: $cml1, $x0, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $r0 = MOV_RLC_imm11_pseudo 60
+    ; CHECK-NEXT: $dm0 = VMAC_f_vmac_bf_vmul_bf_core_X_X killed $dm1, killed $x0, killed $x2, killed $r0, implicit-def $srfpflags, implicit $crfpmask
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $cml0
+    renamable $r0 = MOV_RLC_imm11_pseudo 60
+    renamable $dm0 = VMAC_f_vmac_bf_vmul_bf_core_X_X_ACC1024 killed renamable $cml1, killed renamable $x0, killed renamable $x2, killed renamable $r0, implicit-def dead $srfpflags, implicit $crfpmask
+    PseudoRET implicit $lr, implicit $cml0
+
+...