Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AIEX] Combine G_SHUFFLE_VECTOR to COPY #346

Merged
merged 2 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIECombine.td
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ def combine_vector_shuffle_extract_subvec : GICombineRule<
[{ return matchShuffleToExtractSubvec(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def combine_vector_shuffle_to_copy : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR): $root,
[{ return matchShuffleToCopy(*${root}, MRI, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def AIE2PreLegalizerCombiner
: GICombiner<"AIE2PreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector,
all_combines, combine_S20NarrowingOpt,
Expand All @@ -99,6 +105,7 @@ def AIE2PreLegalizerCombiner

def AIE2PPreLegalizerCombiner
: GICombiner<"AIE2PPreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector,
combine_vector_shuffle_to_copy,
combine_vector_shuffle_extract_subvec,
all_combines, combine_S20NarrowingOpt,
combine_globalval_offset,
Expand Down
213 changes: 139 additions & 74 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,69 @@ cl::opt<bool> CombineVecShiftByZero(
"aie-combine-vec-shift-by-zero", cl::init(true), cl::Hidden,
cl::desc("Combine vectors shift by zero into copies."));

bool MaskMatch::isValidMask(ArrayRef<int> Mask) const {
bool FirstNotUndef = true;
for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
if (Mask[Idx] == -1)
continue;

// Find the start value of the mask
if (FirstNotUndef) {
// Get the start value
const unsigned MaskStart = Mask[Idx] - (Period == 0 ? Idx : Idx % Period);

if (MaskStart != Height)
return false;

FirstNotUndef = false;
}

// Check not undef values (not -1) of the mask
if ((unsigned)Mask[Idx] != getMaskValue(Idx))
return false;
}

return true;
}

bool MaskMatch::isMaskWithAllUndefs(ArrayRef<int> Mask) {
for (unsigned I = 0; I < Mask.size(); ++I) {
if (Mask[I] != -1)
return false;
}
return true;
}

std::optional<unsigned> MaskMatch::getHeight(ArrayRef<int> Mask,
unsigned Period) {
for (unsigned I = 0; I < Mask.size(); ++I) {
if (Mask[I] != -1)
return Mask[I] - (Period == 0 ? I : I % Period);
}
return std::nullopt;
}

/// This function returns the unique index in the shuffle mask \p Mask if the
/// unique index exists.
std::optional<int> MaskMatch::getUniqueIndex(ArrayRef<int> Mask) {
std::optional<int> UniqOpIdx;
for (unsigned I = 0; I < Mask.size(); I++) {
int Idx = Mask[I];
if (Idx == -1)
continue;

if (!UniqOpIdx) {
UniqOpIdx = Idx;
continue;
}

if (UniqOpIdx != Idx) {
return std::nullopt;
}
}
return UniqOpIdx;
}

MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI,
CombinerHelper &Helper,
AIELoadStoreCombineMatchData &MatchData,
Expand Down Expand Up @@ -1765,6 +1828,15 @@ bool llvm::matchBroadcastElement(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}

/// \returns true if it is possible to combine the shuffle vector to VSEL.
/// E.g.:
/// From : %0:_(<16 x s32>) = COPY $x0
/// %1:_(<16 x s32>) = COPY $x1
/// %2:_(<16 x s32>) = G_SHUFFLE_VECTOR %X(<16 x s32>), %1(<16 x s32>),
/// shufflemask(0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
/// 31)
/// To : %3:_(s32) = G_CONSTANT i32 65520
/// %4:_(<16 x s32>) = G_AIE_VSEL %0, %1, %3(s32)
bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI,
const AIEBaseInstrInfo &TII,
BuildFnTy &MatchInfo) {
Expand All @@ -1785,13 +1857,10 @@ bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI,
if (NumDstElems != NumSrcElems)
return false;

// Check that the shuffle mask can be converted into VSel mask:
// The mask contains only -1
if (std::all_of(Mask.begin(), Mask.end(),
[&](int Value) { return Value == -1; })) {
if (MaskMatch::isMaskWithAllUndefs(Mask))
return false;
}

// Check that the shuffle mask can be converted into VSel mask:
// 1. The shuffle mask doesn't contain indices that correspond to the same
// index in Src1 and Src2, i.e., for each i only the i-th element from Src1 or
// the i-th element from Src2 is used.
Expand Down Expand Up @@ -1820,27 +1889,6 @@ bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}

/// This function returns the unique index in the shuffle mask \p Mask if the
/// unique index exists.
static std::optional<int> getUniqueIndex(ArrayRef<int> Mask) {
std::optional<int> UniqOpIdx;
for (unsigned I = 0; I < Mask.size(); I++) {
int Idx = Mask[I];
if (Idx < 0)
continue;

if (!UniqOpIdx) {
UniqOpIdx = Idx;
continue;
}

if (UniqOpIdx != Idx) {
return std::nullopt;
}
}
return UniqOpIdx;
}

/// \returns true if it is possible to combine the shuffle vector with a mask
/// that extracts an element from the first source vector and broadcasts
/// it. E.g.:
Expand All @@ -1856,7 +1904,7 @@ static bool matchShuffleToVecEltBroadcast(MachineInstr &MI,
BuildFnTy &MatchInfo) {
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

std::optional<int> UniqOpIdx = getUniqueIndex(Mask);
std::optional<int> UniqOpIdx = MaskMatch::getUniqueIndex(Mask);
if (!UniqOpIdx)
return false;

Expand All @@ -1873,24 +1921,6 @@ static bool matchShuffleToVecEltBroadcast(MachineInstr &MI,
return true;
}

/// A sequential mask with \p StartValue and \p NumElems is generated. If \p
/// Mask is equivalent to the generated sequential mask, the method returns
/// true. Otherwise, false.
static bool checkSequentialMask(const ArrayRef<int> Mask, unsigned StartValue,
unsigned NumElems) {
if (Mask.size() != NumElems)
return false;

auto SeqMask = createSequentialMask(StartValue, NumElems, 0);

for (unsigned I = 0; I < NumElems; ++I) {
if (Mask[I] != -1 && Mask[I] != SeqMask[I])
return false;
}

return true;
}

/// Check prerequisites to extract a subvector
static bool checkExtractSubvectorPrerequisites(const AIEBaseInstrInfo &TII,
const LLT DstTy,
Expand Down Expand Up @@ -2124,12 +2154,15 @@ bool llvm::matchShuffleToExtractSubvec(MachineInstr &MI,
if (NumSrc1Elems % NumDstElems != 0)
return false;

if (MaskMatch::isMaskWithAllUndefs(Mask))
return false;

const unsigned NumSubVectors = NumSrc1Elems / NumDstElems;
auto GetSubvecExtractIdx = [=, &Mask]() -> std::optional<unsigned> {
for (unsigned SubVecIdx = 0; SubVecIdx < NumSubVectors; ++SubVecIdx) {
if (checkSequentialMask(Mask, SubVecIdx * NumDstElems, NumDstElems)) {
MaskMatch SequentialMask{/*Height*/ SubVecIdx * NumDstElems};
if (SequentialMask.isValidMask(Mask))
return SubVecIdx;
}
}

return std::nullopt;
Expand Down Expand Up @@ -2189,30 +2222,17 @@ static bool matchShuffleToSubvecBroadcast(MachineInstr &MI,
if (Mask[0] != -1 && Mask[0] % SplatMaskLen != 0)
return std::nullopt;

// Find the start value of the splat mask and check that the mask is valid
bool ValidMask = true;
int SplatMaskStart = -1;
for (unsigned I = 0; I < MaskSize; ++I) {
if (Mask[I] == -1)
continue;

if (SplatMaskStart == -1) {
// First Mask[I]!=-1
// Get the start value
SplatMaskStart = Mask[I] - I % SplatMaskLen;

if (SplatMaskStart % SplatMaskLen != 0)
return std::nullopt;

} else if ((unsigned)Mask[I] != SplatMaskStart + I % SplatMaskLen) {
// Check the rest not undef values (not -1) of the mask
ValidMask = false;
break;
}
}
// Get Height (start value)
std::optional<unsigned> Height =
MaskMatch::getHeight(Mask, /*Period*/ SplatMaskLen);
if (!Height)
return std::nullopt;

if (ValidMask)
return std::make_pair(SplatMaskStart, SplatMaskLen);
// Check the mask
MaskMatch SequentialPeriodicMask{/*Height*/ Height.value(),
/*Period*/ SplatMaskLen};
if (SequentialPeriodicMask.isValidMask(Mask))
return std::make_pair(Height.value(), SplatMaskLen);
}
return std::nullopt;
};
Expand Down Expand Up @@ -2272,10 +2292,11 @@ static bool matchShuffleToVecBroadcast(MachineInstr &MI,
return false;
}

for (unsigned I = 0; I < Mask.size(); ++I) {
if (Mask[I] != -1 && (unsigned)Mask[I] != I % NumSrcElems)
return false;
}
// Check the mask
MaskMatch SequentialPeriodicMask{/*Height*/ 0,
/*Period*/ NumSrcElems};
if (!SequentialPeriodicMask.isValidMask(Mask))
return false;

MatchInfo = [=, &MRI](MachineIRBuilder &B) {
buildBroadcastVector(B, MRI, Src1Reg, DstReg);
Expand All @@ -2288,6 +2309,12 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
const AIEBaseInstrInfo &TII,
BuildFnTy &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);

ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

if (MaskMatch::isMaskWithAllUndefs(Mask))
return false;

if (matchShuffleToVecBroadcast(MI, MRI, TII, MatchInfo))
return true;
if (matchShuffleToVecEltBroadcast(MI, MRI, TII, MatchInfo))
Expand All @@ -2296,3 +2323,41 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
return false;
}

/// Match something like this:
/// %1:_(<2 x s32>) = COPY $x0
/// %2:_(<2 x s32>) = G_IMPLICIT_DEF
/// %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>),
/// shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)

/// To convert to:
/// %0:_(<16 x s32>) = COPY $x0
bool llvm::matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI,
BuildFnTy &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);

const Register DstReg = MI.getOperand(0).getReg();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use something like this:

const auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();

?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I guess that this could trigger some clang frontend bug....

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this I am getting the following error: " error: reference to local binding 'DstReg' declared in enclosing function 'llvm::matchShuffleToCopy'
MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(DstReg, Src1Reg); };"

and the same error for Src1Reg.

Copy link
Collaborator

@andcarminati andcarminati Feb 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this is a clang bug.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That means that you can explain every line from the C++ standard related to structural bindings? Respect.

const Register Src1Reg = MI.getOperand(1).getReg();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

const LLT DstTy = MRI.getType(DstReg);
const LLT Src1Ty = MRI.getType(Src1Reg);
if (DstTy != Src1Ty)
return false;

const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
if (Mask.size() != NumSrcElems)
return false;

if (MaskMatch::isMaskWithAllUndefs(Mask))
return false;

// Check that the mask is sequential
MaskMatch SequentialMask{/*Height*/ 0};
if (!SequentialMask.isValidMask(Mask))
return false;

MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(DstReg, Src1Reg); };

return true;
}
31 changes: 31 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,34 @@ struct AIELoadStoreCombineMatchData {
bool RemoveInstr;
};

/// The mask is represented by a sawtooth function F with Period, Height and
/// Amplitude, i.e., F(idx + Period) = F(idx) = Height + idx * Amplitude, where
/// idx >= 0.
/// Example: mask = (4, 6, 8, 4, 6, 8) <=> Height=4, Amplitude=2, Period=3
class MaskMatch {
public:
MaskMatch(unsigned MaskHeight, unsigned MaskPeriod = 0, int MaskAmplitude = 1)
: Period{MaskPeriod}, Height{MaskHeight}, Amplitude{MaskAmplitude} {}

bool isValidMask(ArrayRef<int> Mask) const;
unsigned getHeight() const { return Height; }

static bool isMaskWithAllUndefs(ArrayRef<int> Mask);
static std::optional<unsigned> getHeight(ArrayRef<int> Mask, unsigned Period);
static std::optional<int> getUniqueIndex(ArrayRef<int> Mask);

protected:
unsigned getMaskValue(unsigned Idx) const {
unsigned BaseIdx = Period == 0 ? Idx : Idx % Period;
return Height + BaseIdx * Amplitude;
}

unsigned Period = 0;
unsigned Height = 0;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had imagined these as std::optional, indicating that they were free parameters. Then, the match function, isValidMask, would use this freedom to make it match the given mask. Matching on a mask without dont-care values, it would fix Height on the first element, the Amplitude on the second, and period when the sawtooth ended. It would return false the first time it couldn't adjust a free parameter. If you would know the period that you were looking for, you would initialize it with concrete values using a constructor, or a setter function.
I guess that for our purposes, this generalized sawtooth is a reasonable base, although we could make the base class even more generic. For instance, we could match the different transpose modes for VSHUFFLE in this form as well.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But don't pursue this idea at this point. It is already good to have it factored this far.

/// Negative amplitude can be used for reverse mask patterns.
int Amplitude = 1;
};

/// Look for any PtrAdd instruction that use the same base as \a MI that can be
/// combined with it and stores it in \a MatchData
/// \return true if an instruction is found
Expand Down Expand Up @@ -211,6 +239,9 @@ bool matchShuffleToExtractSubvec(MachineInstr &MI, MachineRegisterInfo &MRI,
const AIEBaseInstrInfo &TII,
BuildFnTy &MatchInfo);

bool matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI,
BuildFnTy &MatchInfo);

} // namespace llvm

#endif
Loading