-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV] Add initial legality checks for ee loops with stores #145663
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
huntergr-arm
wants to merge
1
commit into
llvm:main
Choose a base branch
from
huntergr-arm:ee-with-store-legality-checks
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Graham Hunter (huntergr-arm) ChangesSeparating out the next piece of EE-with-stores Full diff: https://github.com/llvm/llvm-project/pull/145663.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..aa34daf596b6b 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -407,6 +407,14 @@ class LoopVectorizationLegality {
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
}
+ /// Returns true if this is an early exit loop containing a store.
+ bool isConditionCopyRequired() const { return EarlyExitLoad.has_value(); }
+
+ /// Returns the load instruction, if any, directly used for an exit comparison
+ /// in and early exit loop containing state-changing or potentially-faulting
+ /// operations.
+ std::optional<LoadInst *> getEarlyExitLoad() const { return EarlyExitLoad; }
+
/// Return true if there is store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
@@ -536,6 +544,12 @@ class LoopVectorizationLegality {
/// additional cases safely.
bool isVectorizableEarlyExitLoop();
+ /// Clears any current early exit data gathered if a check failed.
+ void clearEarlyExitData() {
+ UncountableEdge = std::nullopt;
+ EarlyExitLoad = std::nullopt;
+ }
+
/// Return true if all of the instructions in the block can be speculatively
/// executed, and record the loads/stores that require masking.
/// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -654,6 +668,10 @@ class LoopVectorizationLegality {
/// Keep track of the loop edge to an uncountable exit, comprising a pair
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
+
+ /// Keep track of the load used for early exits where state-changing or
+ /// potentially faulting operations occur inside the loop.
+ std::optional<LoadInst *> EarlyExitLoad;
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 969d225c6ef2e..eec660e5958ca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1207,8 +1208,42 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
});
}
- if (!LAI->canVectorizeMemory())
- return canVectorizeIndirectUnsafeDependences();
+ if (LAI->canVectorizeMemory()) {
+ // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+ // since we're (potentially) doing the load out of its normal order
+ // in the loop and that may throw off dependency checking.
+ // A forward dependency should be fine, but a backwards dep may not
+ // be even if LAA thinks it is due to performing the load for the
+ // vector iteration i+1 in vector iteration i.
+ if (isConditionCopyRequired()) {
+ const MemoryDepChecker &DepChecker = LAI->getDepChecker();
+ const auto *Deps = DepChecker.getDependences();
+
+ for (const MemoryDepChecker::Dependence &Dep : *Deps) {
+ if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
+ Dep.getSource(DepChecker) == EarlyExitLoad) {
+ // Refine language a little? This currently only applies when a store
+ // is present in the early exit loop.
+ reportVectorizationFailure(
+ "No dependencies allowed for early exit condition load",
+ "Early exit condition loads may not have a dependence with "
+ "another"
+ " memory operation.",
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+ return false;
+ }
+ }
+ }
+ } else {
+ if (!isConditionCopyRequired())
+ return canVectorizeIndirectUnsafeDependences();
+ reportVectorizationFailure(
+ "Cannot vectorize unsafe dependencies in state-changing early exit "
+ "loop.",
+ "Unable to vectorize memory in an early exit loop with store",
+ "CantVectorizeUnsafeDependencyForEELoopWithStore", ORE, TheLoop);
+ return false;
+ }
if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("We don't allow storing to uniform addresses",
@@ -1747,16 +1782,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
}
};
+ bool HasStore = false;
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ HasStore = true;
+ if (SI->isSimple())
+ continue;
+
+ reportVectorizationFailure(
+ "Complex writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with complex writes to memory",
+ "WritesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
if (I.mayWriteToMemory()) {
// We don't support writes to memory.
reportVectorizationFailure(
- "Writes to memory unsupported in early exit loops",
- "Cannot vectorize early exit loop with writes to memory",
+ "Complex writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with complex writes to memory",
"WritesInEarlyExitLoop", ORE, TheLoop);
return false;
- } else if (!IsSafeOperation(&I)) {
+ }
+
+ if (!IsSafeOperation(&I)) {
reportVectorizationFailure("Early exit loop contains operations that "
"cannot be speculatively executed",
"UnsafeOperationsEarlyExitLoop", ORE,
@@ -1771,13 +1821,65 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
// TODO: Handle loops that may fault.
Predicates.clear();
- if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
- &Predicates)) {
- reportVectorizationFailure(
- "Loop may fault",
- "Cannot vectorize potentially faulting early exit loop",
- "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
- return false;
+ if (HasStore) {
+ // Record load for analysis by isDereferenceableAndAlignedInLoop
+ // and later by dependence analysis.
+ if (BranchInst *Br = dyn_cast<BranchInst>(
+ SingleUncountableEdge->first->getTerminator())) {
+ // FIXME: Handle exit conditions with multiple users, more complex exit
+ // conditions than br(icmp(load, loop_inv)).
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+ if (Cmp && Cmp->hasOneUse() &&
+ TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+ LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+ if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
+ if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(),
+ *DT, AC, &Predicates)) {
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(TheLoop);
+ // FIXME: We may have multiple levels of conditional loads, so will
+ // need to improve on outright rejection at some point.
+ if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+ EarlyExitLoad = Load;
+ } else {
+ reportVectorizationFailure(
+ "Early exit condition load not guaranteed to execute",
+ "Cannot vectorize early exit loop when condition load is not "
+ "guaranteed to execute",
+ "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+ }
+ } else {
+ reportVectorizationFailure(
+ "Uncounted loop condition not known safe",
+ "Cannot vectorize early exit loop with "
+ "possibly unsafe condition load",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+ }
+ }
+ }
+
+ if (!EarlyExitLoad) {
+ reportVectorizationFailure(
+ "Early exit loop with store but no condition load",
+ "Cannot vectorize early exit loop with store but no condition load",
+ "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+ } else {
+ // Read-only loop.
+ // FIXME: as with the loops with stores, only the loads contributing to
+ // the loop condition need to be guaranteed dereferenceable and
+ // aligned.
+ if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
+ &Predicates)) {
+ reportVectorizationFailure(
+ "Loop may fault",
+ "Cannot vectorize potentially faulting early exit loop",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
}
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
@@ -1861,7 +1963,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
} else {
if (!isVectorizableEarlyExitLoop()) {
- UncountableEdge = std::nullopt;
+ clearEarlyExitData();
if (DoExtraAnalysis)
Result = false;
else
@@ -1879,6 +1981,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
}
+ // Bail out for state-changing EE loops for now.
+ if (EarlyExitLoad) {
+ reportVectorizationFailure(
+ "Writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with writes to memory",
+ "WritesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
if (Result) {
LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
<< (LAI->getRuntimePointerChecking()->Need
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 3a8aec34dfe43..2578260fe878d 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
; return 0;
; }
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load
; CHECK: remark: source.cpp:5:9: loop not vectorized
; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 7c80dad006952..6724c703e4940 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -3,7 +3,7 @@
define i64 @loop_contains_store(ptr %dest) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
entry:
%p1 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
@@ -56,7 +56,7 @@ exit:
define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
entry:
br label %for.body
@@ -80,7 +80,7 @@ exit:
define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
entry:
br label %for.body
@@ -106,7 +106,7 @@ exit:
define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: No dependencies allowed for early exit condition load.
entry:
%pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
br label %for.body
@@ -135,7 +135,7 @@ exit:
define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe.
entry:
%unknown.offset = call i64 @get_an_unknown_offset()
%unknown.cmp = icmp ult i64 %unknown.offset, 20
@@ -149,10 +149,10 @@ for.body:
%data = load i16, ptr %st.addr, align 2
%inc = add nsw i16 %data, 1
store i16 %inc, ptr %st.addr, align 2
- %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
%ee.val = load i16, ptr %ee.addr, align 2
%ee.cond = icmp sgt i16 %ee.val, 500
- %some.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
+ %some.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
store i16 42, ptr %some.addr, align 2
br i1 %ee.cond, label %exit, label %for.inc
@@ -194,7 +194,7 @@ exit:
define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe.
entry:
br label %for.body
@@ -220,7 +220,7 @@ exit:
define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Complex writes to memory unsupported in early exit loops.
entry:
br label %for.body
@@ -324,7 +324,7 @@ exit:
define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe.
entry:
br label %for.body
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Separating out the next piece of EE-with-stores