Skip to content

[LV] Add initial legality checks for ee loops with stores #145663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

huntergr-arm
Copy link
Collaborator

Separating out the next piece of EE-with-stores

@llvmbot
Copy link
Member

llvmbot commented Jun 25, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Graham Hunter (huntergr-arm)

Changes

Separating out the next piece of EE-with-stores


Full diff: https://github.com/llvm/llvm-project/pull/145663.diff

4 Files Affected:

  • (modified) llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h (+18)
  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+124-13)
  • (modified) llvm/test/Transforms/LoopVectorize/control-flow.ll (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll (+10-10)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..aa34daf596b6b 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -407,6 +407,14 @@ class LoopVectorizationLegality {
     return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
   }
 
+  /// Returns true if this is an early exit loop containing a store.
+  bool isConditionCopyRequired() const { return EarlyExitLoad.has_value(); }
+
+  /// Returns the load instruction, if any, directly used for an exit comparison
+  /// in and early exit loop containing state-changing or potentially-faulting
+  /// operations.
+  std::optional<LoadInst *> getEarlyExitLoad() const { return EarlyExitLoad; }
+
   /// Return true if there is store-load forwarding dependencies.
   bool isSafeForAnyStoreLoadForwardDistances() const {
     return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
@@ -536,6 +544,12 @@ class LoopVectorizationLegality {
   /// additional cases safely.
   bool isVectorizableEarlyExitLoop();
 
+  /// Clears any current early exit data gathered if a check failed.
+  void clearEarlyExitData() {
+    UncountableEdge = std::nullopt;
+    EarlyExitLoad = std::nullopt;
+  }
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -654,6 +668,10 @@ class LoopVectorizationLegality {
   /// Keep track of the loop edge to an uncountable exit, comprising a pair
   /// of (Exiting, Exit) blocks, if there is exactly one early exit.
   std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
+
+  /// Keep track of the load used for early exits where state-changing or
+  /// potentially faulting operations occur inside the loop.
+  std::optional<LoadInst *> EarlyExitLoad;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 969d225c6ef2e..eec660e5958ca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1207,8 +1208,42 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     });
   }
 
-  if (!LAI->canVectorizeMemory())
-    return canVectorizeIndirectUnsafeDependences();
+  if (LAI->canVectorizeMemory()) {
+    // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+    //        since we're (potentially) doing the load out of its normal order
+    //        in the loop and that may throw off dependency checking.
+    //        A forward dependency should be fine, but a backwards dep may not
+    //        be even if LAA thinks it is due to performing the load for the
+    //        vector iteration i+1 in vector iteration i.
+    if (isConditionCopyRequired()) {
+      const MemoryDepChecker &DepChecker = LAI->getDepChecker();
+      const auto *Deps = DepChecker.getDependences();
+
+      for (const MemoryDepChecker::Dependence &Dep : *Deps) {
+        if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
+            Dep.getSource(DepChecker) == EarlyExitLoad) {
+          // Refine language a little? This currently only applies when a store
+          // is present in the early exit loop.
+          reportVectorizationFailure(
+              "No dependencies allowed for early exit condition load",
+              "Early exit condition loads may not have a dependence with "
+              "another"
+              " memory operation.",
+              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+          return false;
+        }
+      }
+    }
+  } else {
+    if (!isConditionCopyRequired())
+      return canVectorizeIndirectUnsafeDependences();
+    reportVectorizationFailure(
+        "Cannot vectorize unsafe dependencies in state-changing early exit "
+        "loop.",
+        "Unable to vectorize memory in an early exit loop with store",
+        "CantVectorizeUnsafeDependencyForEELoopWithStore", ORE, TheLoop);
+    return false;
+  }
 
   if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
     reportVectorizationFailure("We don't allow storing to uniform addresses",
@@ -1747,16 +1782,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   };
 
+  bool HasStore = false;
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        HasStore = true;
+        if (SI->isSimple())
+          continue;
+
+        reportVectorizationFailure(
+            "Complex writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with complex writes to memory",
+            "WritesInEarlyExitLoop", ORE, TheLoop);
+        return false;
+      }
+
       if (I.mayWriteToMemory()) {
         // We don't support writes to memory.
         reportVectorizationFailure(
-            "Writes to memory unsupported in early exit loops",
-            "Cannot vectorize early exit loop with writes to memory",
+            "Complex writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with complex writes to memory",
             "WritesInEarlyExitLoop", ORE, TheLoop);
         return false;
-      } else if (!IsSafeOperation(&I)) {
+      }
+
+      if (!IsSafeOperation(&I)) {
         reportVectorizationFailure("Early exit loop contains operations that "
                                    "cannot be speculatively executed",
                                    "UnsafeOperationsEarlyExitLoop", ORE,
@@ -1771,13 +1821,65 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
 
   // TODO: Handle loops that may fault.
   Predicates.clear();
-  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
-                                     &Predicates)) {
-    reportVectorizationFailure(
-        "Loop may fault",
-        "Cannot vectorize potentially faulting early exit loop",
-        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
-    return false;
+  if (HasStore) {
+    // Record load for analysis by isDereferenceableAndAlignedInLoop
+    // and later by dependence analysis.
+    if (BranchInst *Br = dyn_cast<BranchInst>(
+            SingleUncountableEdge->first->getTerminator())) {
+      // FIXME: Handle exit conditions with multiple users, more complex exit
+      //        conditions than br(icmp(load, loop_inv)).
+      ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+      if (Cmp && Cmp->hasOneUse() &&
+          TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+        LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+        if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
+          if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(),
+                                                *DT, AC, &Predicates)) {
+            ICFLoopSafetyInfo SafetyInfo;
+            SafetyInfo.computeLoopSafetyInfo(TheLoop);
+            // FIXME: We may have multiple levels of conditional loads, so will
+            //        need to improve on outright rejection at some point.
+            if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+              EarlyExitLoad = Load;
+            } else {
+              reportVectorizationFailure(
+                  "Early exit condition load not guaranteed to execute",
+                  "Cannot vectorize early exit loop when condition load is not "
+                  "guaranteed to execute",
+                  "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+            }
+          } else {
+            reportVectorizationFailure(
+                "Uncounted loop condition not known safe",
+                "Cannot vectorize early exit loop with "
+                "possibly unsafe condition load",
+                "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+            return false;
+          }
+        }
+      }
+    }
+
+    if (!EarlyExitLoad) {
+      reportVectorizationFailure(
+          "Early exit loop with store but no condition load",
+          "Cannot vectorize early exit loop with store but no condition load",
+          "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
+  } else {
+    // Read-only loop.
+    // FIXME: as with the loops with stores, only the loads contributing to
+    //        the loop condition need to be guaranteed dereferenceable and
+    //        aligned.
+    if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
+                                       &Predicates)) {
+      reportVectorizationFailure(
+          "Loop may fault",
+          "Cannot vectorize potentially faulting early exit loop",
+          "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
   }
 
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
@@ -1861,7 +1963,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
         return false;
     } else {
       if (!isVectorizableEarlyExitLoop()) {
-        UncountableEdge = std::nullopt;
+        clearEarlyExitData();
         if (DoExtraAnalysis)
           Result = false;
         else
@@ -1879,6 +1981,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
+  // Bail out for state-changing EE loops for now.
+  if (EarlyExitLoad) {
+    reportVectorizationFailure(
+        "Writes to memory unsupported in early exit loops",
+        "Cannot vectorize early exit loop with writes to memory",
+        "WritesInEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
   if (Result) {
     LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                       << (LAI->getRuntimePointerChecking()->Need
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 3a8aec34dfe43..2578260fe878d 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
 ;   return 0;
 ; }
 
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load
 ; CHECK: remark: source.cpp:5:9: loop not vectorized
 
 ; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 7c80dad006952..6724c703e4940 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -3,7 +3,7 @@
 
 define i64 @loop_contains_store(ptr %dest) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
 entry:
   %p1 = alloca [1024 x i8]
   call void @init_mem(ptr %p1, i64 1024)
@@ -56,7 +56,7 @@ exit:
 
 define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
 entry:
   br label %for.body
 
@@ -80,7 +80,7 @@ exit:
 
 define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
 entry:
   br label %for.body
 
@@ -106,7 +106,7 @@ exit:
 
 define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: No dependencies allowed for early exit condition load.
 entry:
   %pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
   br label %for.body
@@ -135,7 +135,7 @@ exit:
 
 define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
 entry:
   %unknown.offset = call i64 @get_an_unknown_offset()
   %unknown.cmp = icmp ult i64 %unknown.offset, 20
@@ -149,10 +149,10 @@ for.body:
   %data = load i16, ptr %st.addr, align 2
   %inc = add nsw i16 %data, 1
   store i16 %inc, ptr %st.addr, align 2
-  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
   %ee.val = load i16, ptr %ee.addr, align 2
   %ee.cond = icmp sgt i16 %ee.val, 500
-  %some.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
+  %some.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
   store i16 42, ptr %some.addr, align 2
   br i1 %ee.cond, label %exit, label %for.inc
 
@@ -194,7 +194,7 @@ exit:
 
 define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
 entry:
   br label %for.body
 
@@ -220,7 +220,7 @@ exit:
 
 define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Complex writes to memory unsupported in early exit loops.
 entry:
   br label %for.body
 
@@ -324,7 +324,7 @@ exit:
 
 define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
 entry:
   br label %for.body
 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants