Skip to content

[InstCombine] Fold icmp of gep chains #146714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

nikic
Copy link
Contributor

@nikic nikic commented Jul 2, 2025

This extends #144065 to the general case of an icmp between two GEP chains that have a common base.

@nikic nikic requested a review from dtcxzyw July 2, 2025 14:58
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jul 2, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 2, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes

This extends #144065 to the general case of an icmp between two GEP chains that have a common base.


Full diff: https://github.com/llvm/llvm-project/pull/146714.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+11-13)
  • (modified) llvm/test/Transforms/InstCombine/icmp-gep.ll (+148)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6737b50405ee2..1b13a11e4652f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -752,8 +752,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
 
     // If the base pointers are different, but the indices are the same, just
     // compare the base pointer.
-    Value *PtrBase = GEPLHS->getOperand(0);
-    if (PtrBase != GEPRHS->getOperand(0)) {
+    if (GEPLHS->getOperand(0) != GEPRHS->getOperand(0)) {
       bool IndicesTheSame =
           GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
           GEPLHS->getPointerOperand()->getType() ==
@@ -779,7 +778,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
       if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
           (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
           (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
-          PtrBase->stripPointerCasts() ==
+          GEPLHS->getOperand(0)->stripPointerCasts() ==
               GEPRHS->getOperand(0)->stripPointerCasts() &&
           !GEPLHS->getType()->isVectorTy()) {
         Value *LOffset = EmitGEPOffset(GEPLHS);
@@ -802,14 +801,10 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                                         LOffset, ROffset);
         return replaceInstUsesWith(I, Cmp);
       }
-
-      // Otherwise, the base pointers are different and the indices are
-      // different. Try convert this to an indexed compare by looking through
-      // PHIs/casts.
-      return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
     }
 
-    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
+    if (GEPLHS->getOperand(0) == GEPRHS->getOperand(0) &&
+        GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
         GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) {
       // If the GEPs only differ by one index, compare it.
       unsigned NumDifferences = 0; // Keep track of # differences.
@@ -846,11 +841,14 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
       }
     }
 
-    if (CanFold(NW)) {
+    if (Base.Ptr && CanFold(Base.LHSNW & Base.RHSNW)) {
       // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
-      Value *L = EmitGEPOffset(GEPLHS, /*RewriteGEP=*/true);
-      Value *R = EmitGEPOffset(GEPRHS, /*RewriteGEP=*/true);
-      return NewICmp(NW, L, R);
+      Type *IdxTy = DL.getIndexType(GEPLHS->getType());
+      Value *L =
+          EmitGEPOffsets(Base.LHSGEPs, Base.LHSNW, IdxTy, /*RewriteGEP=*/true);
+      Value *R =
+          EmitGEPOffsets(Base.RHSGEPs, Base.RHSNW, IdxTy, /*RewriteGEP=*/true);
+      return NewICmp(Base.LHSNW & Base.RHSNW, L, R);
     }
   }
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 3f104056fb1f2..6fd1eb112b450 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -849,3 +849,151 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
   %cmp = icmp ugt ptr %gep2, %base
   ret i1 %cmp
 }
+
+define i1 @gep_gep_multiple_eq(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_eq(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT:    [[GEP3_IDX2:%.*]] = add i64 [[IDX3:%.*]], [[IDX4:%.*]]
+; CHECK-NEXT:    [[CMP_UNSHIFTED:%.*]] = xor i64 [[GEP1_IDX1]], [[GEP3_IDX2]]
+; CHECK-NEXT:    [[CMP_MASK:%.*]] = and i64 [[CMP_UNSHIFTED]], 4611686018427387903
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[CMP_MASK]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+  %cmp = icmp eq ptr %gep2, %gep4
+  ret i1 %cmp
+}
+
+define i1 @gep_gep_multiple_eq_nuw(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_eq_nuw(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT:    [[GEP3_IDX2:%.*]] = add i64 [[IDX3:%.*]], [[IDX4:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[GEP1_IDX1]], [[GEP3_IDX2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr nuw i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr nuw i32, ptr %gep3, i64 %idx4
+  %cmp = icmp eq ptr %gep2, %gep4
+  ret i1 %cmp
+}
+
+define i1 @gep_gep_multiple_eq_nuw_different_scales(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_eq_nuw_different_scales(
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl nuw i64 [[IDX2:%.*]], 3
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl nuw i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP4_IDX:%.*]] = shl nuw i64 [[IDX4:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[GEP3_IDX]], [[GEP4_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr nuw i64, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr nuw i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr nuw i64, ptr %gep3, i64 %idx4
+  %cmp = icmp eq ptr %gep2, %gep4
+  ret i1 %cmp
+}
+
+define i1 @gep_gep_multiple_eq_partial_nuw_different_scales(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_eq_partial_nuw_different_scales(
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl nuw i64 [[IDX2:%.*]], 3
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl nuw i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP4_IDX:%.*]] = shl i64 [[IDX4:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[GEP3_IDX]], [[GEP4_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr nuw i64, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr nuw i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr i64, ptr %gep3, i64 %idx4
+  %cmp = icmp eq ptr %gep2, %gep4
+  ret i1 %cmp
+}
+
+define i1 @gep_gep_multiple_eq_partial_inbounds_different_scales(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_eq_partial_inbounds_different_scales(
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nsw i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 3
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl nsw i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP4_IDX:%.*]] = shl i64 [[IDX4:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[GEP3_IDX]], [[GEP4_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr inbounds i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr inbounds i64, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr inbounds i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr i64, ptr %gep3, i64 %idx4
+  %cmp = icmp eq ptr %gep2, %gep4
+  ret i1 %cmp
+}
+
+define i1 @gep_gep_multiple_ult_nuw(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_ult_nuw(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT:    [[GEP3_IDX2:%.*]] = add i64 [[IDX3:%.*]], [[IDX4:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[GEP1_IDX1]], [[GEP3_IDX2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr nuw i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr nuw i32, ptr %gep3, i64 %idx4
+  %cmp = icmp ult ptr %gep2, %gep4
+  ret i1 %cmp
+}
+
+define i1 @gep_gep_multiple_ult_missing_nuw(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_ult_missing_nuw(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr nuw i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nuw i32, ptr [[GEP1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr nuw i32, ptr [[BASE]], i64 [[IDX3:%.*]]
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult ptr [[GEP2]], [[GEP4]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr nuw i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+  %cmp = icmp ult ptr %gep2, %gep4
+  ret i1 %cmp
+}
+
+define i1 @gep_gep_multiple_ult_nuw_multi_use(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_gep_multiple_ult_nuw_multi_use(
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr nuw i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl nuw i64 [[IDX2:%.*]], 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nuw i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl nuw i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 [[GEP3_IDX]]
+; CHECK-NEXT:    [[GEP4_IDX:%.*]] = shl nuw i64 [[IDX4:%.*]], 2
+; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr nuw i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP2]])
+; CHECK-NEXT:    call void @use(ptr [[GEP5]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[GEP3_IDX]], [[GEP4_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr nuw i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %idx2
+  %gep3 = getelementptr nuw i32, ptr %base, i64 %idx3
+  %gep4 = getelementptr nuw i32, ptr %gep3, i64 %idx4
+  call void @use(ptr %gep2)
+  call void @use(ptr %gep4)
+  %cmp = icmp ult ptr %gep2, %gep4
+  ret i1 %cmp
+}

@dtcxzyw
Copy link
Member

dtcxzyw commented Jul 3, 2025

We hit a tricky regression when the code contains both pointer comparison and difference sharing the same operands.
See https://github.com/facebook/zstd/blob/1dbc2e09084e843f6c0dcc2d0791610015c50979/lib/decompress/zstd_decompress.c#L1526-L1528 and dtcxzyw/llvm-opt-benchmark@1b6d543#r2183037608

It is hard to reassociate operands and reuse the offset...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants