llvm · nikic · Aug 1, 2025 · Jul 31, 2025
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -79,68 +79,41 @@
 // ld.global.f32   %f4, [%rl6+132]; // much better
 //
 // Another improvement enabled by the LowerGEP flag is to lower a GEP with
-// multiple indices to either multiple GEPs with a single index or arithmetic
-// operations (depending on whether the target uses alias analysis in codegen).
+// multiple indices to multiple GEPs with a single index.
 // Such transformation can have following benefits:
 // (1) It can always extract constants in the indices of structure type.
 // (2) After such Lowering, there are more optimization opportunities such as
 //     CSE, LICM and CGP.
 //
 // E.g. The following GEPs have multiple indices:
 //  BB1:
-//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
+//    %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3
 //    load %p
 //    ...
 //  BB2:
-//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2
+//    %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 2
 //    load %p2
 //    ...
 //
 // We can not do CSE to the common part related to index "i64 %i". Lowering
 // GEPs can achieve such goals.
-// If the target does not use alias analysis in codegen, this pass will
-// lower a GEP with multiple indices into arithmetic operations:
-//  BB1:
-//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
-//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %3 = add i64 %1, %2                          ; CSE opportunity
-//    %4 = mul i64 %j1, length_of_struct
-//    %5 = add i64 %3, %4
-//    %6 = add i64 %3, struct_field_3              ; Constant offset
-//    %p = inttoptr i64 %6 to i32*
-//    load %p
-//    ...
-//  BB2:
-//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
-//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %9 = add i64 %7, %8                          ; CSE opportunity
-//    %10 = mul i64 %j2, length_of_struct
-//    %11 = add i64 %9, %10
-//    %12 = add i64 %11, struct_field_2            ; Constant offset
-//    %p = inttoptr i64 %12 to i32*
-//    load %p2
-//    ...
 //
-// If the target uses alias analysis in codegen, this pass will lower a GEP
-// with multiple indices into multiple GEPs with a single index:
+// This pass will lower a GEP with multiple indices into multiple GEPs with a
+// single index:
 //  BB1:
-//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
-//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
+//    %2 = mul i64 %i, length_of_10xstruct          ; CSE opportunity
+//    %3 = getelementptr i8, ptr %ptr, i64 %2       ; CSE opportunity
 //    %4 = mul i64 %j1, length_of_struct
-//    %5 = getelementptr i8* %3, i64 %4
-//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
-//    %p = bitcast i8* %6 to i32*
+//    %5 = getelementptr i8, ptr %3, i64 %4
+//    %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset
 //    load %p
 //    ...
 //  BB2:
-//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
-//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
+//    %8 = mul i64 %i, length_of_10xstruct            ; CSE opportunity
+//    %9 = getelementptr i8, ptr %ptr, i64 %8         ; CSE opportunity
 //    %10 = mul i64 %j2, length_of_struct
-//    %11 = getelementptr i8* %9, i64 %10
-//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
-//    %p2 = bitcast i8* %12 to i32*
+//    %11 = getelementptr i8, ptr %9, i64 %10
+//    %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset
 //    load %p2
 //    ...
 //
@@ -408,16 +381,6 @@ class SeparateConstOffsetFromGEP {
   void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                               int64_t AccumulativeByteOffset);
 
-  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
-  /// Function splitGEP already split the original GEP into a variadic part and
-  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
-  /// variadic part into a set of arithmetic operations and applies
-  /// AccumulativeByteOffset to it.
-  /// \p Variadic                  The variadic part of the original GEP.
-  /// \p AccumulativeByteOffset    The constant offset.
-  void lowerToArithmetics(GetElementPtrInst *Variadic,
-                          int64_t AccumulativeByteOffset);
-
   /// Finds the constant offset within each index and accumulates them. If
   /// LowerGEP is true, it finds in indices of both sequential and structure
   /// types, otherwise it only finds in sequential indices. The output
@@ -951,55 +914,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
   Variadic->eraseFromParent();
 }
 
-void
-SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
-                                               int64_t AccumulativeByteOffset) {
-  IRBuilder<> Builder(Variadic);
-  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
-  assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
-         "Pointer type must match index type for arithmetic-based lowering of "
-         "split GEPs");
-
-  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
-  gep_type_iterator GTI = gep_type_begin(*Variadic);
-  // Create ADD/SHL/MUL arithmetic operations for each sequential indices. We
-  // don't create arithmetics for structure indices, as they are accumulated
-  // in the constant offset index.
-  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
-    if (GTI.isSequential()) {
-      Value *Idx = Variadic->getOperand(I);
-      // Skip zero indices.
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
-        if (CI->isZero())
-          continue;
-
-      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
-                                GTI.getSequentialElementStride(*DL));
-      // Scale the index by element size.
-      if (ElementSize != 1) {
-        if (ElementSize.isPowerOf2()) {
-          Idx = Builder.CreateShl(
-              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
-        } else {
-          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
-        }
-      }
-      // Create an ADD for each index.
-      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
-    }
-  }
-
-  // Create an ADD for the constant offset index.
-  if (AccumulativeByteOffset != 0) {
-    ResultPtr = Builder.CreateAdd(
-        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
-  }
-
-  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
-  Variadic->replaceAllUsesWith(ResultPtr);
-  Variadic->eraseFromParent();
-}
-
 bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
                                             TargetTransformInfo &TTI) {
   auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
@@ -1091,8 +1005,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // Notice that we don't remove struct field indices here. If LowerGEP is
   // disabled, a structure index is not accumulated and we still use the old
   // one. If LowerGEP is enabled, a structure index is accumulated in the
-  // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
-  // handle the constant offset and won't need a new structure index.
+  // constant offset. LowerToSingleIndexGEPs will later handle the constant
+  // offset and won't need a new structure index.
   gep_type_iterator GTI = gep_type_begin(*GEP);
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
@@ -1167,22 +1081,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 
   GEP->setNoWrapFlags(NewGEPFlags);
 
-  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
+  // Lowers a GEP to GEPs with a single index.
   if (LowerGEP) {
-    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
-    // arithmetic operations if the target uses alias analysis in codegen.
-    // Additionally, pointers that aren't integral (and so can't be safely
-    // converted to integers) or those whose offset size is different from their
-    // pointer size (which means that doing integer arithmetic on them could
-    // affect that data) can't be lowered in this way.
-    unsigned AddrSpace = GEP->getPointerAddressSpace();
-    bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
-                               DL->getIndexSizeInBits(AddrSpace);
-    if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
-        PointerHasExtraData)
-      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
-    else
-      lowerToArithmetics(GEP, AccumulativeByteOffset);
+    lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
     return true;
   }
 

diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-NoAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"
@@ -38,24 +38,12 @@ if.end:                                           ; preds = %if.then, %entry
 ; CHECK-NOT: madd
 ; CHECK:ldr
 
-; CHECK-NoAA-LABEL: @test_GEP_CSE(
-; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64
-; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
-; CHECK-NoAA: add i64 [[PTR2]], 23052
-; CHECK-NoAA: inttoptr
-; CHECK-NoAA: if.then:
-; CHECK-NoAA-NOT: ptrtoint
-; CHECK-NoAA-NOT: mul
-; CHECK-NoAA: add i64 [[PTR2]], 23048
-; CHECK-NoAA: inttoptr
-
-; CHECK-UseAA-LABEL: @test_GEP_CSE(
-; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052
-; CHECK-UseAA: if.then:
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048
+; CHECK-IR-LABEL: @test_GEP_CSE(
+; CHECK-IR: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-IR: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23052
+; CHECK-IR: if.then:
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23048
 
 %class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
 %struct.pt = type { ptr, i32, i32 }

diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
@@ -47,10 +47,8 @@ define void @testDisjointOrSplits(ptr %p) {
 ; CHECK-LABEL: define void @testDisjointOrSplits(
 ; CHECK-SAME: ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[VAR:%.*]] = tail call i64 @foo()
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], 10
-; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[VAR]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 10
 ; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    ret void
 ;

diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
@@ -31,11 +31,9 @@ define void @test_A_sub_B_add_ConstantInt(ptr %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[REM]] to i64
 ; CHECK-NEXT:    [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[SUB22]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 2044
-; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2044
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP4]]
 ; CHECK-NEXT:    store float 1.000000e+00, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    br label [[COND_END]]
 ; CHECK:       cond.end: