diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 320b79203c0b3..6ffe8413a5079 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -79,8 +79,7 @@
 //     ld.global.f32   %f4, [%rl6+132]; // much better
 //
 // Another improvement enabled by the LowerGEP flag is to lower a GEP with
-// multiple indices to either multiple GEPs with a single index or arithmetic
-// operations (depending on whether the target uses alias analysis in codegen).
+// multiple indices to multiple GEPs with a single index.
 // Such transformation can have following benefits:
 // (1) It can always extract constants in the indices of structure type.
 // (2) After such Lowering, there are more optimization opportunities such as
@@ -88,59 +87,33 @@
 //
 // E.g. The following GEPs have multiple indices:
 //  BB1:
-//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
+//    %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3
 //    load %p
 //    ...
 //  BB2:
-//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2
+//    %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 2
 //    load %p2
 //    ...
 //
 // We can not do CSE to the common part related to index "i64 %i". Lowering
 // GEPs can achieve such goals.
-// If the target does not use alias analysis in codegen, this pass will
-// lower a GEP with multiple indices into arithmetic operations:
-//  BB1:
-//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
-//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %3 = add i64 %1, %2                          ; CSE opportunity
-//    %4 = mul i64 %j1, length_of_struct
-//    %5 = add i64 %3, %4
-//    %6 = add i64 %3, struct_field_3              ; Constant offset
-//    %p = inttoptr i64 %6 to i32*
-//    load %p
-//    ...
-//  BB2:
-//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
-//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %9 = add i64 %7, %8                          ; CSE opportunity
-//    %10 = mul i64 %j2, length_of_struct
-//    %11 = add i64 %9, %10
-//    %12 = add i64 %11, struct_field_2            ; Constant offset
-//    %p = inttoptr i64 %12 to i32*
-//    load %p2
-//    ...
 //
-// If the target uses alias analysis in codegen, this pass will lower a GEP
-// with multiple indices into multiple GEPs with a single index:
+// This pass will lower a GEP with multiple indices into multiple GEPs with a
+// single index:
 //  BB1:
-//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
-//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
+//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
+//    %3 = getelementptr i8, ptr %ptr, i64 %2      ; CSE opportunity
 //    %4 = mul i64 %j1, length_of_struct
-//    %5 = getelementptr i8* %3, i64 %4
-//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
-//    %p = bitcast i8* %6 to i32*
+//    %5 = getelementptr i8, ptr %3, i64 %4
+//    %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset
 //    load %p
 //    ...
 //  BB2:
-//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
-//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
-//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
+//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
+//    %9 = getelementptr i8, ptr %ptr, i64 %8      ; CSE opportunity
 //    %10 = mul i64 %j2, length_of_struct
-//    %11 = getelementptr i8* %9, i64 %10
-//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
-//    %p2 = bitcast i8* %12 to i32*
+//    %11 = getelementptr i8, ptr %9, i64 %10
+//    %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset
 //    load %p2
 //    ...
 //
@@ -408,16 +381,6 @@ class SeparateConstOffsetFromGEP {
   void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                               int64_t AccumulativeByteOffset);
 
-  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
-  /// Function splitGEP already split the original GEP into a variadic part and
-  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
-  /// variadic part into a set of arithmetic operations and applies
-  /// AccumulativeByteOffset to it.
-  /// \p Variadic                  The variadic part of the original GEP.
-  /// \p AccumulativeByteOffset    The constant offset.
-  void lowerToArithmetics(GetElementPtrInst *Variadic,
-                          int64_t AccumulativeByteOffset);
-
   /// Finds the constant offset within each index and accumulates them. If
   /// LowerGEP is true, it finds in indices of both sequential and structure
   /// types, otherwise it only finds in sequential indices. The output
@@ -951,55 +914,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
   Variadic->eraseFromParent();
 }
 
-void
-SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
-                                               int64_t AccumulativeByteOffset) {
-  IRBuilder<> Builder(Variadic);
-  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
-  assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
-         "Pointer type must match index type for arithmetic-based lowering of "
-         "split GEPs");
-
-  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
-  gep_type_iterator GTI = gep_type_begin(*Variadic);
-  // Create ADD/SHL/MUL arithmetic operations for each sequential indices. We
-  // don't create arithmetics for structure indices, as they are accumulated
-  // in the constant offset index.
-  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
-    if (GTI.isSequential()) {
-      Value *Idx = Variadic->getOperand(I);
-      // Skip zero indices.
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
-        if (CI->isZero())
-          continue;
-
-      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
-                                GTI.getSequentialElementStride(*DL));
-      // Scale the index by element size.
-      if (ElementSize != 1) {
-        if (ElementSize.isPowerOf2()) {
-          Idx = Builder.CreateShl(
-              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
-        } else {
-          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
-        }
-      }
-      // Create an ADD for each index.
-      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
-    }
-  }
-
-  // Create an ADD for the constant offset index.
-  if (AccumulativeByteOffset != 0) {
-    ResultPtr = Builder.CreateAdd(
-        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
-  }
-
-  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
-  Variadic->replaceAllUsesWith(ResultPtr);
-  Variadic->eraseFromParent();
-}
-
 bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
                                             TargetTransformInfo &TTI) {
   auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
@@ -1091,8 +1005,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // Notice that we don't remove struct field indices here. If LowerGEP is
   // disabled, a structure index is not accumulated and we still use the old
   // one. If LowerGEP is enabled, a structure index is accumulated in the
-  // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
-  // handle the constant offset and won't need a new structure index.
+  // constant offset. LowerToSingleIndexGEPs will later handle the constant
+  // offset and won't need a new structure index.
   gep_type_iterator GTI = gep_type_begin(*GEP);
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
@@ -1167,22 +1081,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 
   GEP->setNoWrapFlags(NewGEPFlags);
 
-  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
+  // Lowers a GEP to GEPs with a single index.
   if (LowerGEP) {
-    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
-    // arithmetic operations if the target uses alias analysis in codegen.
-    // Additionally, pointers that aren't integral (and so can't be safely
-    // converted to integers) or those whose offset size is different from their
-    // pointer size (which means that doing integer arithmetic on them could
-    // affect that data) can't be lowered in this way.
-    unsigned AddrSpace = GEP->getPointerAddressSpace();
-    bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
-                               DL->getIndexSizeInBits(AddrSpace);
-    if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
-        PointerHasExtraData)
-      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
-    else
-      lowerToArithmetics(GEP, AccumulativeByteOffset);
+    lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
     return true;
   }
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
index 578038b8f4daa..d9cdac4946360 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-NoAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"
@@ -38,24 +38,12 @@ if.end:                                           ; preds = %if.then, %entry
 ; CHECK-NOT: madd
 ; CHECK:ldr
 
-; CHECK-NoAA-LABEL: @test_GEP_CSE(
-; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64
-; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
-; CHECK-NoAA: add i64 [[PTR2]], 23052
-; CHECK-NoAA: inttoptr
-; CHECK-NoAA: if.then:
-; CHECK-NoAA-NOT: ptrtoint
-; CHECK-NoAA-NOT: mul
-; CHECK-NoAA: add i64 [[PTR2]], 23048
-; CHECK-NoAA: inttoptr
-
-; CHECK-UseAA-LABEL: @test_GEP_CSE(
-; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052
-; CHECK-UseAA: if.then:
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048
+; CHECK-IR-LABEL: @test_GEP_CSE(
+; CHECK-IR: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-IR: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23052
+; CHECK-IR: if.then:
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23048
 
 %class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
 %struct.pt = type { ptr, i32, i32 }
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
index b3096820425b8..2fad306c5bf3c 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
@@ -47,10 +47,8 @@ define void @testDisjointOrSplits(ptr %p) {
 ; CHECK-LABEL: define void @testDisjointOrSplits(
 ; CHECK-SAME: ptr [[P:%.*]]) {
 ; CHECK-NEXT:    [[VAR:%.*]] = tail call i64 @foo()
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], 10
-; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[VAR]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 10
 ; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
index b0e88ef835f6a..a6b38bc9924a7 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
@@ -31,11 +31,9 @@ define void @test_A_sub_B_add_ConstantInt(ptr %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[REM]] to i64
 ; CHECK-NEXT:    [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[SUB22]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 2044
-; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2044
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP4]]
 ; CHECK-NEXT:    store float 1.000000e+00, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    br label [[COND_END]]
 ; CHECK:       cond.end:
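
Note for reviewers (not part of the patch): below is a minimal before/after sketch of the one lowering strategy that remains after this change. The struct layout, function name, and value names are hypothetical: a 16-byte %struct, so the [10 x %struct] row stride is 160 bytes and field 3 sits at byte offset 12. Input:

  %struct = type { i32, i32, i32, i32 }   ; hypothetical 16-byte struct

  define i32 @sketch(ptr %ptr, i64 %i, i64 %j1) {
  entry:
    %p = getelementptr inbounds [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3
    %v = load i32, ptr %p
    ret i32 %v
  }

Roughly what lowerToSingleIndexGEPs produces, with the accumulated constant offset split into the final GEP:

  define i32 @sketch(ptr %ptr, i64 %i, i64 %j1) {
  entry:
    %row.off = mul i64 %i, 160                  ; row stride: 10 * 16 bytes
    %row = getelementptr i8, ptr %ptr, i64 %row.off ; CSE opportunity across blocks
    %elt.off = shl i64 %j1, 4                   ; %j1 * 16; power-of-two stride uses shl
    %elt = getelementptr i8, ptr %row, i64 %elt.off
    %p = getelementptr i8, ptr %elt, i64 12     ; constant field offset, exposed for folding
    %v = load i32, ptr %p
    ret i32 %v
  }

Since every step is a plain i8 GEP rather than ptrtoint/inttoptr arithmetic, BasicAA (which does not analyze ptrtoint/inttoptr) can still see through the address computation, which is why the UseAA/NoAA distinction in the tests collapses into the single CHECK-IR prefix.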