Skip to content

Commit f616127

Browse files
authored
[SeparateConstOffsetFromGEP] Remove support for arithmetic lowering (#151477)
I don't think there is any benefit to lowering to ptrtoint + arithmetic + inttoptr over the newer ptradd lowering. Even if a target does not use codegen AA, it probably still has IR passes that benefit from correct representation. As far as I can tell, no targets actually use this configuration anymore (they either don't use the LowerGEP option, or they UseAA and thus get the ptradd lowering).
1 parent a1a3254 commit f616127

File tree

4 files changed

+31
-146
lines changed

4 files changed

+31
-146
lines changed

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 17 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -79,68 +79,41 @@
7979
// ld.global.f32 %f4, [%rl6+132]; // much better
8080
//
8181
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82-
// multiple indices to either multiple GEPs with a single index or arithmetic
83-
// operations (depending on whether the target uses alias analysis in codegen).
82+
// multiple indices to multiple GEPs with a single index.
8483
// Such transformation can have following benefits:
8584
// (1) It can always extract constants in the indices of structure type.
8685
// (2) After such Lowering, there are more optimization opportunities such as
8786
// CSE, LICM and CGP.
8887
//
8988
// E.g. The following GEPs have multiple indices:
9089
// BB1:
91-
// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
90+
// %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3
9291
// load %p
9392
// ...
9493
// BB2:
95-
// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2
94+
// %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 2
9695
// load %p2
9796
// ...
9897
//
9998
// We can not do CSE to the common part related to index "i64 %i". Lowering
10099
// GEPs can achieve such goals.
101-
// If the target does not use alias analysis in codegen, this pass will
102-
// lower a GEP with multiple indices into arithmetic operations:
103-
// BB1:
104-
// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105-
// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106-
// %3 = add i64 %1, %2 ; CSE opportunity
107-
// %4 = mul i64 %j1, length_of_struct
108-
// %5 = add i64 %3, %4
109-
// %6 = add i64 %3, struct_field_3 ; Constant offset
110-
// %p = inttoptr i64 %6 to i32*
111-
// load %p
112-
// ...
113-
// BB2:
114-
// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115-
// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116-
// %9 = add i64 %7, %8 ; CSE opportunity
117-
// %10 = mul i64 %j2, length_of_struct
118-
// %11 = add i64 %9, %10
119-
// %12 = add i64 %11, struct_field_2 ; Constant offset
120-
// %p = inttoptr i64 %12 to i32*
121-
// load %p2
122-
// ...
123100
//
124-
// If the target uses alias analysis in codegen, this pass will lower a GEP
125-
// with multiple indices into multiple GEPs with a single index:
101+
// This pass will lower a GEP with multiple indices into multiple GEPs with a
102+
// single index:
126103
// BB1:
127-
// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128-
// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129-
// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
104+
// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
105+
// %3 = getelementptr i8, ptr %ptr, i64 %2 ; CSE opportunity
130106
// %4 = mul i64 %j1, length_of_struct
131-
// %5 = getelementptr i8* %3, i64 %4
132-
// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133-
// %p = bitcast i8* %6 to i32*
107+
// %5 = getelementptr i8, ptr %3, i64 %4
108+
// %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset
134109
// load %p
135110
// ...
136111
// BB2:
137-
// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138-
// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139-
// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
112+
// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
113+
// %9 = getelementptr i8, ptr %ptr, i64 %8 ; CSE opportunity
140114
// %10 = mul i64 %j2, length_of_struct
141-
// %11 = getelementptr i8* %9, i64 %10
142-
// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143-
// %p2 = bitcast i8* %12 to i32*
115+
// %11 = getelementptr i8, ptr %9, i64 %10
116+
// %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset
144117
// load %p2
145118
// ...
146119
//
@@ -408,16 +381,6 @@ class SeparateConstOffsetFromGEP {
408381
void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
409382
int64_t AccumulativeByteOffset);
410383

411-
/// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
412-
/// Function splitGEP already split the original GEP into a variadic part and
413-
/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
414-
/// variadic part into a set of arithmetic operations and applies
415-
/// AccumulativeByteOffset to it.
416-
/// \p Variadic The variadic part of the original GEP.
417-
/// \p AccumulativeByteOffset The constant offset.
418-
void lowerToArithmetics(GetElementPtrInst *Variadic,
419-
int64_t AccumulativeByteOffset);
420-
421384
/// Finds the constant offset within each index and accumulates them. If
422385
/// LowerGEP is true, it finds in indices of both sequential and structure
423386
/// types, otherwise it only finds in sequential indices. The output
@@ -951,55 +914,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
951914
Variadic->eraseFromParent();
952915
}
953916

954-
void
955-
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
956-
int64_t AccumulativeByteOffset) {
957-
IRBuilder<> Builder(Variadic);
958-
Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
959-
assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
960-
"Pointer type must match index type for arithmetic-based lowering of "
961-
"split GEPs");
962-
963-
Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
964-
gep_type_iterator GTI = gep_type_begin(*Variadic);
965-
// Create ADD/SHL/MUL arithmetic operations for each sequential indices. We
966-
// don't create arithmetics for structure indices, as they are accumulated
967-
// in the constant offset index.
968-
for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
969-
if (GTI.isSequential()) {
970-
Value *Idx = Variadic->getOperand(I);
971-
// Skip zero indices.
972-
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
973-
if (CI->isZero())
974-
continue;
975-
976-
APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
977-
GTI.getSequentialElementStride(*DL));
978-
// Scale the index by element size.
979-
if (ElementSize != 1) {
980-
if (ElementSize.isPowerOf2()) {
981-
Idx = Builder.CreateShl(
982-
Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
983-
} else {
984-
Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
985-
}
986-
}
987-
// Create an ADD for each index.
988-
ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
989-
}
990-
}
991-
992-
// Create an ADD for the constant offset index.
993-
if (AccumulativeByteOffset != 0) {
994-
ResultPtr = Builder.CreateAdd(
995-
ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
996-
}
997-
998-
ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
999-
Variadic->replaceAllUsesWith(ResultPtr);
1000-
Variadic->eraseFromParent();
1001-
}
1002-
1003917
bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
1004918
TargetTransformInfo &TTI) {
1005919
auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
@@ -1091,8 +1005,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
10911005
// Notice that we don't remove struct field indices here. If LowerGEP is
10921006
// disabled, a structure index is not accumulated and we still use the old
10931007
// one. If LowerGEP is enabled, a structure index is accumulated in the
1094-
// constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1095-
// handle the constant offset and won't need a new structure index.
1008+
// constant offset. LowerToSingleIndexGEPs will later handle the constant
1009+
// offset and won't need a new structure index.
10961010
gep_type_iterator GTI = gep_type_begin(*GEP);
10971011
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
10981012
if (GTI.isSequential()) {
@@ -1167,22 +1081,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
11671081

11681082
GEP->setNoWrapFlags(NewGEPFlags);
11691083

1170-
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
1084+
// Lowers a GEP to GEPs with a single index.
11711085
if (LowerGEP) {
1172-
// As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1173-
// arithmetic operations if the target uses alias analysis in codegen.
1174-
// Additionally, pointers that aren't integral (and so can't be safely
1175-
// converted to integers) or those whose offset size is different from their
1176-
// pointer size (which means that doing integer arithmetic on them could
1177-
// affect that data) can't be lowered in this way.
1178-
unsigned AddrSpace = GEP->getPointerAddressSpace();
1179-
bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
1180-
DL->getIndexSizeInBits(AddrSpace);
1181-
if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
1182-
PointerHasExtraData)
1183-
lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1184-
else
1185-
lowerToArithmetics(GEP, AccumulativeByteOffset);
1086+
lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
11861087
return true;
11871088
}
11881089

llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
2-
; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
3-
; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-NoAA %s
4-
; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
5-
; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
2+
; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
3+
; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
4+
; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
5+
; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
66

77
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
88
target triple = "aarch64"
@@ -38,24 +38,12 @@ if.end: ; preds = %if.then, %entry
3838
; CHECK-NOT: madd
3939
; CHECK:ldr
4040

41-
; CHECK-NoAA-LABEL: @test_GEP_CSE(
42-
; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64
43-
; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
44-
; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
45-
; CHECK-NoAA: add i64 [[PTR2]], 23052
46-
; CHECK-NoAA: inttoptr
47-
; CHECK-NoAA: if.then:
48-
; CHECK-NoAA-NOT: ptrtoint
49-
; CHECK-NoAA-NOT: mul
50-
; CHECK-NoAA: add i64 [[PTR2]], 23048
51-
; CHECK-NoAA: inttoptr
52-
53-
; CHECK-UseAA-LABEL: @test_GEP_CSE(
54-
; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
55-
; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
56-
; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052
57-
; CHECK-UseAA: if.then:
58-
; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048
41+
; CHECK-IR-LABEL: @test_GEP_CSE(
42+
; CHECK-IR: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
43+
; CHECK-IR: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
44+
; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23052
45+
; CHECK-IR: if.then:
46+
; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23048
5947

6048
%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
6149
%struct.pt = type { ptr, i32, i32 }

llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,8 @@ define void @testDisjointOrSplits(ptr %p) {
4747
; CHECK-LABEL: define void @testDisjointOrSplits(
4848
; CHECK-SAME: ptr [[P:%.*]]) {
4949
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
50-
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
51-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
52-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
53-
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
50+
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[VAR]]
51+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 10
5452
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
5553
; CHECK-NEXT: ret void
5654
;

llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,9 @@ define void @test_A_sub_B_add_ConstantInt(ptr %p) {
3131
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
3232
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
3333
; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
34-
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
3534
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[SUB22]], 2
36-
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
37-
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 2044
38-
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
35+
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2044
36+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP4]]
3937
; CHECK-NEXT: store float 1.000000e+00, ptr [[TMP7]], align 4
4038
; CHECK-NEXT: br label [[COND_END]]
4139
; CHECK: cond.end:

0 commit comments

Comments
 (0)