|
79 | 79 | // ld.global.f32 %f4, [%rl6+132]; // much better
|
80 | 80 | //
|
81 | 81 | // Another improvement enabled by the LowerGEP flag is to lower a GEP with
|
82 |
| -// multiple indices to either multiple GEPs with a single index or arithmetic |
83 |
| -// operations (depending on whether the target uses alias analysis in codegen). |
| 82 | +// multiple indices to multiple GEPs with a single index. |
84 | 83 | // Such a transformation can have the following benefits:
|
85 | 84 | // (1) It can always extract constants in the indices of structure type.
|
86 | 85 | // (2) After such Lowering, there are more optimization opportunities such as
|
87 | 86 | // CSE, LICM and CGP.
|
88 | 87 | //
|
89 | 88 | // E.g. The following GEPs have multiple indices:
|
90 | 89 | // BB1:
|
91 |
| -// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3 |
| 90 | +// %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3 |
92 | 91 | // load %p
|
93 | 92 | // ...
|
94 | 93 | // BB2:
|
95 |
| -// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2 |
| 94 | +// %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j2, i32 2 |
96 | 95 | // load %p2
|
97 | 96 | // ...
|
98 | 97 | //
|
99 | 98 | // We cannot apply CSE to the common part related to index "i64 %i". Lowering
|
100 | 99 | // the GEPs makes this possible.
|
101 |
| -// If the target does not use alias analysis in codegen, this pass will |
102 |
| -// lower a GEP with multiple indices into arithmetic operations: |
103 |
| -// BB1: |
104 |
| -// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity |
105 |
| -// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
106 |
| -// %3 = add i64 %1, %2 ; CSE opportunity |
107 |
| -// %4 = mul i64 %j1, length_of_struct |
108 |
| -// %5 = add i64 %3, %4 |
109 |
| -// %6 = add i64 %3, struct_field_3 ; Constant offset |
110 |
| -// %p = inttoptr i64 %6 to i32* |
111 |
| -// load %p |
112 |
| -// ... |
113 |
| -// BB2: |
114 |
| -// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity |
115 |
| -// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
116 |
| -// %9 = add i64 %7, %8 ; CSE opportunity |
117 |
| -// %10 = mul i64 %j2, length_of_struct |
118 |
| -// %11 = add i64 %9, %10 |
119 |
| -// %12 = add i64 %11, struct_field_2 ; Constant offset |
120 |
| -// %p = inttoptr i64 %12 to i32* |
121 |
| -// load %p2 |
122 |
| -// ... |
123 | 100 | //
|
124 |
| -// If the target uses alias analysis in codegen, this pass will lower a GEP |
125 |
| -// with multiple indices into multiple GEPs with a single index: |
| 101 | +// This pass will lower a GEP with multiple indices into multiple GEPs with a |
| 102 | +// single index: |
126 | 103 | // BB1:
|
127 |
| -// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity |
128 |
| -// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
129 |
| -// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity |
| 104 | +// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
| 105 | +// %3 = getelementptr i8, ptr %ptr, i64 %2 ; CSE opportunity |
130 | 106 | // %4 = mul i64 %j1, length_of_struct
|
131 |
| -// %5 = getelementptr i8* %3, i64 %4 |
132 |
| -// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset |
133 |
| -// %p = bitcast i8* %6 to i32* |
| 107 | +// %5 = getelementptr i8, ptr %3, i64 %4 |
| 108 | +// %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset |
134 | 109 | // load %p
|
135 | 110 | // ...
|
136 | 111 | // BB2:
|
137 |
| -// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity |
138 |
| -// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
139 |
| -// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity |
| 112 | +// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
| 113 | +// %9 = getelementptr i8, ptr %ptr, i64 %8 ; CSE opportunity |
140 | 114 | // %10 = mul i64 %j2, length_of_struct
|
141 |
| -// %11 = getelementptr i8* %9, i64 %10 |
142 |
| -// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset |
143 |
| -// %p2 = bitcast i8* %12 to i32* |
| 115 | +// %11 = getelementptr i8, ptr %9, i64 %10 |
| 116 | +// %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset |
144 | 117 | // load %p2
|
145 | 118 | // ...
|
146 | 119 | //
|
@@ -408,16 +381,6 @@ class SeparateConstOffsetFromGEP {
|
408 | 381 | void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
|
409 | 382 | int64_t AccumulativeByteOffset);
|
410 | 383 |
|
411 |
| - /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form. |
412 |
| - /// Function splitGEP already split the original GEP into a variadic part and |
413 |
| - /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the |
414 |
| - /// variadic part into a set of arithmetic operations and applies |
415 |
| - /// AccumulativeByteOffset to it. |
416 |
| - /// \p Variadic The variadic part of the original GEP. |
417 |
| - /// \p AccumulativeByteOffset The constant offset. |
418 |
| - void lowerToArithmetics(GetElementPtrInst *Variadic, |
419 |
| - int64_t AccumulativeByteOffset); |
420 |
| - |
421 | 384 | /// Finds the constant offset within each index and accumulates them. If
|
422 | 385 | /// LowerGEP is true, it searches indices of both sequential and structure
|
423 | 386 | /// types; otherwise it only searches sequential indices. The output
|
@@ -951,55 +914,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
|
951 | 914 | Variadic->eraseFromParent();
|
952 | 915 | }
|
953 | 916 |
|
954 |
| -void |
955 |
| -SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, |
956 |
| - int64_t AccumulativeByteOffset) { |
957 |
| - IRBuilder<> Builder(Variadic); |
958 |
| - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); |
959 |
| - assert(IntPtrTy == DL->getIndexType(Variadic->getType()) && |
960 |
| - "Pointer type must match index type for arithmetic-based lowering of " |
961 |
| - "split GEPs"); |
962 |
| - |
963 |
| - Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); |
964 |
| - gep_type_iterator GTI = gep_type_begin(*Variadic); |
965 |
| - // Create ADD/SHL/MUL arithmetic operations for each sequential indices. We |
966 |
| - // don't create arithmetics for structure indices, as they are accumulated |
967 |
| - // in the constant offset index. |
968 |
| - for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) { |
969 |
| - if (GTI.isSequential()) { |
970 |
| - Value *Idx = Variadic->getOperand(I); |
971 |
| - // Skip zero indices. |
972 |
| - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) |
973 |
| - if (CI->isZero()) |
974 |
| - continue; |
975 |
| - |
976 |
| - APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), |
977 |
| - GTI.getSequentialElementStride(*DL)); |
978 |
| - // Scale the index by element size. |
979 |
| - if (ElementSize != 1) { |
980 |
| - if (ElementSize.isPowerOf2()) { |
981 |
| - Idx = Builder.CreateShl( |
982 |
| - Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); |
983 |
| - } else { |
984 |
| - Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); |
985 |
| - } |
986 |
| - } |
987 |
| - // Create an ADD for each index. |
988 |
| - ResultPtr = Builder.CreateAdd(ResultPtr, Idx); |
989 |
| - } |
990 |
| - } |
991 |
| - |
992 |
| - // Create an ADD for the constant offset index. |
993 |
| - if (AccumulativeByteOffset != 0) { |
994 |
| - ResultPtr = Builder.CreateAdd( |
995 |
| - ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset)); |
996 |
| - } |
997 |
| - |
998 |
| - ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType()); |
999 |
| - Variadic->replaceAllUsesWith(ResultPtr); |
1000 |
| - Variadic->eraseFromParent(); |
1001 |
| -} |
1002 |
| - |
1003 | 917 | bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
|
1004 | 918 | TargetTransformInfo &TTI) {
|
1005 | 919 | auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
|
@@ -1091,8 +1005,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
|
1091 | 1005 | // Notice that we don't remove struct field indices here. If LowerGEP is
|
1092 | 1006 | // disabled, a structure index is not accumulated and we still use the old
|
1093 | 1007 | // one. If LowerGEP is enabled, a structure index is accumulated in the
|
1094 |
| - // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later |
1095 |
| - // handle the constant offset and won't need a new structure index. |
| 1008 | + // constant offset. lowerToSingleIndexGEPs will later handle the constant |
| 1009 | + // offset and won't need a new structure index. |
1096 | 1010 | gep_type_iterator GTI = gep_type_begin(*GEP);
|
1097 | 1011 | for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
|
1098 | 1012 | if (GTI.isSequential()) {
|
@@ -1167,22 +1081,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
|
1167 | 1081 |
|
1168 | 1082 | GEP->setNoWrapFlags(NewGEPFlags);
|
1169 | 1083 |
|
1170 |
| - // Lowers a GEP to either GEPs with a single index or arithmetic operations. |
| 1084 | + // Lowers a GEP to GEPs with a single index. |
1171 | 1085 | if (LowerGEP) {
|
1172 |
| - // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to |
1173 |
| - // arithmetic operations if the target uses alias analysis in codegen. |
1174 |
| - // Additionally, pointers that aren't integral (and so can't be safely |
1175 |
| - // converted to integers) or those whose offset size is different from their |
1176 |
| - // pointer size (which means that doing integer arithmetic on them could |
1177 |
| - // affect that data) can't be lowered in this way. |
1178 |
| - unsigned AddrSpace = GEP->getPointerAddressSpace(); |
1179 |
| - bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) != |
1180 |
| - DL->getIndexSizeInBits(AddrSpace); |
1181 |
| - if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) || |
1182 |
| - PointerHasExtraData) |
1183 |
| - lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); |
1184 |
| - else |
1185 |
| - lowerToArithmetics(GEP, AccumulativeByteOffset); |
| 1086 | + lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); |
1186 | 1087 | return true;
|
1187 | 1088 | }
|
1188 | 1089 |
|
|
0 commit comments