Skip to content

Commit 95057f6

Browse files
committed
Okay, so there is no reasonable way for tail duplication to update SSA form,
as it is making effectively arbitrary modifications to the CFG and we don't have a domset/domfrontier implementations that can handle the dynamic updates. Instead of having a bunch of code that doesn't actually work in practice, just demote any potentially tricky values to the stack (causing the problem to go away entirely). Later invocations of mem2reg will rebuild SSA for us. This fixes all of the major performance regressions with tail duplication from LLVM 1.1. For example, this loop: --- int popcount(int x) { int result = 0; while (x != 0) { result = result + (x & 0x1); x = x >> 1; } return result; } --- Used to be compiled into: int %popcount(int %X) { entry: br label %loopentry loopentry: ; preds = %entry, %no_exit %x.0 = phi int [ %X, %entry ], [ %tmp.9, %no_exit ] ; <int> [#uses=3] %result.1.0 = phi int [ 0, %entry ], [ %tmp.6, %no_exit ] ; <int> [#uses=2] %tmp.1 = seteq int %x.0, 0 ; <bool> [#uses=1] br bool %tmp.1, label %loopexit, label %no_exit no_exit: ; preds = %loopentry %tmp.4 = and int %x.0, 1 ; <int> [#uses=1] %tmp.6 = add int %tmp.4, %result.1.0 ; <int> [#uses=1] %tmp.9 = shr int %x.0, ubyte 1 ; <int> [#uses=1] br label %loopentry loopexit: ; preds = %loopentry ret int %result.1.0 } And is now compiled into: int %popcount(int %X) { entry: br label %no_exit no_exit: ; preds = %entry, %no_exit %x.0.0 = phi int [ %X, %entry ], [ %tmp.9, %no_exit ] ; <int> [#uses=2] %result.1.0.0 = phi int [ 0, %entry ], [ %tmp.6, %no_exit ] ; <int> [#uses=1] %tmp.4 = and int %x.0.0, 1 ; <int> [#uses=1] %tmp.6 = add int %tmp.4, %result.1.0.0 ; <int> [#uses=2] %tmp.9 = shr int %x.0.0, ubyte 1 ; <int> [#uses=2] %tmp.1 = seteq int %tmp.9, 0 ; <bool> [#uses=1] br bool %tmp.1, label %loopexit, label %no_exit loopexit: ; preds = %no_exit ret int %tmp.6 } llvm-svn: 12457
1 parent bb1a2cc commit 95057f6

File tree

1 file changed

+49
-195
lines changed

1 file changed

+49
-195
lines changed

llvm/lib/Transforms/Scalar/TailDuplication.cpp

Lines changed: 49 additions & 195 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,7 @@ namespace {
4141
bool runOnFunction(Function &F);
4242
private:
4343
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *TI);
44-
inline bool canEliminateUnconditionalBranch(TerminatorInst *TI);
4544
inline void eliminateUnconditionalBranch(BranchInst *BI);
46-
inline void InsertPHINodesIfNecessary(Instruction *OrigInst, Value *NewInst,
47-
BasicBlock *NewBlock);
48-
inline Value *GetValueInBlock(BasicBlock *BB, Value *OrigVal,
49-
std::map<BasicBlock*, ValueHolder> &ValueMap,
50-
std::map<BasicBlock*, ValueHolder> &OutValueMap);
51-
inline Value *GetValueOutBlock(BasicBlock *BB, Value *OrigVal,
52-
std::map<BasicBlock*, ValueHolder> &ValueMap,
53-
std::map<BasicBlock*, ValueHolder> &OutValueMap);
5445
};
5546
RegisterOpt<TailDup> X("tailduplicate", "Tail Duplication");
5647
}
@@ -64,8 +55,7 @@ Pass *llvm::createTailDuplicationPass() { return new TailDup(); }
6455
bool TailDup::runOnFunction(Function &F) {
6556
bool Changed = false;
6657
for (Function::iterator I = F.begin(), E = F.end(); I != E; )
67-
if (shouldEliminateUnconditionalBranch(I->getTerminator()) &&
68-
canEliminateUnconditionalBranch(I->getTerminator())) {
58+
if (shouldEliminateUnconditionalBranch(I->getTerminator())) {
6959
eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
7060
Changed = true;
7161
} else {
@@ -96,6 +86,12 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI) {
9686
if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
9787
return false; // Do not loop infinitely!
9888

89+
// FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
90+
// because doing so would require breaking critical edges. This should be
91+
// fixed eventually.
92+
if (!DTI->use_empty())
93+
return false;
94+
9995
// Do not bother working on dead blocks...
10096
pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
10197
if (PI == PE && Dest != Dest->getParent()->begin())
@@ -123,36 +119,6 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI) {
123119
return true;
124120
}
125121

126-
/// canEliminateUnconditionalBranch - Unfortunately, the general form of tail
127-
/// duplication can do very bad things to SSA form, by destroying arbitrary
128-
/// relationships between dominators and dominator frontiers as it processes the
129-
/// program. The right solution for this is to have an incrementally updating
130-
/// dominator data structure, which can gracefully react to arbitrary
131-
/// "addEdge/removeEdge" changes to the CFG. Implementing this is nontrivial,
132-
/// however, so we just disable the transformation in cases where it is not
133-
/// currently safe.
134-
///
135-
bool TailDup::canEliminateUnconditionalBranch(TerminatorInst *TI) {
136-
// Basically, we refuse to make the transformation if any of the values
137-
// computed in the 'tail' are used in any other basic blocks.
138-
BasicBlock *BB = TI->getParent();
139-
BasicBlock *Tail = TI->getSuccessor(0);
140-
assert(isa<BranchInst>(TI) && cast<BranchInst>(TI)->isUnconditional());
141-
142-
for (BasicBlock::iterator I = Tail->begin(), E = Tail->end(); I != E; ++I)
143-
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
144-
++UI) {
145-
Instruction *User = cast<Instruction>(*UI);
146-
if (User->getParent() != Tail && User->getParent() != BB)
147-
return false;
148-
149-
// The 'swap' problem foils the tail duplication rewriting code.
150-
if (isa<PHINode>(User) && User->getParent() == Tail)
151-
return false;
152-
}
153-
return true;
154-
}
155-
156122

157123
/// eliminateUnconditionalBranch - Clone the instructions from the destination
158124
/// block into the source block, eliminating the specified unconditional branch.
@@ -167,6 +133,42 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
167133
DEBUG(std::cerr << "TailDuplication[" << SourceBlock->getParent()->getName()
168134
<< "]: Eliminating branch: " << *Branch);
169135

136+
// Tail duplication can not update SSA properties correctly if the values
137+
// defined in the duplicated tail are used outside of the tail itself. For
138+
// this reason, we spill all values that are used outside of the tail to the
139+
// stack.
140+
for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
141+
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
142+
++UI) {
143+
bool ShouldDemote = false;
144+
if (cast<Instruction>(*UI)->getParent() != DestBlock) {
145+
// We must allow our successors to use tail values in their PHI nodes
146+
// (if the incoming value corresponds to the tail block).
147+
if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
148+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
149+
if (PN->getIncomingValue(i) == I &&
150+
PN->getIncomingBlock(i) != DestBlock) {
151+
ShouldDemote = true;
152+
break;
153+
}
154+
155+
} else {
156+
ShouldDemote = true;
157+
}
158+
} else if (PHINode *PN = dyn_cast<PHINode>(cast<Instruction>(*UI))) {
159+
// If the user of this instruction is a PHI node in the current block,
160+
// spill the value.
161+
ShouldDemote = true;
162+
}
163+
164+
if (ShouldDemote) {
165+
// We found a use outside of the tail. Create a new stack slot to
166+
// break this inter-block usage pattern.
167+
DemoteRegToStack(*I);
168+
break;
169+
}
170+
}
171+
170172
// We are going to have to map operands from the original block B to the new
171173
// copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
172174
// nodes also define part of this mapping. Loop over these PHI nodes, adding
@@ -217,169 +219,21 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
217219
PN->addIncoming(IV, SourceBlock);
218220
}
219221
}
220-
221-
// Now that all of the instructions are correctly copied into the SourceBlock,
222-
// we have one more minor problem: the successors of the original DestBB may
223-
// use the values computed in DestBB either directly (if DestBB dominated the
224-
// block), or through a PHI node. In either case, we need to insert PHI nodes
225-
// into any successors of DestBB (which are now our successors) for each value
226-
// that is computed in DestBB, but is used outside of it. All of these uses
227-
// we have to rewrite with the new PHI node.
228-
//
229-
if (succ_begin(SourceBlock) != succ_end(SourceBlock)) // Avoid wasting time...
230-
for (BI = DestBlock->begin(); BI != DestBlock->end(); ++BI)
231-
if (BI->getType() != Type::VoidTy)
232-
InsertPHINodesIfNecessary(BI, ValueMapping[BI], SourceBlock);
222+
223+
// Next, remove the old branch instruction, and any PHI node entries that we
224+
// had.
225+
BI = Branch; ++BI; // Get an iterator to the first new instruction
226+
DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
227+
SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
233228

234229
// Final step: now that we have finished everything up, walk the cloned
235230
// instructions one last time, constant propagating and DCE'ing them, because
236231
// they may not be needed anymore.
237232
//
238-
BI = Branch; ++BI; // Get an iterator to the first new instruction
239233
if (HadPHINodes)
240234
while (BI != SourceBlock->end())
241235
if (!dceInstruction(BI) && !doConstantPropagation(BI))
242236
++BI;
243237

244-
DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
245-
SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
246-
247238
++NumEliminated; // We just killed a branch!
248239
}
249-
250-
/// InsertPHINodesIfNecessary - So at this point, we cloned the OrigInst
251-
/// instruction into the NewBlock with the value of NewInst. If OrigInst was
252-
/// used outside of its defining basic block, we need to insert a PHI nodes into
253-
/// the successors.
254-
///
255-
void TailDup::InsertPHINodesIfNecessary(Instruction *OrigInst, Value *NewInst,
256-
BasicBlock *NewBlock) {
257-
// Loop over all of the uses of OrigInst, rewriting them to be newly inserted
258-
// PHI nodes, unless they are in the same basic block as OrigInst.
259-
BasicBlock *OrigBlock = OrigInst->getParent();
260-
std::vector<Instruction*> Users;
261-
Users.reserve(OrigInst->use_size());
262-
for (Value::use_iterator I = OrigInst->use_begin(), E = OrigInst->use_end();
263-
I != E; ++I) {
264-
Instruction *In = cast<Instruction>(*I);
265-
if (In->getParent() != OrigBlock || // Don't modify uses in the orig block!
266-
isa<PHINode>(In))
267-
Users.push_back(In);
268-
}
269-
270-
// The common case is that the instruction is only used within the block that
271-
// defines it. If we have this case, quick exit.
272-
//
273-
if (Users.empty()) return;
274-
275-
// Otherwise, we have a more complex case, handle it now. This requires the
276-
// construction of a mapping between a basic block and the value to use when
277-
// in the scope of that basic block. This map will map to the original and
278-
// new values when in the original or new block, but will map to inserted PHI
279-
// nodes when in other blocks.
280-
//
281-
std::map<BasicBlock*, ValueHolder> ValueMap;
282-
std::map<BasicBlock*, ValueHolder> OutValueMap; // The outgoing value map
283-
OutValueMap[OrigBlock] = OrigInst;
284-
OutValueMap[NewBlock ] = NewInst; // Seed the initial values...
285-
286-
DEBUG(std::cerr << " ** Inserting PHI nodes for " << OrigInst);
287-
while (!Users.empty()) {
288-
Instruction *User = Users.back(); Users.pop_back();
289-
290-
if (PHINode *PN = dyn_cast<PHINode>(User)) {
291-
// PHI nodes must be handled specially here, because their operands are
292-
// actually defined in predecessor basic blocks, NOT in the block that the
293-
// PHI node lives in. Note that we have already added entries to PHI nods
294-
// which are in blocks that are immediate successors of OrigBlock, so
295-
// don't modify them again.
296-
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
297-
if (PN->getIncomingValue(i) == OrigInst &&
298-
PN->getIncomingBlock(i) != OrigBlock) {
299-
Value *V = GetValueOutBlock(PN->getIncomingBlock(i), OrigInst,
300-
ValueMap, OutValueMap);
301-
PN->setIncomingValue(i, V);
302-
}
303-
304-
} else {
305-
// Any other user of the instruction can just replace any uses with the
306-
// new value defined in the block it resides in.
307-
Value *V = GetValueInBlock(User->getParent(), OrigInst, ValueMap,
308-
OutValueMap);
309-
User->replaceUsesOfWith(OrigInst, V);
310-
}
311-
}
312-
}
313-
314-
/// GetValueInBlock - This is a recursive method which inserts PHI nodes into
315-
/// the function until there is a value available in basic block BB.
316-
///
317-
Value *TailDup::GetValueInBlock(BasicBlock *BB, Value *OrigVal,
318-
std::map<BasicBlock*, ValueHolder> &ValueMap,
319-
std::map<BasicBlock*,ValueHolder> &OutValueMap){
320-
ValueHolder &BBVal = ValueMap[BB];
321-
if (BBVal) return BBVal; // Value already computed for this block?
322-
323-
// If this block has no predecessors, then it must be unreachable, thus, it
324-
// doesn't matter which value we use.
325-
if (pred_begin(BB) == pred_end(BB))
326-
return BBVal = Constant::getNullValue(OrigVal->getType());
327-
328-
// If there is no value already available in this basic block, we need to
329-
// either reuse a value from an incoming, dominating, basic block, or we need
330-
// to create a new PHI node to merge in different incoming values. Because we
331-
// don't know if we're part of a loop at this point or not, we create a PHI
332-
// node, even if we will ultimately eliminate it.
333-
PHINode *PN = new PHINode(OrigVal->getType(), OrigVal->getName()+".pn",
334-
BB->begin());
335-
BBVal = PN; // Insert this into the BBVal slot in case of cycles...
336-
337-
ValueHolder &BBOutVal = OutValueMap[BB];
338-
if (BBOutVal == 0) BBOutVal = PN;
339-
340-
// Now that we have created the PHI node, loop over all of the predecessors of
341-
// this block, computing an incoming value for the predecessor.
342-
std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
343-
for (unsigned i = 0, e = Preds.size(); i != e; ++i)
344-
PN->addIncoming(GetValueOutBlock(Preds[i], OrigVal, ValueMap, OutValueMap),
345-
Preds[i]);
346-
347-
// The PHI node is complete. In many cases, however the PHI node was
348-
// ultimately unnecessary: we could have just reused a dominating incoming
349-
// value. If this is the case, nuke the PHI node and replace the map entry
350-
// with the dominating value.
351-
//
352-
assert(PN->getNumIncomingValues() > 0 && "No predecessors?");
353-
354-
// Check to see if all of the elements in the PHI node are either the PHI node
355-
// itself or ONE particular value.
356-
unsigned i = 0;
357-
Value *ReplVal = PN->getIncomingValue(i);
358-
for (; ReplVal == PN && i != PN->getNumIncomingValues(); ++i)
359-
ReplVal = PN->getIncomingValue(i); // Skip values equal to the PN
360-
361-
for (; i != PN->getNumIncomingValues(); ++i)
362-
if (PN->getIncomingValue(i) != PN && PN->getIncomingValue(i) != ReplVal) {
363-
ReplVal = 0;
364-
break;
365-
}
366-
367-
// Found a value to replace the PHI node with?
368-
if (ReplVal && ReplVal != PN) {
369-
PN->replaceAllUsesWith(ReplVal);
370-
BB->getInstList().erase(PN); // Erase the PHI node...
371-
} else {
372-
++NumPHINodes;
373-
}
374-
375-
return BBVal;
376-
}
377-
378-
Value *TailDup::GetValueOutBlock(BasicBlock *BB, Value *OrigVal,
379-
std::map<BasicBlock*, ValueHolder> &ValueMap,
380-
std::map<BasicBlock*, ValueHolder> &OutValueMap) {
381-
ValueHolder &BBVal = OutValueMap[BB];
382-
if (BBVal) return BBVal; // Value already computed for this block?
383-
384-
return GetValueInBlock(BB, OrigVal, ValueMap, OutValueMap);
385-
}

0 commit comments

Comments
 (0)