Skip to content

Commit 946656c

Browse files
committed
Search for live blocks backward from uses. Add limit for searching
Signed-off-by: John Lu <[email protected]>
1 parent 828e5b5 commit 946656c

File tree

2 files changed

+127
-18
lines changed

2 files changed

+127
-18
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ static cl::opt<unsigned> MaxSwitchCasesPerResult(
197197
"max-switch-cases-per-result", cl::Hidden, cl::init(16),
198198
cl::desc("Limit cases to analyze when converting a switch to select"));
199199

200+
static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
201+
"max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
202+
cl::desc("Limit number of blocks a define in a threaded block is allowed "
203+
"to be live in"));
204+
200205
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
201206
STATISTIC(NumLinearMaps,
202207
"Number of switch instructions turned into linear mapping");
@@ -3444,24 +3449,25 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
34443449

34453450
using BlocksSet = SmallPtrSet<BasicBlock *, 8>;
34463451

3447-
static bool reachesUsed(BasicBlock *BB, BlocksSet &ReachesNonLocalUses,
3448-
BlocksSet &VisitedBlocksSet) {
3449-
if (ReachesNonLocalUses.contains(BB))
3452+
// Return false if the number of blocks searched exceeds MaxJumpThreadingLiveBlocks.
3453+
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3454+
BlocksSet &ReachesNonLocalUses) {
3455+
if (BB == DefBB)
3456+
return true;
3457+
if (!ReachesNonLocalUses.insert(BB).second)
34503458
return true;
3451-
if (!VisitedBlocksSet.insert(BB).second)
3459+
3460+
if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
34523461
return false;
3453-
for (BasicBlock *Succ : successors(BB))
3454-
if (reachesUsed(Succ, ReachesNonLocalUses, VisitedBlocksSet)) {
3455-
ReachesNonLocalUses.insert(BB);
3456-
return true;
3457-
}
3458-
return false;
3462+
for (BasicBlock *Pred : predecessors(BB))
3463+
if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3464+
return false;
3465+
return true;
34593466
}
34603467

34613468
/// Return true if we can thread a branch across this block.
3462-
static bool
3463-
blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3464-
BlocksSet &UsedInNonLocalBlocksSet) {
3469+
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3470+
BlocksSet &NonLocalUseBlocks) {
34653471
int Size = 0;
34663472
EphemeralValueTracker EphTracker;
34673473

@@ -3490,7 +3496,7 @@ blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
34903496
if (isa<PHINode>(UI))
34913497
return false;
34923498
} else
3493-
UsedInNonLocalBlocksSet.insert(UsedInBB);
3499+
NonLocalUseBlocks.insert(UsedInBB);
34943500
}
34953501

34963502
// Looks ok, continue checking.
@@ -3552,10 +3558,19 @@ foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
35523558
// Check that the block is small enough and record which non-local blocks use
35533559
// values defined in the block.
35543560

3555-
BlocksSet ReachesNonLocalUses;
3556-
if (!blockIsSimpleEnoughToThreadThrough(BB, ReachesNonLocalUses))
3561+
BlocksSet NonLocalUseBlocks;
3562+
BlocksSet ReachesNonLocalUseBlocks;
3563+
if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
35573564
return false;
35583565

3566+
if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3567+
NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3568+
return false;
3569+
3570+
for (BasicBlock *UseBB : NonLocalUseBlocks)
3571+
if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3572+
return false;
3573+
35593574
for (const auto &Pair : KnownValues) {
35603575
ConstantInt *CB = Pair.first;
35613576
ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
@@ -3573,8 +3588,7 @@ foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
35733588
continue;
35743589

35753590
// Only revector to RealDest if no values defined in BB are live.
3576-
BlocksSet VisitedBlocksSet;
3577-
if (reachesUsed(RealDest, ReachesNonLocalUses, VisitedBlocksSet))
3591+
if (ReachesNonLocalUseBlocks.contains(RealDest))
35783592
continue;
35793593

35803594
LLVM_DEBUG({
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=simplifycfg -S -max-jump-threading-live-blocks=3 < %s | FileCheck %s --check-prefixes=CHECK_LIMIT_3
3+
; RUN: opt -passes=simplifycfg -S -max-jump-threading-live-blocks=4 < %s | FileCheck %s --check-prefixes=CHECK_LIMIT_4
4+
5+
; Test option -max-jump-threading-live-blocks=<num>
6+
7+
define void @testB(ptr %ptrA, ptr %ptrB, i64 %a, i64 %b, i64 %c) {
8+
; CHECK_LIMIT_3-LABEL: define void @testB(
9+
; CHECK_LIMIT_3-SAME: ptr [[PTRA:%.*]], ptr [[PTRB:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) {
10+
; CHECK_LIMIT_3-NEXT: [[MAINA:.*]]:
11+
; CHECK_LIMIT_3-NEXT: [[COND:%.*]] = icmp slt i64 [[A]], [[B]]
12+
; CHECK_LIMIT_3-NEXT: br i1 [[COND]], label %[[IFA:.*]], label %[[MAINB:.*]]
13+
; CHECK_LIMIT_3: [[IFA]]:
14+
; CHECK_LIMIT_3-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTRA]], align 4
15+
; CHECK_LIMIT_3-NEXT: br label %[[MAINB]]
16+
; CHECK_LIMIT_3: [[MAINB]]:
17+
; CHECK_LIMIT_3-NEXT: [[VALUE:%.*]] = phi i64 [ [[TMP0]], %[[IFA]] ], [ 0, %[[MAINA]] ]
18+
; CHECK_LIMIT_3-NEXT: br i1 [[COND]], label %[[IFB:.*]], label %[[MAINC:.*]]
19+
; CHECK_LIMIT_3: [[IFB]]:
20+
; CHECK_LIMIT_3-NEXT: [[COND2:%.*]] = icmp slt i64 [[A]], [[C]]
21+
; CHECK_LIMIT_3-NEXT: br i1 [[COND2]], label %[[IFB_ARM1:.*]], label %[[IFB_ARM2:.*]]
22+
; CHECK_LIMIT_3: [[IFB_ARM1]]:
23+
; CHECK_LIMIT_3-NEXT: [[PTR_ARM1:%.*]] = getelementptr i64, ptr [[PTRB]], i64 8
24+
; CHECK_LIMIT_3-NEXT: store i128 0, ptr [[PTR_ARM1]], align 4
25+
; CHECK_LIMIT_3-NEXT: br label %[[IFB_JOIN:.*]]
26+
; CHECK_LIMIT_3: [[IFB_ARM2]]:
27+
; CHECK_LIMIT_3-NEXT: [[PTR_ARM2:%.*]] = getelementptr i64, ptr [[PTRB]], i64 16
28+
; CHECK_LIMIT_3-NEXT: store i128 0, ptr [[PTR_ARM2]], align 4
29+
; CHECK_LIMIT_3-NEXT: br label %[[IFB_JOIN]]
30+
; CHECK_LIMIT_3: [[IFB_JOIN]]:
31+
; CHECK_LIMIT_3-NEXT: [[PTRC:%.*]] = phi ptr [ [[PTR_ARM1]], %[[IFB_ARM1]] ], [ [[PTR_ARM2]], %[[IFB_ARM2]] ]
32+
; CHECK_LIMIT_3-NEXT: store i64 [[VALUE]], ptr [[PTRC]], align 4
33+
; CHECK_LIMIT_3-NEXT: br label %[[MAINC]]
34+
; CHECK_LIMIT_3: [[MAINC]]:
35+
; CHECK_LIMIT_3-NEXT: ret void
36+
;
37+
; CHECK_LIMIT_4-LABEL: define void @testB(
38+
; CHECK_LIMIT_4-SAME: ptr [[PTRA:%.*]], ptr [[PTRB:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) {
39+
; CHECK_LIMIT_4-NEXT: [[MAINA:.*:]]
40+
; CHECK_LIMIT_4-NEXT: [[COND:%.*]] = icmp slt i64 [[A]], [[B]]
41+
; CHECK_LIMIT_4-NEXT: br i1 [[COND]], label %[[IFA:.*]], label %[[MAINC:.*]]
42+
; CHECK_LIMIT_4: [[IFA]]:
43+
; CHECK_LIMIT_4-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTRA]], align 4
44+
; CHECK_LIMIT_4-NEXT: [[COND2:%.*]] = icmp slt i64 [[A]], [[C]]
45+
; CHECK_LIMIT_4-NEXT: br i1 [[COND2]], label %[[IFB_ARM1:.*]], label %[[IFB_ARM2:.*]]
46+
; CHECK_LIMIT_4: [[IFB_ARM1]]:
47+
; CHECK_LIMIT_4-NEXT: [[PTR_ARM1:%.*]] = getelementptr i64, ptr [[PTRB]], i64 8
48+
; CHECK_LIMIT_4-NEXT: store i128 0, ptr [[PTR_ARM1]], align 4
49+
; CHECK_LIMIT_4-NEXT: br label %[[IFB_JOIN:.*]]
50+
; CHECK_LIMIT_4: [[IFB_ARM2]]:
51+
; CHECK_LIMIT_4-NEXT: [[PTR_ARM2:%.*]] = getelementptr i64, ptr [[PTRB]], i64 16
52+
; CHECK_LIMIT_4-NEXT: store i128 0, ptr [[PTR_ARM2]], align 4
53+
; CHECK_LIMIT_4-NEXT: br label %[[IFB_JOIN]]
54+
; CHECK_LIMIT_4: [[IFB_JOIN]]:
55+
; CHECK_LIMIT_4-NEXT: [[PTRC:%.*]] = phi ptr [ [[PTR_ARM1]], %[[IFB_ARM1]] ], [ [[PTR_ARM2]], %[[IFB_ARM2]] ]
56+
; CHECK_LIMIT_4-NEXT: store i64 [[TMP0]], ptr [[PTRC]], align 4
57+
; CHECK_LIMIT_4-NEXT: br label %[[MAINC]]
58+
; CHECK_LIMIT_4: [[MAINC]]:
59+
; CHECK_LIMIT_4-NEXT: ret void
60+
;
61+
mainA:
62+
%cond = icmp slt i64 %a, %b
63+
br i1 %cond, label %ifA, label %mainB
64+
65+
ifA:
66+
%518 = load i64, ptr %ptrA
67+
br label %mainB
68+
69+
; Use of %value is not in either immediate destination of mainB.
70+
mainB:
71+
%value = phi i64 [ %518, %ifA ], [ zeroinitializer, %mainA ]
72+
br i1 %cond, label %ifB, label %mainC
73+
74+
ifB:
75+
%cond2 = icmp slt i64 %a, %c
76+
br i1 %cond2, label %ifB_arm1, label %ifB_arm2
77+
78+
ifB_arm1:
79+
%ptr_arm1 = getelementptr i64, ptr %ptrB, i64 8
80+
store i128 0, ptr %ptr_arm1
81+
br label %ifB_join
82+
83+
ifB_arm2:
84+
%ptr_arm2 = getelementptr i64, ptr %ptrB, i64 16
85+
store i128 0, ptr %ptr_arm2
86+
br label %ifB_join
87+
88+
ifB_join:
89+
%ptrC = phi ptr [ %ptr_arm1, %ifB_arm1 ], [ %ptr_arm2, %ifB_arm2 ]
90+
store i64 %value, ptr %ptrC
91+
br label %mainC
92+
93+
mainC:
94+
ret void
95+
}

0 commit comments

Comments
 (0)