Skip to content

Commit 0a0df38

Browse files
committed
Search for live blocks backward from uses. Add limit for searching
Signed-off-by: John Lu <[email protected]>
1 parent 16b1f65 commit 0a0df38

File tree

2 files changed

+127
-18
lines changed

2 files changed

+127
-18
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,11 @@ static cl::opt<unsigned> MaxSwitchCasesPerResult(
198198
"max-switch-cases-per-result", cl::Hidden, cl::init(16),
199199
cl::desc("Limit cases to analyze when converting a switch to select"));
200200

201+
static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
202+
"max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
203+
cl::desc("Limit number of blocks a define in a threaded block is allowed "
204+
"to be live in"));
205+
201206
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
202207
STATISTIC(NumLinearMaps,
203208
"Number of switch instructions turned into linear mapping");
@@ -3457,24 +3462,25 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
34573462

34583463
using BlocksSet = SmallPtrSet<BasicBlock *, 8>;
34593464

3460-
static bool reachesUsed(BasicBlock *BB, BlocksSet &ReachesNonLocalUses,
3461-
BlocksSet &VisitedBlocksSet) {
3462-
if (ReachesNonLocalUses.contains(BB))
3465+
// Return false once more than MaxJumpThreadingLiveBlocks blocks have been recorded.
3466+
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3467+
BlocksSet &ReachesNonLocalUses) {
3468+
if (BB == DefBB)
3469+
return true;
3470+
if (!ReachesNonLocalUses.insert(BB).second)
34633471
return true;
3464-
if (!VisitedBlocksSet.insert(BB).second)
3472+
3473+
if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
34653474
return false;
3466-
for (BasicBlock *Succ : successors(BB))
3467-
if (reachesUsed(Succ, ReachesNonLocalUses, VisitedBlocksSet)) {
3468-
ReachesNonLocalUses.insert(BB);
3469-
return true;
3470-
}
3471-
return false;
3475+
for (BasicBlock *Pred : predecessors(BB))
3476+
if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3477+
return false;
3478+
return true;
34723479
}
34733480

34743481
/// Return true if we can thread a branch across this block.
3475-
static bool
3476-
blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3477-
BlocksSet &UsedInNonLocalBlocksSet) {
3482+
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3483+
BlocksSet &NonLocalUseBlocks) {
34783484
int Size = 0;
34793485
EphemeralValueTracker EphTracker;
34803486

@@ -3503,7 +3509,7 @@ blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
35033509
if (isa<PHINode>(UI))
35043510
return false;
35053511
} else
3506-
UsedInNonLocalBlocksSet.insert(UsedInBB);
3512+
NonLocalUseBlocks.insert(UsedInBB);
35073513
}
35083514

35093515
// Looks ok, continue checking.
@@ -3565,10 +3571,19 @@ foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
35653571
// Check that the block is small enough and record which non-local blocks use
35663572
// values defined in the block.
35673573

3568-
BlocksSet ReachesNonLocalUses;
3569-
if (!blockIsSimpleEnoughToThreadThrough(BB, ReachesNonLocalUses))
3574+
BlocksSet NonLocalUseBlocks;
3575+
BlocksSet ReachesNonLocalUseBlocks;
3576+
if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
35703577
return false;
35713578

3579+
if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3580+
NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3581+
return false;
3582+
3583+
for (BasicBlock *UseBB : NonLocalUseBlocks)
3584+
if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3585+
return false;
3586+
35723587
for (const auto &Pair : KnownValues) {
35733588
ConstantInt *CB = Pair.first;
35743589
ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
@@ -3586,8 +3601,7 @@ foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
35863601
continue;
35873602

35883603
// Only revector to RealDest if no values defined in BB are live.
3589-
BlocksSet VisitedBlocksSet;
3590-
if (reachesUsed(RealDest, ReachesNonLocalUses, VisitedBlocksSet))
3604+
if (ReachesNonLocalUseBlocks.contains(RealDest))
35913605
continue;
35923606

35933607
LLVM_DEBUG({
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=simplifycfg -S -max-jump-threading-live-blocks=3 < %s | FileCheck %s --check-prefixes=CHECK_LIMIT_3
3+
; RUN: opt -passes=simplifycfg -S -max-jump-threading-live-blocks=4 < %s | FileCheck %s --check-prefixes=CHECK_LIMIT_4
4+
5+
; Test option -max-jump-threading-live-blocks=<num>
6+
7+
define void @testB(ptr %ptrA, ptr %ptrB, i64 %a, i64 %b, i64 %c) {
8+
; CHECK_LIMIT_3-LABEL: define void @testB(
9+
; CHECK_LIMIT_3-SAME: ptr [[PTRA:%.*]], ptr [[PTRB:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) {
10+
; CHECK_LIMIT_3-NEXT: [[MAINA:.*]]:
11+
; CHECK_LIMIT_3-NEXT: [[COND:%.*]] = icmp slt i64 [[A]], [[B]]
12+
; CHECK_LIMIT_3-NEXT: br i1 [[COND]], label %[[IFA:.*]], label %[[MAINB:.*]]
13+
; CHECK_LIMIT_3: [[IFA]]:
14+
; CHECK_LIMIT_3-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTRA]], align 4
15+
; CHECK_LIMIT_3-NEXT: br label %[[MAINB]]
16+
; CHECK_LIMIT_3: [[MAINB]]:
17+
; CHECK_LIMIT_3-NEXT: [[VALUE:%.*]] = phi i64 [ [[TMP0]], %[[IFA]] ], [ 0, %[[MAINA]] ]
18+
; CHECK_LIMIT_3-NEXT: br i1 [[COND]], label %[[IFB:.*]], label %[[MAINC:.*]]
19+
; CHECK_LIMIT_3: [[IFB]]:
20+
; CHECK_LIMIT_3-NEXT: [[COND2:%.*]] = icmp slt i64 [[A]], [[C]]
21+
; CHECK_LIMIT_3-NEXT: br i1 [[COND2]], label %[[IFB_ARM1:.*]], label %[[IFB_ARM2:.*]]
22+
; CHECK_LIMIT_3: [[IFB_ARM1]]:
23+
; CHECK_LIMIT_3-NEXT: [[PTR_ARM1:%.*]] = getelementptr i64, ptr [[PTRB]], i64 8
24+
; CHECK_LIMIT_3-NEXT: store i128 0, ptr [[PTR_ARM1]], align 4
25+
; CHECK_LIMIT_3-NEXT: br label %[[IFB_JOIN:.*]]
26+
; CHECK_LIMIT_3: [[IFB_ARM2]]:
27+
; CHECK_LIMIT_3-NEXT: [[PTR_ARM2:%.*]] = getelementptr i64, ptr [[PTRB]], i64 16
28+
; CHECK_LIMIT_3-NEXT: store i128 0, ptr [[PTR_ARM2]], align 4
29+
; CHECK_LIMIT_3-NEXT: br label %[[IFB_JOIN]]
30+
; CHECK_LIMIT_3: [[IFB_JOIN]]:
31+
; CHECK_LIMIT_3-NEXT: [[PTRC:%.*]] = phi ptr [ [[PTR_ARM1]], %[[IFB_ARM1]] ], [ [[PTR_ARM2]], %[[IFB_ARM2]] ]
32+
; CHECK_LIMIT_3-NEXT: store i64 [[VALUE]], ptr [[PTRC]], align 4
33+
; CHECK_LIMIT_3-NEXT: br label %[[MAINC]]
34+
; CHECK_LIMIT_3: [[MAINC]]:
35+
; CHECK_LIMIT_3-NEXT: ret void
36+
;
37+
; CHECK_LIMIT_4-LABEL: define void @testB(
38+
; CHECK_LIMIT_4-SAME: ptr [[PTRA:%.*]], ptr [[PTRB:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) {
39+
; CHECK_LIMIT_4-NEXT: [[MAINA:.*:]]
40+
; CHECK_LIMIT_4-NEXT: [[COND:%.*]] = icmp slt i64 [[A]], [[B]]
41+
; CHECK_LIMIT_4-NEXT: br i1 [[COND]], label %[[IFA:.*]], label %[[MAINC:.*]]
42+
; CHECK_LIMIT_4: [[IFA]]:
43+
; CHECK_LIMIT_4-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTRA]], align 4
44+
; CHECK_LIMIT_4-NEXT: [[COND2:%.*]] = icmp slt i64 [[A]], [[C]]
45+
; CHECK_LIMIT_4-NEXT: br i1 [[COND2]], label %[[IFB_ARM1:.*]], label %[[IFB_ARM2:.*]]
46+
; CHECK_LIMIT_4: [[IFB_ARM1]]:
47+
; CHECK_LIMIT_4-NEXT: [[PTR_ARM1:%.*]] = getelementptr i64, ptr [[PTRB]], i64 8
48+
; CHECK_LIMIT_4-NEXT: store i128 0, ptr [[PTR_ARM1]], align 4
49+
; CHECK_LIMIT_4-NEXT: br label %[[IFB_JOIN:.*]]
50+
; CHECK_LIMIT_4: [[IFB_ARM2]]:
51+
; CHECK_LIMIT_4-NEXT: [[PTR_ARM2:%.*]] = getelementptr i64, ptr [[PTRB]], i64 16
52+
; CHECK_LIMIT_4-NEXT: store i128 0, ptr [[PTR_ARM2]], align 4
53+
; CHECK_LIMIT_4-NEXT: br label %[[IFB_JOIN]]
54+
; CHECK_LIMIT_4: [[IFB_JOIN]]:
55+
; CHECK_LIMIT_4-NEXT: [[PTRC:%.*]] = phi ptr [ [[PTR_ARM1]], %[[IFB_ARM1]] ], [ [[PTR_ARM2]], %[[IFB_ARM2]] ]
56+
; CHECK_LIMIT_4-NEXT: store i64 [[TMP0]], ptr [[PTRC]], align 4
57+
; CHECK_LIMIT_4-NEXT: br label %[[MAINC]]
58+
; CHECK_LIMIT_4: [[MAINC]]:
59+
; CHECK_LIMIT_4-NEXT: ret void
60+
;
61+
mainA:
62+
%cond = icmp slt i64 %a, %b
63+
br i1 %cond, label %ifA, label %mainB
64+
65+
ifA:
66+
%518 = load i64, ptr %ptrA
67+
br label %mainB
68+
69+
; The use of %value is not in either immediate successor of mainB.
70+
mainB:
71+
%value = phi i64 [ %518, %ifA ], [ zeroinitializer, %mainA ]
72+
br i1 %cond, label %ifB, label %mainC
73+
74+
ifB:
75+
%cond2 = icmp slt i64 %a, %c
76+
br i1 %cond2, label %ifB_arm1, label %ifB_arm2
77+
78+
ifB_arm1:
79+
%ptr_arm1 = getelementptr i64, ptr %ptrB, i64 8
80+
store i128 0, ptr %ptr_arm1
81+
br label %ifB_join
82+
83+
ifB_arm2:
84+
%ptr_arm2 = getelementptr i64, ptr %ptrB, i64 16
85+
store i128 0, ptr %ptr_arm2
86+
br label %ifB_join
87+
88+
ifB_join:
89+
%ptrC = phi ptr [ %ptr_arm1, %ifB_arm1 ], [ %ptr_arm2, %ifB_arm2 ]
90+
store i64 %value, ptr %ptrC
91+
br label %mainC
92+
93+
mainC:
94+
ret void
95+
}

0 commit comments

Comments
 (0)