@@ -329,18 +329,19 @@ class SampleProfileLoader {
329
329
bool emitAnnotations (Function &F);
330
330
ErrorOr<uint64_t > getInstWeight (const Instruction &I);
331
331
ErrorOr<uint64_t > getBlockWeight (const BasicBlock *BB);
332
- const FunctionSamples *findCalleeFunctionSamples (const Instruction &I) const ;
332
+ const FunctionSamples *findCalleeFunctionSamples (const CallBase &I) const ;
333
333
std::vector<const FunctionSamples *>
334
334
findIndirectCallFunctionSamples (const Instruction &I, uint64_t &Sum) const ;
335
335
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
336
336
const FunctionSamples *findFunctionSamples (const Instruction &I) const ;
337
- bool inlineCallInstruction (Instruction *I );
337
+ bool inlineCallInstruction (CallBase &CB );
338
338
bool inlineHotFunctions (Function &F,
339
339
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
340
340
// Inline cold/small functions in addition to hot ones
341
- bool shouldInlineColdCallee (Instruction &CallInst);
341
+ bool shouldInlineColdCallee (CallBase &CallInst);
342
342
void emitOptimizationRemarksForInlineCandidates (
343
- const SmallVector<Instruction *, 10 > &Candidates, const Function &F, bool Hot);
343
+ const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
344
+ bool Hot);
344
345
void printEdgeWeight (raw_ostream &OS, Edge E);
345
346
void printBlockWeight (raw_ostream &OS, const BasicBlock *BB) const ;
346
347
void printBlockEquivalence (raw_ostream &OS, const BasicBlock *BB);
@@ -718,9 +719,9 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
718
719
// (findCalleeFunctionSamples returns non-empty result), but not inlined here,
719
720
// it means that the inlined callsite has no sample, thus the call
720
721
// instruction should have 0 count.
721
- if ((isa<CallInst>(Inst) || isa<InvokeInst>( Inst)) &&
722
- !cast<CallBase>(Inst). isIndirectCall () && findCalleeFunctionSamples (Inst ))
723
- return 0 ;
722
+ if (auto *CB = dyn_cast<CallBase>(& Inst))
723
+ if (!CB-> isIndirectCall () && findCalleeFunctionSamples (*CB ))
724
+ return 0 ;
724
725
725
726
const DILocation *DIL = DLoc;
726
727
uint32_t LineOffset = FunctionSamples::getOffset (DIL);
@@ -808,7 +809,7 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
808
809
///
809
810
/// \returns The FunctionSamples pointer to the inlined instance.
810
811
const FunctionSamples *
811
- SampleProfileLoader::findCalleeFunctionSamples (const Instruction &Inst) const {
812
+ SampleProfileLoader::findCalleeFunctionSamples (const CallBase &Inst) const {
812
813
const DILocation *DIL = Inst.getDebugLoc ();
813
814
if (!DIL) {
814
815
return nullptr ;
@@ -892,15 +893,11 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
892
893
return it.first ->second ;
893
894
}
894
895
895
- // FIXME(CallSite): Parameter should be CallBase&, as it's assumed to be that,
896
- // and non-null.
897
- bool SampleProfileLoader::inlineCallInstruction (Instruction *I) {
898
- assert (isa<CallInst>(I) || isa<InvokeInst>(I));
899
- CallBase &CS = *cast<CallBase>(I);
900
- Function *CalledFunction = CS.getCalledFunction ();
896
+ bool SampleProfileLoader::inlineCallInstruction (CallBase &CB) {
897
+ Function *CalledFunction = CB.getCalledFunction ();
901
898
assert (CalledFunction);
902
- DebugLoc DLoc = I-> getDebugLoc ();
903
- BasicBlock *BB = I-> getParent ();
899
+ DebugLoc DLoc = CB. getDebugLoc ();
900
+ BasicBlock *BB = CB. getParent ();
904
901
InlineParams Params = getInlineParams ();
905
902
Params.ComputeFullInlineCost = true ;
906
903
// Checks if there is anything in the reachable portion of the callee at
@@ -909,16 +906,15 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
909
906
// when cost exceeds threshold without checking all IRs in the callee.
910
907
// The actual cost does not matter because we only check isNever() to
911
908
// see if it is legal to inline the callsite.
912
- InlineCost Cost =
913
- getInlineCost (cast<CallBase>(*I), Params, GetTTI (*CalledFunction), GetAC,
914
- None, GetTLI, nullptr , nullptr );
909
+ InlineCost Cost = getInlineCost (CB, Params, GetTTI (*CalledFunction), GetAC,
910
+ None, GetTLI, nullptr , nullptr );
915
911
if (Cost.isNever ()) {
916
912
ORE->emit (OptimizationRemarkAnalysis (CSINLINE_DEBUG, " InlineFail" , DLoc, BB)
917
913
<< " incompatible inlining" );
918
914
return false ;
919
915
}
920
916
InlineFunctionInfo IFI (nullptr , &GetAC);
921
- if (InlineFunction (CS , IFI).isSuccess ()) {
917
+ if (InlineFunction (CB , IFI).isSuccess ()) {
922
918
// The call to InlineFunction erases the call instruction, so we can't pass it here.
923
919
ORE->emit (OptimizationRemark (CSINLINE_DEBUG, " InlineSuccess" , DLoc, BB)
924
920
<< " inlined callee '" << ore::NV (" Callee" , CalledFunction)
@@ -928,26 +924,25 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
928
924
return false ;
929
925
}
930
926
/// Decides whether a call site that was not classified as hot should still
/// be considered for inlining.
///
/// \returns false when ProfileSizeInline is not set, or when
/// getCalledFunction() yields null for the call; otherwise true iff the cost
/// computed by getInlineCost (using getInlineParams()) does not exceed
/// SampleColdCallSiteThreshold.
931
- bool SampleProfileLoader::shouldInlineColdCallee (Instruction &CallInst) {
927
+ bool SampleProfileLoader::shouldInlineColdCallee (CallBase &CallInst) {
932
928
if (!ProfileSizeInline)
933
929
return false ;
934
930
935
- Function *Callee = cast<CallBase>( CallInst) .getCalledFunction ();
931
+ Function *Callee = CallInst.getCalledFunction ();
936
932
if (Callee == nullptr )
937
933
return false ;
938
934
939
- InlineCost Cost =
940
- getInlineCost (cast<CallBase>(CallInst), getInlineParams (),
941
- GetTTI (*Callee), GetAC, None, GetTLI, nullptr , nullptr );
935
+ InlineCost Cost = getInlineCost (CallInst, getInlineParams (), GetTTI (*Callee),
936
+ GetAC, None, GetTLI, nullptr , nullptr );
942
937
943
938
return Cost.getCost () <= SampleColdCallSiteThreshold;
944
939
}
945
940
946
941
void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates (
947
- const SmallVector<Instruction *, 10 > &Candidates, const Function &F,
942
+ const SmallVectorImpl<CallBase * > &Candidates, const Function &F,
948
943
bool Hot) {
949
944
for (auto I : Candidates) {
950
- Function *CalledFunction = cast<CallBase>(I) ->getCalledFunction ();
945
+ Function *CalledFunction = I ->getCalledFunction ();
951
946
if (CalledFunction) {
952
947
ORE->emit (OptimizationRemarkAnalysis (CSINLINE_DEBUG, " InlineAttempt" ,
953
948
I->getDebugLoc (), I->getParent ())
@@ -984,45 +979,43 @@ bool SampleProfileLoader::inlineHotFunctions(
984
979
" ProfAccForSymsInList should be false when profile-sample-accurate "
985
980
" is enabled" );
986
981
987
- // FIXME(CallSite): refactor the vectors here, as they operate with CallBase
988
- // values
989
- DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
982
+ DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
990
983
bool Changed = false ;
991
984
while (true ) {
992
985
bool LocalChanged = false ;
993
- SmallVector<Instruction *, 10 > CIS;
986
+ SmallVector<CallBase *, 10 > CIS;
994
987
for (auto &BB : F) {
995
988
bool Hot = false ;
996
- SmallVector<Instruction *, 10 > AllCandidates;
997
- SmallVector<Instruction *, 10 > ColdCandidates;
989
+ SmallVector<CallBase *, 10 > AllCandidates;
990
+ SmallVector<CallBase *, 10 > ColdCandidates;
998
991
for (auto &I : BB.getInstList ()) {
999
992
const FunctionSamples *FS = nullptr ;
1000
- if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
1001
- !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples (I))) {
1002
- AllCandidates.push_back (&I);
1003
- if (FS->getEntrySamples () > 0 )
1004
- localNotInlinedCallSites.try_emplace (&I, FS);
1005
- if (callsiteIsHot (FS, PSI))
1006
- Hot = true ;
1007
- else if (shouldInlineColdCallee (I))
1008
- ColdCandidates.push_back (&I);
993
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
994
+ if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples (*CB))) {
995
+ AllCandidates.push_back (CB);
996
+ if (FS->getEntrySamples () > 0 )
997
+ localNotInlinedCallSites.try_emplace (CB, FS);
998
+ if (callsiteIsHot (FS, PSI))
999
+ Hot = true ;
1000
+ else if (shouldInlineColdCallee (*CB))
1001
+ ColdCandidates.push_back (CB);
1002
+ }
1009
1003
}
1010
1004
}
1011
1005
if (Hot) {
1012
1006
CIS.insert (CIS.begin (), AllCandidates.begin (), AllCandidates.end ());
1013
1007
emitOptimizationRemarksForInlineCandidates (AllCandidates, F, true );
1014
- }
1015
- else {
1008
+ } else {
1016
1009
CIS.insert (CIS.begin (), ColdCandidates.begin (), ColdCandidates.end ());
1017
1010
emitOptimizationRemarksForInlineCandidates (ColdCandidates, F, false );
1018
1011
}
1019
1012
}
1020
- for (auto I : CIS) {
1021
- Function *CalledFunction = cast<CallBase>(I) ->getCalledFunction ();
1013
+ for (CallBase * I : CIS) {
1014
+ Function *CalledFunction = I ->getCalledFunction ();
1022
1015
// Do not inline recursive calls.
1023
1016
if (CalledFunction == &F)
1024
1017
continue ;
1025
- if (cast<CallBase>(I) ->isIndirectCall ()) {
1018
+ if (I ->isIndirectCall ()) {
1026
1019
if (PromotedInsns.count (I))
1027
1020
continue ;
1028
1021
uint64_t Sum;
@@ -1049,15 +1042,15 @@ bool SampleProfileLoader::inlineHotFunctions(
1049
1042
if (R != SymbolMap.end () && R->getValue () &&
1050
1043
!R->getValue ()->isDeclaration () &&
1051
1044
R->getValue ()->getSubprogram () &&
1052
- isLegalToPromote (*cast<CallBase>(I) , R->getValue (), &Reason)) {
1045
+ isLegalToPromote (*I , R->getValue (), &Reason)) {
1053
1046
uint64_t C = FS->getEntrySamples ();
1054
1047
Instruction *DI =
1055
1048
pgo::promoteIndirectCall (I, R->getValue (), C, Sum, false , ORE);
1056
1049
Sum -= C;
1057
1050
PromotedInsns.insert (I);
1058
1051
// If profile mismatches, we should not attempt to inline DI.
1059
1052
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
1060
- inlineCallInstruction (DI )) {
1053
+ inlineCallInstruction (*cast<CallBase>(DI) )) {
1061
1054
localNotInlinedCallSites.erase (I);
1062
1055
LocalChanged = true ;
1063
1056
++NumCSInlined;
@@ -1070,7 +1063,7 @@ bool SampleProfileLoader::inlineHotFunctions(
1070
1063
}
1071
1064
} else if (CalledFunction && CalledFunction->getSubprogram () &&
1072
1065
!CalledFunction->isDeclaration ()) {
1073
- if (inlineCallInstruction (I)) {
1066
+ if (inlineCallInstruction (* I)) {
1074
1067
localNotInlinedCallSites.erase (I);
1075
1068
LocalChanged = true ;
1076
1069
++NumCSInlined;
@@ -1089,8 +1082,8 @@ bool SampleProfileLoader::inlineHotFunctions(
1089
1082
1090
1083
// Accumulate not inlined callsite information into notInlinedSamples
1091
1084
for (const auto &Pair : localNotInlinedCallSites) {
1092
- Instruction *I = Pair.getFirst ();
1093
- Function *Callee = cast<CallBase>(I) ->getCalledFunction ();
1085
+ CallBase *I = Pair.getFirst ();
1086
+ Function *Callee = I ->getCalledFunction ();
1094
1087
if (!Callee || Callee->isDeclaration ())
1095
1088
continue ;
1096
1089
0 commit comments