aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Smith <peter.smith@linaro.org>2018-06-22 15:29:29 +0100
committerPeter Smith <peter.smith@linaro.org>2018-10-26 15:05:14 +0100
commit08c52b4d9ae89117c117cabfd962d5746c19addb (patch)
tree8cadbb1619ba5d9b8849594abc20935c426c484f
parentd13f96252bbeb1aff2fd0374aa8fd1544a53752b (diff)
Add pass to mark cold and non-hot functions for size optimization.linaro-local/peter.smith/rebased-pgo
The MarkCold pass marks functions that the ProfileSummaryInfo has identified as containing only cold basic blocks to be optimized for code size. The pass can be run before inlining with -fprofile-opt-cold-for-size-early or after inlining with -fprofile-opt-cold-for-size. When -fprofile-sample-accurate is on and a sample profile is in use, a function with no profile information is treated as cold in the call graph. When -fmark-neutral-cold is on, functions that are neither hot nor cold are marked OptimizeForSize and cold functions are marked MinSize.
-rw-r--r--include/llvm/IR/Attributes.td1
-rw-r--r--include/llvm/InitializePasses.h1
-rw-r--r--include/llvm/LinkAllPasses.h1
-rw-r--r--include/llvm/Passes/PassBuilder.h19
-rw-r--r--include/llvm/Transforms/IPO/PassManagerBuilder.h4
-rw-r--r--include/llvm/Transforms/Scalar.h7
-rw-r--r--include/llvm/Transforms/Scalar/MarkCold.h35
-rw-r--r--lib/Analysis/ProfileSummaryInfo.cpp4
-rw-r--r--lib/Passes/PassBuilder.cpp18
-rw-r--r--lib/Passes/PassRegistry.def1
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp7
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/MarkCold.cpp129
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
14 files changed, 219 insertions, 10 deletions
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td
index e786d85d05a..d54d30b4a56 100644
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -206,6 +206,7 @@ def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">;
def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">;
def NoJumpTables : StrBoolAttr<"no-jump-tables">;
def ProfileSampleAccurate : StrBoolAttr<"profile-sample-accurate">;
+def MarkNeutralCold : StrBoolAttr<"mark-neutral-cold">;
class CompatRule<string F> {
// The name of the function called to check the attribute of the caller and
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 42bfc55b1aa..f9f74e887bb 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -265,6 +265,7 @@ void initializeMachineSchedulerPass(PassRegistry&);
void initializeMachineSinkingPass(PassRegistry&);
void initializeMachineTraceMetricsPass(PassRegistry&);
void initializeMachineVerifierPassPass(PassRegistry&);
+void initializeMarkColdLegacyPassPass(PassRegistry&);
void initializeMemCpyOptLegacyPassPass(PassRegistry&);
void initializeMemDepPrinterPass(PassRegistry&);
void initializeMemDerefPrinterPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index e2378cea90c..a6ea48b8607 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -141,6 +141,7 @@ namespace {
(void) llvm::createLowerExpectIntrinsicPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
+ (void) llvm::createMarkColdPass();
(void) llvm::createNaryReassociatePass();
(void) llvm::createObjCARCAAWrapperPass();
(void) llvm::createObjCARCAPElimPass();
diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h
index efda64f7861..dfac4ef68b3 100644
--- a/include/llvm/Passes/PassBuilder.h
+++ b/include/llvm/Passes/PassBuilder.h
@@ -34,19 +34,20 @@ class ModuleSummaryIndex;
struct PGOOptions {
PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "",
std::string SampleProfileFile = "",
- std::string ProfileRemappingFile = "",
- bool RunProfileGen = false, bool SamplePGOSupport = false,
- bool ProfileIsFE = false)
+ std::string ProfileRemappingFile = "", bool RunProfileGen = false,
+ bool SamplePGOSupport = false, bool ProfileIsFE = false,
+ bool MarkColdBeforeInline = false,
+ bool MarkColdAfterInline = false)
: ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile),
SampleProfileFile(SampleProfileFile),
ProfileRemappingFile(ProfileRemappingFile),
RunProfileGen(RunProfileGen),
SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()),
- ProfileIsFE(ProfileIsFE) {
- assert((RunProfileGen ||
- !SampleProfileFile.empty() ||
- !ProfileUseFile.empty() ||
- SamplePGOSupport) && "Illegal PGOOptions.");
+ ProfileIsFE(ProfileIsFE), MarkColdBeforeInline(MarkColdBeforeInline),
+ MarkColdAfterInline(MarkColdAfterInline) {
+ assert((RunProfileGen || !SampleProfileFile.empty() ||
+ !ProfileUseFile.empty() || SamplePGOSupport) &&
+ "Illegal PGOOptions.");
}
std::string ProfileGenFile;
std::string ProfileUseFile;
@@ -55,6 +56,8 @@ struct PGOOptions {
bool RunProfileGen;
bool SamplePGOSupport;
bool ProfileIsFE;
+ bool MarkColdBeforeInline;
+ bool MarkColdAfterInline;
};
/// This class provides access to building LLVM's passes.
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 276306f686f..2158ceca372 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -166,6 +166,10 @@ public:
std::string PGOInstrUse;
/// Path of the sample Profile data file.
std::string PGOSampleUse;
+ /// Mark functions as cold before inlining
+ bool EnableMarkColdBeforeInline;
+ /// Mark functions as cold after inlining
+ bool EnableMarkColdAfterInline;
private:
/// ExtensionList - This is list of all of the extensions that are registered.
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index fe4ff621c6f..dcacb96b162 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -301,6 +301,13 @@ FunctionPass *createGVNSinkPass();
//===----------------------------------------------------------------------===//
//
+// MarkCold - This pass marks cold functions as needing optimization for size
+// or minimum size.
+//
+FunctionPass *createMarkColdPass();
+
+//===----------------------------------------------------------------------===//
+//
// MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
// are hoisted into the header, while stores sink into the footer.
//
diff --git a/include/llvm/Transforms/Scalar/MarkCold.h b/include/llvm/Transforms/Scalar/MarkCold.h
new file mode 100644
index 00000000000..74295737482
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/MarkCold.h
@@ -0,0 +1,35 @@
+//===- MarkCold.h - Mark cold functions for size optimizations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// See the comments on MarkColdPass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_MARK_COLD_H
+#define LLVM_TRANSFORMS_MARK_COLD_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ProfileSummaryInfo;
+class BlockFrequencyInfo;
+
+/// Function pass that uses profile information to tag functions with the
+/// size-optimization attributes OptimizeForSize and MinSize.
+class MarkColdPass : public PassInfoMixin<MarkColdPass> {
+public:
+ /// New pass manager entry point: processes the single function \p F.
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ // Glue for old PM.
+ /// Implementation that classifies \p F using the profile summary \p PSI and
+ /// the block frequency information \p BFI; the legacy pass wrapper calls
+ /// this as well.
+ bool runImpl(Function &F, ProfileSummaryInfo *PSI, BlockFrequencyInfo& BFI);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_MARK_COLD_H
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index aeaa5172b3e..782ea68e787 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -152,7 +152,9 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
BlockFrequencyInfo &BFI) {
if (!F || !computeSummary())
- return false;
+ return F && hasSampleProfile() &&
+ (F->hasFnAttribute("profile-sample-accurate") ||
+ ProfileSampleAccurate);
if (auto FunctionCount = F->getEntryCount())
if (!isColdCount(FunctionCount.getCount()))
return false;
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 698f1537a0f..e84caf78900 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -131,6 +131,7 @@
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/MarkCold.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
@@ -563,7 +564,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(InstrProfiling(Options));
}
- if (!ProfileUseFile.empty())
+ if (!ProfileUseFile.empty() && !ProfileIsFE)
MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile));
}
@@ -677,6 +678,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// the inliner pass.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ // Add the MarkCold pass to limit the amount of inlining for some functions
+ if (PGOOpt && PGOOpt->MarkColdBeforeInline) {
+ FunctionPassManager MarkFPM(DebugLogging);
+ MarkFPM.addPass(MarkColdPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MarkFPM)));
+ }
+
// Now begin the main postorder CGSCC pipeline.
// FIXME: The current CGSCC pipeline has its origins in the legacy pass
// manager and trying to emulate its precise behavior. Much of this doesn't
@@ -729,6 +737,14 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
std::move(MainCGPipeline), MaxDevirtIterations)));
+ // Add the MarkCold after the inliner, trusting that the CGSCC passes have
+ // used the profile information wisely.
+ if (PGOOpt && PGOOpt->MarkColdAfterInline) {
+ FunctionPassManager MarkFPM(DebugLogging);
+ MarkFPM.addPass(MarkColdPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MarkFPM)));
+ }
+
return MPM;
}
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
index ad03942fb9a..e8247e3f534 100644
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -180,6 +180,7 @@ FUNCTION_PASS("gvn", GVN())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
FUNCTION_PASS("loop-sink", LoopSinkPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
+FUNCTION_PASS("markcold",MarkColdPass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 19ff2a21cd2..98fe7b82211 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -176,6 +176,8 @@ PassManagerBuilder::PassManagerBuilder() {
MergeFunctions = false;
PrepareForLTO = false;
EnablePGOInstrGen = RunPGOInstrGen;
+ EnableMarkColdBeforeInline = false;
+ EnableMarkColdAfterInline = false;
PGOInstrGen = PGOOutputFile;
PGOInstrUse = RunPGOInstrUse;
PrepareForThinLTO = EnablePrepareForThinLTO;
@@ -523,6 +525,8 @@ void PassManagerBuilder::populateModulePassManager(
// not run it a second time
if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM);
+ if (EnableMarkColdBeforeInline)
+ MPM.add(createMarkColdPass());
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive
@@ -624,6 +628,9 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createFloat2IntPass());
+ if (EnableMarkColdAfterInline)
+ MPM.add(createMarkColdPass());
+
addExtensionsToPM(EP_VectorizerStart, MPM);
// Re-rotate loops in all our loop nests. These may have fallout out of
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index fce37d4bffb..ae241e3bb41 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_library(LLVMScalarOpts
LowerAtomic.cpp
LowerExpectIntrinsic.cpp
LowerGuardIntrinsic.cpp
+ MarkCold.cpp
MemCpyOptimizer.cpp
MergeICmps.cpp
MergedLoadStoreMotion.cpp
diff --git a/lib/Transforms/Scalar/MarkCold.cpp b/lib/Transforms/Scalar/MarkCold.cpp
new file mode 100644
index 00000000000..5436f00eff0
--- /dev/null
+++ b/lib/Transforms/Scalar/MarkCold.cpp
@@ -0,0 +1,129 @@
+//===- MarkCold.cpp - Mark cold functions for size optimizations ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This transformation uses profile information to add size-optimization
+/// attributes to functions that the profile summary does not report as hot in
+/// the call graph. The intention is to reduce code size while retaining the
+/// majority of performance.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/MarkCold.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "markcold"
+
+// When set, functions that the profile classifies as neither hot nor cold are
+// marked for size optimization in addition to the cold ones.
+// The description is built from adjacent string literals, so the first piece
+// must end with a space to keep "for size" from being glued together.
+static cl::opt<bool> MarkNeutralCold(
+    "mark-neutral-cold", cl::Hidden, cl::init(false),
+    cl::desc("Mark functions that are neither hot or cold in the profile for "
+             "size optimization. Mark cold functions for minimum size."));
+
+/// Tag \p F with size-optimization attributes based on its profile.
+///
+/// Functions that are cold and not hot in the call graph receive both
+/// OptimizeForSize and MinSize. When the -mark-neutral-cold option or the
+/// "mark-neutral-cold" function attribute is present, functions that are
+/// neither hot nor cold receive OptimizeForSize.
+///
+/// \param F   Function to annotate.
+/// \param PSI Profile summary used for the hot/cold queries; when null the
+///            function is left untouched.
+/// \param BFI Block frequency information for \p F.
+/// \returns true if at least one attribute was added to \p F.
+bool MarkColdPass::runImpl(Function &F, ProfileSummaryInfo *PSI,
+                           BlockFrequencyInfo &BFI) {
+  // A cached profile summary is not guaranteed to exist; without one there is
+  // nothing to classify.
+  if (!PSI)
+    return false;
+
+  if (!F.hasProfileData())
+    LLVM_DEBUG(dbgs() << "Function does not have profile data\n");
+  bool HotCallGraph = PSI->isFunctionHotInCallGraph(&F, BFI);
+  bool ColdCallGraph = PSI->isFunctionColdInCallGraph(&F, BFI);
+
+  if (HotCallGraph)
+    LLVM_DEBUG(dbgs() << "Function " << F.getName()
+                      << " is Hot in CallGraph\n");
+  if (ColdCallGraph)
+    LLVM_DEBUG(dbgs() << "Function " << F.getName()
+                      << " is Cold in CallGraph\n");
+
+  // Track real modifications so that callers receive an accurate change
+  // indication instead of an unconditional "false".
+  bool Changed = false;
+  auto AddAttr = [&F, &Changed](Attribute::AttrKind Kind) {
+    if (F.hasFnAttribute(Kind))
+      return;
+    F.addFnAttr(Kind);
+    Changed = true;
+  };
+
+  if ((MarkNeutralCold || F.hasFnAttribute("mark-neutral-cold")) &&
+      !ColdCallGraph && !HotCallGraph) {
+    LLVM_DEBUG(dbgs() << "Function " << F.getName()
+                      << " added OptimizeForSize\n");
+    AddAttr(Attribute::OptimizeForSize);
+  }
+
+  if (ColdCallGraph && !HotCallGraph) {
+    LLVM_DEBUG(dbgs() << "Function " << F.getName() << " added MinSize\n");
+    AddAttr(Attribute::OptimizeForSize);
+    AddAttr(Attribute::MinSize);
+  }
+  return Changed;
+}
+
+/// New pass manager entry point.
+///
+/// Looks up the cached module-level profile summary, builds branch
+/// probability and block frequency information for \p F from its loop info,
+/// and hands everything to runImpl(). All analyses are reported as preserved.
+PreservedAnalyses MarkColdPass::run(Function &F, FunctionAnalysisManager &AM) {
+  // The profile summary is a module analysis, so it is reached through the
+  // module analysis manager proxy and only a cached result is queried.
+  const auto &Proxy =
+      AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+  const ModuleAnalysisManager &ModuleAM = Proxy.getManager();
+  ProfileSummaryInfo *Summary =
+      ModuleAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+
+  // Block frequencies are computed locally from the function's loop info.
+  const LoopInfo &Loops = AM.getResult<LoopAnalysis>(F);
+  BranchProbabilityInfo BranchProbs(F, Loops);
+  BlockFrequencyInfo BlockFreqs(F, BranchProbs, Loops);
+
+  // The change indication is intentionally ignored here.
+  (void)runImpl(F, Summary, BlockFreqs);
+  return PreservedAnalyses::all();
+}
+
+namespace {
+
+/// Legacy pass manager wrapper around MarkColdPass.
+class MarkColdLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  MarkColdLegacyPass() : FunctionPass(ID) {
+    initializeMarkColdLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Build branch probability and block frequency information for \p Fn from
+  /// its loop info and run the shared implementation on it.
+  ///
+  /// \returns the change indication reported by MarkColdPass::runImpl.
+  bool runOnFunction(Function &Fn) override {
+    ProfileSummaryInfo *PSI =
+        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    BranchProbabilityInfo BPI(Fn, LI);
+    BlockFrequencyInfo BFI(Fn, BPI, LI);
+
+    LLVM_DEBUG(dbgs() << "********** Begin Mark Cold **********\n");
+    LLVM_DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
+
+    bool MadeChange = Impl.runImpl(Fn, PSI, BFI);
+    if (MadeChange) {
+      LLVM_DEBUG(dbgs() << "********** Function after Mark Cold: "
+                        << Fn.getName() << '\n');
+      LLVM_DEBUG(dbgs() << Fn);
+    }
+    LLVM_DEBUG(dbgs() << "********** End Mark Cold **********\n");
+
+    return MadeChange;
+  }
+
+  /// Declare the analyses this pass needs and what it keeps intact.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    // The pass only adds function attributes. Keep this in line with the new
+    // pass manager entry point, which returns PreservedAnalyses::all().
+    AU.setPreservesAll();
+  }
+
+private:
+  // Pass implementation shared with the new pass manager.
+  MarkColdPass Impl;
+};
+
+} // namespace
+
+// Definition of the static pass ID declared in MarkColdLegacyPass.
+char MarkColdLegacyPass::ID = 0;
+// Register the legacy pass under the name "markcold" and list the analysis
+// passes it depends on (profile summary and loop info).
+INITIALIZE_PASS_BEGIN(MarkColdLegacyPass, "markcold",
+ "Mark cold functions to be optimized for size", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(MarkColdLegacyPass, "markcold",
+ "Mark cold functions to be optimized for size", false,
+ false)
+
+/// Create a new instance of the legacy MarkCold function pass.
+FunctionPass *llvm::createMarkColdPass() { return new MarkColdLegacyPass(); }
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 1b140acbaee..bd81cdfb57b 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -78,6 +78,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
+ initializeMarkColdLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
initializeMergeICmpsPass(Registry);
initializeMergedLoadStoreMotionLegacyPassPass(Registry);