diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-01-30 21:05:18 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-01-30 21:05:18 +0000 |
commit | a3b72798af09724ab6b5e08a56031b8a06692f0b (patch) | |
tree | b562df297ab3932f91160d5b0106021172697893 /llvm/lib/Target | |
parent | e518e0bfe9326e32eca17bf532e4b58a07e4bc3a (diff) | |
download | bcm5719-llvm-a3b72798af09724ab6b5e08a56031b8a06692f0b.tar.gz bcm5719-llvm-a3b72798af09724ab6b5e08a56031b8a06692f0b.zip |
[AMDGPU] Internalize non-kernel symbols
Since we have no call support and rely on late linking, we only need to
produce code for symbols that are actually used. This reduces compilation
time, the size of the final executable, and the size of any intermediate dumps.
Run the Internalize pass early in the opt pipeline, followed by the global
DCE pass. To enable it, RT can pass the -amdgpu-internalize-symbols option.
Differential Revision: https://reviews.llvm.org/D29214
llvm-svn: 293549
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 35 |
1 files changed, 33 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 9cc30a8545c..d6b4a39e74b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -84,6 +84,13 @@ static cl::opt<bool> ScalarizeGlobal( cl::init(false), cl::Hidden); +// Option to run internalize pass. +static cl::opt<bool> InternalizeSymbols( + "amdgpu-internalize-symbols", + cl::desc("Enable elimination of non-kernel functions and unused globals"), + cl::init(false), + cl::Hidden); + extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget()); @@ -207,11 +214,35 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const { } void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { + bool Internalize = InternalizeSymbols && + (getOptLevel() > CodeGenOpt::None) && + (getTargetTriple().getArch() == Triple::amdgcn); Builder.addExtension( PassManagerBuilder::EP_ModuleOptimizerEarly, - [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { + [Internalize](const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(createAMDGPUUnifyMetadataPass()); - }); + if (Internalize) { + PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool { + if (const Function *F = dyn_cast<Function>(&GV)) { + if (F->isDeclaration()) + return true; + switch (F->getCallingConv()) { + default: + return false; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return true; + } + } + return !GV.use_empty(); + })); + PM.add(createGlobalDCEPass()); + } + }); } //===----------------------------------------------------------------------===// |