summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2017-01-30 21:05:18 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2017-01-30 21:05:18 +0000
commit: a3b72798af09724ab6b5e08a56031b8a06692f0b (patch)
tree: b562df297ab3932f91160d5b0106021172697893
parent: e518e0bfe9326e32eca17bf532e4b58a07e4bc3a (diff)
download: bcm5719-llvm-a3b72798af09724ab6b5e08a56031b8a06692f0b.tar.gz
download: bcm5719-llvm-a3b72798af09724ab6b5e08a56031b8a06692f0b.zip
[AMDGPU] Internalize non-kernel symbols
Since we have no call support and linking happens late, we only need to produce code for symbols that are actually used. This saves compilation time, the size of the final executable, and the size of any intermediate dumps.

Run the Internalize pass early in the opt pipeline, followed by the global DCE pass. To enable it, the runtime (RT) can pass the -amdgpu-internalize-symbols option.

Differential Revision: https://reviews.llvm.org/D29214

llvm-svn: 293549
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 35
-rw-r--r-- llvm/test/CodeGen/AMDGPU/internalize.ll 35
2 files changed, 68 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 9cc30a8545c..d6b4a39e74b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -84,6 +84,13 @@ static cl::opt<bool> ScalarizeGlobal(
cl::init(false),
cl::Hidden);
+// Option to run internalize pass.
+static cl::opt<bool> InternalizeSymbols(
+ "amdgpu-internalize-symbols",
+ cl::desc("Enable elimination of non-kernel functions and unused globals"),
+ cl::init(false),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -207,11 +214,35 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
}
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ bool Internalize = InternalizeSymbols &&
+ (getOptLevel() > CodeGenOpt::None) &&
+ (getTargetTriple().getArch() == Triple::amdgcn);
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
- [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ [Internalize](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
PM.add(createAMDGPUUnifyMetadataPass());
- });
+ if (Internalize) {
+ PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
+ if (const Function *F = dyn_cast<Function>(&GV)) {
+ if (F->isDeclaration())
+ return true;
+ switch (F->getCallingConv()) {
+ default:
+ return false;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ }
+ }
+ return !GV.use_empty();
+ }));
+ PM.add(createGlobalDCEPass());
+ }
+ });
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/internalize.ll b/llvm/test/CodeGen/AMDGPU/internalize.ll
new file mode 100644
index 00000000000..5a6669977b8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/internalize.ll
@@ -0,0 +1,35 @@
+; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s
+; CHECK-NOT: unused
+; CHECK-NOT: foo_used
+; CHECK: gvar_used
+; CHECK: main_kernel
+
+@gvar_unused = addrspace(1) global i32 undef, align 4
+@gvar_used = addrspace(1) global i32 undef, align 4
+
+; Function Attrs: alwaysinline nounwind
+define void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 {
+entry:
+ store i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; Function Attrs: alwaysinline nounwind
+define void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 {
+entry:
+ store i32 %tid, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @main_kernel() {
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
+
+attributes #1 = { alwaysinline nounwind }
OpenPOWER on IntegriCloud