diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/stress-calls.ll | 36 |
2 files changed, 49 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 6f3742ed039..c27425443ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -21,6 +21,12 @@ using namespace llvm;
 
 namespace {
 
+static cl::opt<bool> StressCalls(
+  "amdgpu-stress-function-calls",
+  cl::Hidden,
+  cl::desc("Force all functions to be noinline"),
+  cl::init(false));
+
 class AMDGPUAlwaysInline : public ModulePass {
   bool GlobalOpt;
 
@@ -57,9 +63,13 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
     }
   }
 
+  auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
+  auto IncompatAttr
+    = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
+
   for (Function &F : M) {
     if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
-        !F.hasFnAttribute(Attribute::NoInline))
+        !F.hasFnAttribute(IncompatAttr))
       FuncsToClone.push_back(&F);
   }
 
@@ -71,8 +81,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
   }
 
   for (Function &F : M) {
-    if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
-      F.addFnAttr(Attribute::AlwaysInline);
+    if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
+      F.addFnAttr(NewAttr);
     }
   }
   return false;
diff --git a/llvm/test/CodeGen/AMDGPU/stress-calls.ll b/llvm/test/CodeGen/AMDGPU/stress-calls.ll
new file mode 100644
index 00000000000..480d40d67d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/stress-calls.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck %s
+
+; CHECK: define internal fastcc i32 @alwaysinline_func(i32 %a) #0 {
+define internal fastcc i32 @alwaysinline_func(i32 %a) alwaysinline {
+entry:
+  %tmp0 = add i32 %a, 1
+  ret i32 %tmp0
+}
+
+; CHECK: define internal fastcc i32 @noinline_func(i32 %a) #1 {
+define internal fastcc i32 @noinline_func(i32 %a) noinline {
+entry:
+  %tmp0 = add i32 %a, 2
+  ret i32 %tmp0
+}
+
+; CHECK: define internal fastcc i32 @unmarked_func(i32 %a) #1 {
+define internal fastcc i32 @unmarked_func(i32 %a) {
+entry:
+  %tmp0 = add i32 %a, 3
+  ret i32 %tmp0
+}
+
+define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @alwaysinline_func(i32 1)
+  store volatile i32 %tmp0, i32 addrspace(1)* %out
+  %tmp1 = call i32 @noinline_func(i32 1)
+  store volatile i32 %tmp1, i32 addrspace(1)* %out
+  %tmp2 = call i32 @unmarked_func(i32 1)
+  store volatile i32 %tmp2, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: attributes #0 = { alwaysinline }
+; CHECK: attributes #1 = { noinline }