diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 16 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/stress-calls.ll | 36 |
2 files changed, 49 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 6f3742ed039..c27425443ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -21,6 +21,12 @@ using namespace llvm;
 
 namespace {
 
+static cl::opt<bool> StressCalls(
+  "amdgpu-stress-function-calls",
+  cl::Hidden,
+  cl::desc("Force all functions to be noinline"),
+  cl::init(false));
+
 class AMDGPUAlwaysInline : public ModulePass {
   bool GlobalOpt;
 
@@ -57,9 +63,13 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
     }
   }
 
+  auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
+  auto IncompatAttr
+    = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
+
   for (Function &F : M) {
     if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
-        !F.hasFnAttribute(Attribute::NoInline))
+        !F.hasFnAttribute(IncompatAttr))
       FuncsToClone.push_back(&F);
   }
 
@@ -71,8 +81,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
   }
 
   for (Function &F : M) {
-    if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
-      F.addFnAttr(Attribute::AlwaysInline);
+    if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
+      F.addFnAttr(NewAttr);
     }
   }
   return false;
diff --git a/llvm/test/CodeGen/AMDGPU/stress-calls.ll b/llvm/test/CodeGen/AMDGPU/stress-calls.ll
new file mode 100644
index 00000000000..480d40d67d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/stress-calls.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck %s
+
+; CHECK: define internal fastcc i32 @alwaysinline_func(i32 %a) #0 {
+define internal fastcc i32 @alwaysinline_func(i32 %a) alwaysinline {
+entry:
+  %tmp0 = add i32 %a, 1
+  ret i32 %tmp0
+}
+
+; CHECK: define internal fastcc i32 @noinline_func(i32 %a) #1 {
+define internal fastcc i32 @noinline_func(i32 %a) noinline {
+entry:
+  %tmp0 = add i32 %a, 2
+  ret i32 %tmp0
+}
+
+; CHECK: define internal fastcc i32 @unmarked_func(i32 %a) #1 {
+define internal fastcc i32 @unmarked_func(i32 %a) {
+entry:
+  %tmp0 = add i32 %a, 3
+  ret i32 %tmp0
+}
+
+define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @alwaysinline_func(i32 1)
+  store volatile i32 %tmp0, i32 addrspace(1)* %out
+  %tmp1 = call i32 @noinline_func(i32 1)
+  store volatile i32 %tmp1, i32 addrspace(1)* %out
+  %tmp2 = call i32 @unmarked_func(i32 1)
+  store volatile i32 %tmp2, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: attributes #0 = { alwaysinline }
+; CHECK: attributes #1 = { noinline }

