summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 33
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7
-rw-r--r-- llvm/test/CodeGen/AMDGPU/inline-attr.ll | 33
4 files changed, 69 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 893e4cb2a15..6bf8cdcb849 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -23,6 +23,7 @@ class ModulePass;
class Pass;
class Target;
class TargetMachine;
+class TargetOptions;
class PassRegistry;
class Module;
@@ -52,7 +53,7 @@ FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIFixWWMLivenessPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass();
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index d3d5c6dc9d9..e7e54750fe6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
#include <vector>
#include <cmath>
@@ -168,10 +169,13 @@ namespace {
AMDGPULibCalls Simplifier;
+ const TargetOptions Options;
+
public:
static char ID; // Pass identification
- AMDGPUSimplifyLibCalls() : FunctionPass(ID) {
+ AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
+ : FunctionPass(ID), Options(Opt) {
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
@@ -1680,14 +1684,34 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
}
// Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass() {
- return new AMDGPUSimplifyLibCalls();
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) { // Factory now receives the target options to hand to the pass.
+ return new AMDGPUSimplifyLibCalls(Opt); // The pass constructor copies Opt into its own Options member.
}
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() { // Factory for the use-native-calls pass; unchanged by this patch.
return new AMDGPUUseNativeCalls(); // Unlike the simplify-libcalls factory, no TargetOptions are passed here.
}
+static bool setFastFlags(Function &F, const TargetOptions &Options) { // Attaches fast-math string attributes to F based on Options; returns true when attributes were attached, false when no relevant option is set.
+ AttrBuilder B;
+
+ if (Options.UnsafeFPMath || Options.NoInfsFPMath) // UnsafeFPMath also turns on the no-infs attribute.
+ B.addAttribute("no-infs-fp-math", "true");
+ if (Options.UnsafeFPMath || Options.NoNaNsFPMath) // UnsafeFPMath also turns on the no-nans attribute.
+ B.addAttribute("no-nans-fp-math", "true");
+ if (Options.UnsafeFPMath) { // These two attributes are set only under UnsafeFPMath.
+ B.addAttribute("less-precise-fpmad", "true");
+ B.addAttribute("unsafe-fp-math", "true");
+ }
+
+ if (!B.hasAttributes()) // Nothing was collected: none of the three options is enabled.
+ return false; // F is left untouched.
+
+ F.addAttributes(AttributeList::FunctionIndex, B); // Apply everything collected in B at function level.
+
+ return true;
+}
+
bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -1699,6 +1723,9 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
F.printAsOperand(dbgs(), false, F.getParent());
dbgs() << '\n';);
+ if (!EnablePreLink)
+ Changed |= setFastFlags(F, Options);
+
for (auto &BB : F) {
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
// Ignore non-calls.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 25d3629cd69..8a6b5aeaebc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -370,17 +370,18 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
PM.add(createAMDGPUAlwaysInlinePass(false));
});
+ const auto &Opt = Options;
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
- [AMDGPUAA, LibCallSimplify](const PassManagerBuilder &,
- legacy::PassManagerBase &PM) {
+ [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
if (AMDGPUAA) {
PM.add(createAMDGPUAAWrapperPass());
PM.add(createAMDGPUExternalAAWrapperPass());
}
PM.add(llvm::createAMDGPUUseNativeCallsPass());
if (LibCallSimplify)
- PM.add(llvm::createAMDGPUSimplifyLibCallsPass());
+ PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
});
Builder.addExtension(
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
new file mode 100644
index 00000000000..6f6b5f4c0b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
+
+; GCN: define float @foo(float %x) local_unnamed_addr #0 {
+; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
+; GCN: %mul.i = fmul float %load, 1.500000e+01
+
+; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #1 = { norecurse nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+
+; NOINFS: attributes #0 = { norecurse nounwind readnone "no-infs-fp-math"="true" }
+; NOINFS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
+
+; NONANS: attributes #0 = { norecurse nounwind readnone "no-nans-fp-math"="true" }
+; NONANS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
+
+define float @foo(float %x) #0 {
+entry:
+ %mul = fmul float %x, 1.500000e+01
+ ret float %mul
+}
+
+define amdgpu_kernel void @caller(float addrspace(1)* %p) #1 {
+entry:
+ %load = load float, float addrspace(1)* %p, align 4
+ %call = call fast float @foo(float %load) #0
+ store float %call, float addrspace(1)* %p, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
OpenPOWER on IntegriCloud