4 files changed, 69 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 893e4cb2a15..6bf8cdcb849 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -23,6 +23,7 @@ class ModulePass;
 class Pass;
 class Target;
 class TargetMachine;
+class TargetOptions;
 class PassRegistry;
 class Module;
 
@@ -52,7 +53,7 @@ FunctionPass *createSIDebuggerInsertNopsPass();
 FunctionPass *createSIInsertWaitsPass();
 FunctionPass *createSIInsertWaitcntsPass();
 FunctionPass *createSIFixWWMLivenessPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass();
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
 FunctionPass *createAMDGPUUseNativeCallsPass();
 FunctionPass *createAMDGPUCodeGenPreparePass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index d3d5c6dc9d9..e7e54750fe6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 #include <vector>
 #include <cmath>
 
@@ -168,10 +169,13 @@ namespace {
 
   AMDGPULibCalls Simplifier;
 
+  const TargetOptions Options;
+
   public:
     static char ID; // Pass identification
 
-    AMDGPUSimplifyLibCalls() : FunctionPass(ID) {
+    AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
+      : FunctionPass(ID), Options(Opt) {
       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
     }
 
@@ -1680,14 +1684,34 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
 }
 
 // Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass() {
-  return new AMDGPUSimplifyLibCalls();
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
+  return new AMDGPUSimplifyLibCalls(Opt);
 }
 
 FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
   return new AMDGPUUseNativeCalls();
 }
 
+/// Mirror the fast-math TargetOptions onto \p F as string function attributes.
+/// \returns true if any attribute was added, false if no option was set.
+static bool setFastFlags(Function &F, const TargetOptions &Options) {
+  const bool Unsafe = Options.UnsafeFPMath;
+  AttrBuilder FastAttrs;
+  if (Unsafe || Options.NoInfsFPMath)
+    FastAttrs.addAttribute("no-infs-fp-math", "true");
+  if (Unsafe || Options.NoNaNsFPMath)
+    FastAttrs.addAttribute("no-nans-fp-math", "true");
+  if (Unsafe) {
+    // Unsafe math also turns on the two remaining attributes.
+    FastAttrs.addAttribute("less-precise-fpmad", "true");
+    FastAttrs.addAttribute("unsafe-fp-math", "true");
+  }
+  const bool HasAny = FastAttrs.hasAttributes();
+  if (HasAny)
+    F.addAttributes(AttributeList::FunctionIndex, FastAttrs);
+  return HasAny;
+}
+
 bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
@@ -1699,6 +1723,9 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
         F.printAsOperand(dbgs(), false, F.getParent());
         dbgs() << '\n';);
 
+  if (!EnablePreLink)
+    Changed |= setFastFlags(F, Options);
+
   for (auto &BB : F) {
     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
       // Ignore non-calls.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 25d3629cd69..8a6b5aeaebc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -370,17 +370,18 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
         PM.add(createAMDGPUAlwaysInlinePass(false));
   });
 
+  const auto &Opt = Options;
   Builder.addExtension(
     PassManagerBuilder::EP_EarlyAsPossible,
-    [AMDGPUAA, LibCallSimplify](const PassManagerBuilder &,
-                                legacy::PassManagerBase &PM) {
+    [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
+                                      legacy::PassManagerBase &PM) {
       if (AMDGPUAA) {
         PM.add(createAMDGPUAAWrapperPass());
         PM.add(createAMDGPUExternalAAWrapperPass());
       }
       PM.add(llvm::createAMDGPUUseNativeCallsPass());
       if (LibCallSimplify)
-        PM.add(llvm::createAMDGPUSimplifyLibCallsPass());
+        PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
   });
 
   Builder.addExtension(
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
new file mode 100644
index 00000000000..6f6b5f4c0b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s  | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
+
+; GCN: define float @foo(float %x) local_unnamed_addr #0 {
+; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
+; GCN: %mul.i = fmul float %load, 1.500000e+01
+
+; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #1 = { norecurse nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+
+; NOINFS: attributes #0 = { norecurse nounwind readnone "no-infs-fp-math"="true" }
+; NOINFS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
+
+; NONANS: attributes #0 = { norecurse nounwind readnone "no-nans-fp-math"="true" }
+; NONANS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
+; Callee with no fast-math attributes in the input; group #0 is nounwind only.
+define float @foo(float %x) #0 {
+entry:
+  %mul = fmul float %x, 1.500000e+01
+  ret float %mul
+}
+; Kernel that calls @foo; its input group #1 sets all four fast-math strings to "true".
+define amdgpu_kernel void @caller(float addrspace(1)* %p) #1 {
+entry:
+  %load = load float, float addrspace(1)* %p, align 4
+  %call = call fast float @foo(float %load) #0
+  store float %call, float addrspace(1)* %p, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }