12 files changed, 163 insertions, 35 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index d4166216e49..49ded73aaf6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -13021,6 +13021,41 @@ The value produced is the floating point remainder from the division of the two
 value operands and has the same type as the operands.  The remainder has the
 same sign as the dividend. 
 
+'``llvm.experimental.constrained.fma``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.fma(<type> <op1>, <type> <op2>, <type> <op3>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fma``' intrinsic returns the result of a
+fused-multiply-add operation on its operands.
+
+Arguments:
+""""""""""
+
+The first three arguments to the '``llvm.experimental.constrained.fma``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. All arguments must have identical types.
+
+The fourth and fifth arguments specify the rounding mode and exception behavior
+as described above.
+
+Semantics:
+""""""""""
+
+The result produced is the product of the first two operands added to the third
+operand computed with infinite precision, and then rounded to the target
+precision.
 
 Constrained libm-equivalent Intrinsics
 --------------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index bc5d2353f63..ffcf3fa62f9 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -263,6 +263,7 @@ namespace ISD {
     /// They are used to limit optimizations while the DAG is being
     /// optimized.
     STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
+    STRICT_FMA,
 
     /// Constrained versions of libm-equivalent floating point intrinsics.
     /// These will be lowered to the equivalent non-constrained pseudo-op
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index db42fb6c170..3f88ed2747c 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -623,13 +623,14 @@ public:
   /// Test if this node is a strict floating point pseudo-op.
   bool isStrictFPOpcode() {
     switch (NodeType) {
-      default: 
+      default:
         return false;
       case ISD::STRICT_FADD:
       case ISD::STRICT_FSUB:
       case ISD::STRICT_FMUL:
       case ISD::STRICT_FDIV:
       case ISD::STRICT_FREM:
+      case ISD::STRICT_FMA:
       case ISD::STRICT_FSQRT:
       case ISD::STRICT_FPOW:
       case ISD::STRICT_FPOWI:
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index aad49bc9ac3..f58ee217a7c 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -167,6 +167,7 @@ namespace llvm {
     };
 
     bool isUnaryOp() const;
+    bool isTernaryOp() const;
     RoundingMode getRoundingMode() const;
     ExceptionBehavior getExceptionBehavior() const;
 
@@ -178,6 +179,7 @@ namespace llvm {
       case Intrinsic::experimental_constrained_fmul:
       case Intrinsic::experimental_constrained_fdiv:
       case Intrinsic::experimental_constrained_frem:
+      case Intrinsic::experimental_constrained_fma:
       case Intrinsic::experimental_constrained_sqrt:
       case Intrinsic::experimental_constrained_pow:
       case Intrinsic::experimental_constrained_powi:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e2cd40b372d..55b284a1076 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -490,6 +490,13 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
                                                       llvm_metadata_ty,
                                                       llvm_metadata_ty ]>;
 
+  def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
+                                                    [ LLVMMatchType<0>,
+                                                      LLVMMatchType<0>,
+                                                      LLVMMatchType<0>,
+                                                      llvm_metadata_ty,
+                                                      llvm_metadata_ty ]>;
+
   // These intrinsics are sensitive to the rounding mode so we need constrained
   // versions of each of them.  When strict rounding and exception control are
   // not required the non-constrained versions of these intrinsics should be
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7a208cb53ae..e0825e8e8e3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -907,6 +907,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
     case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
     case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
     case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+    case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
     case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
     case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
     case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
@@ -1072,6 +1073,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     }
     break;
   case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FMA:
   case ISD::STRICT_FPOW:
   case ISD::STRICT_FPOWI:
   case ISD::STRICT_FSIN:
@@ -1240,7 +1242,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
       // If the index is dependent on the store we will introduce a cycle when
       // creating the load (the load uses the index, and by replacing the chain
       // we will make the index dependent on the load). Also, the store might be
-      // dependent on the extractelement and introduce a cycle when creating 
+      // dependent on the extractelement and introduce a cycle when creating
       // the load.
       if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
           ST->hasPredecessor(Op.getNode()))
@@ -4065,6 +4067,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
                                       RTLIB::FMA_F80, RTLIB::FMA_F128,
                                       RTLIB::FMA_PPCF128));
+  case ISD::STRICT_FMA:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+                                      RTLIB::FMA_F80, RTLIB::FMA_F128,
+                                      RTLIB::FMA_PPCF128));
     break;
   case ISD::FADD:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b4154210329..d00ecafd580 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6695,6 +6695,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   unsigned OrigOpc = Node->getOpcode();
   unsigned NewOpc;
   bool IsUnary = false;
+  bool IsTernary = false;
   switch (OrigOpc) {
   default:
     llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
@@ -6703,6 +6704,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
   case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
   case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+  case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
   case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
   case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
   case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
@@ -6729,6 +6731,10 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   SDNode *Res = nullptr;
   if (IsUnary)
     Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+  else if (IsTernary)
+    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+                                           Node->getOperand(2),
+                                           Node->getOperand(3)});
   else
     Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
                                            Node->getOperand(2) });
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fc441b8d857..1371f52187c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5432,6 +5432,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_constrained_fmul:
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_fma:
   case Intrinsic::experimental_constrained_sqrt:
   case Intrinsic::experimental_constrained_pow:
   case Intrinsic::experimental_constrained_powi:
@@ -5963,6 +5964,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   case Intrinsic::experimental_constrained_frem:
     Opcode = ISD::STRICT_FREM;
     break;
+  case Intrinsic::experimental_constrained_fma:
+    Opcode = ISD::STRICT_FMA;
+    break;
   case Intrinsic::experimental_constrained_sqrt:
     Opcode = ISD::STRICT_FSQRT;
     break;
@@ -6009,10 +6013,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   SDVTList VTs = DAG.getVTList(ValueVTs);
   SDValue Result;
   if (FPI.isUnaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)) });
+  else if (FPI.isTernaryOp())
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                         { Chain, getValue(FPI.getArgOperand(0)),
+                                  getValue(FPI.getArgOperand(1)),
+                                  getValue(FPI.getArgOperand(2)) });
   else
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)),
                            getValue(FPI.getArgOperand(1))  });
 
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 8b12c55937f..67bd5b69bb0 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -14,10 +14,10 @@
 // are all subclasses of the CallInst class.  Note that none of these classes
 // has state or virtual methods, which is an important part of this gross/neat
 // hack working.
-// 
+//
 // In some cases, arguments to intrinsics need to be generic and are defined as
 // type pointer to empty struct { }*.  To access the real item of interest the
-// cast instruction needs to be stripped away. 
+// cast instruction needs to be stripped away.
 //
 //===----------------------------------------------------------------------===//
 
@@ -98,7 +98,7 @@ Value *InstrProfIncrementInst::getStep() const {
 ConstrainedFPIntrinsic::RoundingMode
 ConstrainedFPIntrinsic::getRoundingMode() const {
   unsigned NumOperands = getNumArgOperands();
-  Metadata *MD = 
+  Metadata *MD =
       dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
   if (!MD || !isa<MDString>(MD))
     return rmInvalid;
@@ -118,7 +118,7 @@ ConstrainedFPIntrinsic::getRoundingMode() const {
 ConstrainedFPIntrinsic::ExceptionBehavior
 ConstrainedFPIntrinsic::getExceptionBehavior() const {
   unsigned NumOperands = getNumArgOperands();
-  Metadata *MD = 
+  Metadata *MD =
       dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
   if (!MD || !isa<MDString>(MD))
     return ebInvalid;
@@ -132,7 +132,7 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const {
 
 bool ConstrainedFPIntrinsic::isUnaryOp() const {
   switch (getIntrinsicID()) {
-    default: 
+    default:
       return false;
     case Intrinsic::experimental_constrained_sqrt:
     case Intrinsic::experimental_constrained_sin:
@@ -147,3 +147,13 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
       return true;
   }
 }
+
+bool ConstrainedFPIntrinsic::isTernaryOp() const {
+  switch (getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::experimental_constrained_fma:
+      return true;
+  }
+}
+
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c5cee49c0c2..0cd6dbb0664 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3973,6 +3973,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
   case Intrinsic::experimental_constrained_fmul:
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_fma:
   case Intrinsic::experimental_constrained_sqrt:
   case Intrinsic::experimental_constrained_pow:
   case Intrinsic::experimental_constrained_powi:
@@ -4433,8 +4434,9 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
 
 void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
   unsigned NumOperands = FPI.getNumArgOperands();
-  Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
-         "invalid arguments for constrained FP intrinsic", &FPI);
+  Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
+          (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
+           "invalid arguments for constrained FP intrinsic", &FPI);
   Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
          "invalid exception behavior argument", &FPI);
   Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index 0f8d730d753..eae3955adc3 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s
 
 ; Verify that constants aren't folded to inexact results when the rounding mode
 ; is unknown.
@@ -9,7 +10,7 @@
 ; }
 ;
 ; CHECK-LABEL: f1
-; CHECK: divsd
+; COMMON: divsd
 define double @f1() {
 entry:
   %div = call double @llvm.experimental.constrained.fdiv.f64(
@@ -29,7 +30,7 @@ entry:
 ; }
 ;
 ; CHECK-LABEL: f2
-; CHECK:  subsd
+; COMMON:  subsd
 define double @f2(double %a) {
 entry:
   %div = call double @llvm.experimental.constrained.fsub.f64(
@@ -50,9 +51,9 @@ entry:
 ; }
 ;
 ; CHECK-LABEL: f3:
-; CHECK:  subsd
-; CHECK:  mulsd
-; CHECK:  subsd
+; COMMON:  subsd
+; COMMON:  mulsd
+; COMMON:  subsd
 define double @f3(double %a, double %b) {
 entry:
   %sub = call double @llvm.experimental.constrained.fsub.f64(
@@ -81,11 +82,11 @@ entry:
 ;   return a;
 ; }
 ;
-; 
+;
 ; CHECK-LABEL: f4:
-; CHECK: testl
-; CHECK: jle
-; CHECK: addsd
+; COMMON: testl
+; COMMON: jle
+; COMMON: addsd
 define double @f4(i32 %n, double %a) {
 entry:
   %cmp = icmp sgt i32 %n, 0
@@ -105,7 +106,7 @@ if.end:
 
 ; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f5
-; CHECK:  sqrtsd
+; COMMON:  sqrtsd
 define double @f5() {
 entry:
   %result = call double @llvm.experimental.constrained.sqrt.f64(double 42.0,
@@ -116,7 +117,7 @@ entry:
 
 ; Verify that pow(42.1, 3.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f6
-; CHECK:  pow
+; COMMON:  pow
 define double @f6() {
 entry:
   %result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
@@ -128,7 +129,7 @@ entry:
 
 ; Verify that powi(42.1, 3) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f7
-; CHECK:  powi
+; COMMON:  powi
 define double @f7() {
 entry:
   %result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
@@ -140,7 +141,7 @@ entry:
 
 ; Verify that sin(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f8
-; CHECK:  sin
+; COMMON:  sin
 define double @f8() {
 entry:
   %result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
@@ -151,7 +152,7 @@ entry:
 
 ; Verify that cos(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f9
-; CHECK:  cos
+; COMMON:  cos
 define double @f9() {
 entry:
   %result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
@@ -162,7 +163,7 @@ entry:
 
 ; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f10
-; CHECK:  exp
+; COMMON:  exp
 define double @f10() {
 entry:
   %result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
@@ -173,7 +174,7 @@ entry:
 
 ; Verify that exp2(42.1) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f11
-; CHECK:  exp2
+; COMMON:  exp2
 define double @f11() {
 entry:
   %result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
@@ -184,7 +185,7 @@ entry:
 
 ; Verify that log(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f12
-; CHECK:  log
+; COMMON:  log
 define double @f12() {
 entry:
   %result = call double @llvm.experimental.constrained.log.f64(double 42.0,
@@ -195,7 +196,7 @@ entry:
 
 ; Verify that log10(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f13
-; CHECK:  log10
+; COMMON:  log10
 define double @f13() {
 entry:
   %result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
@@ -206,7 +207,7 @@ entry:
 
 ; Verify that log2(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f14
-; CHECK:  log2
+; COMMON:  log2
 define double @f14() {
 entry:
   %result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
@@ -217,7 +218,8 @@ entry:
 
 ; Verify that rint(42.1) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f15
-; CHECK:  rint
+; NO-FMA:  rint
+; HAS-FMA: vroundsd
 define double @f15() {
 entry:
   %result = call double @llvm.experimental.constrained.rint.f64(double 42.1,
@@ -229,7 +231,8 @@ entry:
 ; Verify that nearbyint(42.1) isn't simplified when the rounding mode is
 ; unknown.
 ; CHECK-LABEL: f16
-; CHECK:  nearbyint
+; NO-FMA:  nearbyint
+; HAS-FMA: vroundsd
 define double @f16() {
 entry:
   %result = call double @llvm.experimental.constrained.nearbyint.f64(
@@ -239,6 +242,38 @@ entry:
   ret double %result
 }
 
+; Verify that fma(3.5) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; FMACALL32: jmp fmaf  # TAILCALL
+; FMA32: vfmadd213ss
+define float @f17() {
+entry:
+  %result = call float @llvm.experimental.constrained.fma.f32(
+                                               float 3.5,
+                                               float 3.5,
+                                               float 3.5,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict")
+  ret float %result
+}
+
+; Verify that fma(42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f18
+; FMACALL64: jmp fma  # TAILCALL
+; FMA64: vfmadd213sd
+define double @f18() {
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(
+                                               double 42.1,
+                                               double 42.1,
+                                               double 42.1,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict")
+  ret double %result
+}
+
 @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@@ -256,3 +291,5 @@ declare double @llvm.experimental.constrained.log10.f64(double, metadata, metada
 declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
index f21ba15b2d4..e1b253d1910 100644
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -73,7 +73,7 @@ entry:
 ;   return a;
 ; }
 ;
-; 
+;
 ; CHECK-LABEL: @f4
 ; CHECK-NOT: select
 ; CHECK: br i1 %cmp
@@ -94,7 +94,6 @@ if.end:
   ret double %a.0
 }
 
-
 ; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f5
 ; CHECK: call double @llvm.experimental.constrained.sqrt
@@ -231,6 +230,18 @@ entry:
   ret double %result
 }
 
+; Verify that fma(42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; CHECK: call double @llvm.experimental.constrained.fma
+define double @f17() {
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(double 42.1, double 42.1, double 42.1,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict")
+  ret double %result
+}
+
 @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@@ -248,3 +259,4 @@ declare double @llvm.experimental.constrained.log10.f64(double, metadata, metada
 declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)