Fast Math Flag mapping into SDNode

Summary: Add support for the individual fast-math flags in SDNodeFlags so that SelectionDAG code can use the fast-math sub-flags directly: the single UnsafeAlgebra flag is replaced by ApproximateFuncs (afn) and AllowReassociation (reassoc). Reviewers: spatel, arsenm, jbhateja, hfinkel, escha, qcolombet, echristo, wristow, javed.absar Reviewed By: spatel Subscribers: llvm-commits, rampitec, nhaehnle, tstellar, FarhanaAleen, nemanjai, javed.absar, jbhateja, hfinkel, wdng Differential Revision: https://reviews.llvm.org/D45710 llvm-svn: 331547
author: Michael Berg <michael_c_berg@apple.com> 2018-05-04 18:48:20 +0000
committer: Michael Berg <michael_c_berg@apple.com> 2018-05-04 18:48:20 +0000
commit: 7acc81b74428660efa5156815eebaee8a2ebe075 (patch)
tree: 43d5e80f2e186315ec3bdabe7635c905e792ae5c /llvm
parent: 0e51a125ea091955a1f5e7a7390a3b7953a3e314 (diff)
download: bcm5719-llvm-7acc81b74428660efa5156815eebaee8a2ebe075.tar.gz
bcm5719-llvm-7acc81b74428660efa5156815eebaee8a2ebe075.zip
8 files changed, 53 insertions, 40 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 5a6df4372be..f9dd35745ab 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -359,21 +359,22 @@ private:
   bool NoUnsignedWrap : 1;
   bool NoSignedWrap : 1;
   bool Exact : 1;
-  bool UnsafeAlgebra : 1;
   bool NoNaNs : 1;
   bool NoInfs : 1;
   bool NoSignedZeros : 1;
   bool AllowReciprocal : 1;
   bool VectorReduction : 1;
   bool AllowContract : 1;
+  bool ApproximateFuncs : 1;
+  bool AllowReassociation : 1;
 
 public:
   /// Default constructor turns off all optimization flags.
   SDNodeFlags()
       : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
-        Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false),
+        Exact(false), NoNaNs(false), NoInfs(false),
         NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
-        AllowContract(false) {}
+        AllowContract(false),  ApproximateFuncs(false), AllowReassociation(false) {}
 
   /// Sets the state of the flags to the defined state.
   void setDefined() { AnyDefined = true; }
@@ -393,10 +394,6 @@ public:
     setDefined();
     Exact = b;
   }
-  void setUnsafeAlgebra(bool b) {
-    setDefined();
-    UnsafeAlgebra = b;
-  }
   void setNoNaNs(bool b) {
     setDefined();
     NoNaNs = b;
@@ -421,18 +418,32 @@ public:
     setDefined();
     AllowContract = b;
   }
+  void setApproximateFuncs(bool b) {
+    setDefined();
+    ApproximateFuncs = b;
+  }
+  void setAllowReassociation(bool b) {
+    setDefined();
+    AllowReassociation = b;
+  }
 
   // These are accessors for each flag.
   bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
   bool hasNoSignedWrap() const { return NoSignedWrap; }
   bool hasExact() const { return Exact; }
-  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
   bool hasNoNaNs() const { return NoNaNs; }
   bool hasNoInfs() const { return NoInfs; }
   bool hasNoSignedZeros() const { return NoSignedZeros; }
   bool hasAllowReciprocal() const { return AllowReciprocal; }
   bool hasVectorReduction() const { return VectorReduction; }
   bool hasAllowContract() const { return AllowContract; }
+  bool hasApproximateFuncs() const { return ApproximateFuncs; }
+  bool hasAllowReassociation() const { return AllowReassociation; }
+
+  bool isFast() const {
+    return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs &&
+           AllowContract && ApproximateFuncs && AllowReassociation;
+  }
 
   /// Clear any flags in this flag set that aren't also set in Flags.
   /// If the given Flags are undefined then don't do anything.
@@ -442,13 +453,14 @@ public:
     NoUnsignedWrap &= Flags.NoUnsignedWrap;
     NoSignedWrap &= Flags.NoSignedWrap;
     Exact &= Flags.Exact;
-    UnsafeAlgebra &= Flags.UnsafeAlgebra;
     NoNaNs &= Flags.NoNaNs;
     NoInfs &= Flags.NoInfs;
     NoSignedZeros &= Flags.NoSignedZeros;
     AllowReciprocal &= Flags.AllowReciprocal;
     VectorReduction &= Flags.VectorReduction;
     AllowContract &= Flags.AllowContract;
+    ApproximateFuncs &= Flags.ApproximateFuncs;
+    AllowReassociation &= Flags.AllowReassociation;
   }
 };
 
@@ -923,6 +935,7 @@ public:
 
   const SDNodeFlags getFlags() const { return Flags; }
   void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
+  bool isFast() { return Flags.isFast(); }
 
   /// Clear any flags in this node that aren't also set in Flags.
   /// If Flags is not in a defined state then this has no effect.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d54c6197356..a6a020e8361 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9542,7 +9542,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
 
 static bool isContractable(SDNode *N) {
   SDNodeFlags F = N->getFlags();
-  return F.hasAllowContract() || F.hasUnsafeAlgebra();
+  return F.hasAllowContract() || F.hasAllowReassociation();
 }
 
 /// Try to perform FMA combining on a given FADD node.
@@ -10567,9 +10567,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
 
   // TODO: FMA nodes should have flags that propagate to the created nodes.
-  // For now, create a Flags object for use with all unsafe math transforms.
+  // For now, create a Flags object for use with reassociation math transforms.
   SDNodeFlags Flags;
-  Flags.setUnsafeAlgebra(true);
+  Flags.setAllowReassociation(true);
 
   if (Options.UnsafeFPMath) {
     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
@@ -10841,9 +10841,9 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
     return SDValue();
 
   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
-  // For now, create a Flags object for use with all unsafe math transforms.
+  // For now, create a Flags object for use with reassociation math transforms.
   SDNodeFlags Flags;
-  Flags.setUnsafeAlgebra(true);
+  Flags.setAllowReassociation(true);
   return buildSqrtEstimate(N0, Flags);
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index bc08e950361..fe4cbfc4bf0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2776,7 +2776,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   Flags.setNoInfs(FMF.noInfs());
   Flags.setNoNaNs(FMF.noNaNs());
   Flags.setNoSignedZeros(FMF.noSignedZeros());
-  Flags.setUnsafeAlgebra(FMF.isFast());
+  Flags.setApproximateFuncs(FMF.approxFunc());
+  Flags.setAllowReassociation(FMF.allowReassoc());
 
   SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
                                      Op1, Op2, Flags);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9862bc45b17..cfe8ed01614 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -479,9 +479,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   if (getFlags().hasExact())
     OS << " exact";
 
-  if (getFlags().hasUnsafeAlgebra())
-    OS << " unsafe";
-
   if (getFlags().hasNoNaNs())
     OS << " nnan";
 
@@ -497,6 +494,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   if (getFlags().hasAllowContract())
     OS << " contract";
 
+  if (getFlags().hasApproximateFuncs())
+    OS << " afn";
+
+  if (getFlags().hasAllowReassociation())
+    OS << " reassoc";
+
   if (getFlags().hasVectorReduction())
     OS << " vector-reduction";
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 703cccb3dbf..1d5683cf3cb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5145,7 +5145,7 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
       EVT VT = Operand.getValueType();
 
       SDNodeFlags Flags;
-      Flags.setUnsafeAlgebra(true);
+      Flags.setAllowReassociation(true);
 
       // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
       // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
@@ -5184,7 +5184,7 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
       EVT VT = Operand.getValueType();
 
       SDNodeFlags Flags;
-      Flags.setUnsafeAlgebra(true);
+      Flags.setAllowReassociation(true);
 
       // Newton reciprocal iteration: E * (2 - X * E)
       // AArch64 reciprocal iteration instruction: (2 - M * N)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d98c2a272c9..82318263ea6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5347,8 +5347,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
   SDValue RHS = Op.getOperand(1);
   EVT VT = Op.getValueType();
   const SDNodeFlags Flags = Op->getFlags();
-  bool Unsafe = DAG.getTarget().Options.UnsafeFPMath ||
-                Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal();
+  bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || Flags.hasAllowReciprocal();
 
   if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
     return SDValue();
@@ -6698,8 +6697,8 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
 
   const TargetOptions &Options = DAG.getTarget().Options;
   if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
-       (N0->getFlags().hasUnsafeAlgebra() &&
-        N1->getFlags().hasUnsafeAlgebra())) &&
+       (N0->getFlags().hasAllowContract() &&
+        N1->getFlags().hasAllowContract())) &&
       isFMAFasterThanFMulAndFAdd(VT)) {
     return ISD::FMA;
   }
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 03b6a0ba950..3550b09bbeb 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -63,7 +63,7 @@ define float @fmul_fadd_contract2(float %x, float %y, float %z) {
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
 ; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
-; FMFDEBUG:         fadd {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
 
 define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
@@ -86,15 +86,14 @@ define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
-; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
-; FMFDEBUG:         fadd {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
 
 define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_reassoc2:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsmulsp 0, 1, 2
-; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_reassoc2:
@@ -161,7 +160,7 @@ define float @fmul_fadd_fast2(float %x, float %y, float %z) {
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
-; GLOBALDEBUG:         fmul unsafe {{t[0-9]+}}
+; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
 
 define float @fmul_fma_reassoc1(float %x) {
@@ -197,7 +196,7 @@ define float @fmul_fma_reassoc1(float %x) {
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
-; GLOBALDEBUG:         fmul unsafe {{t[0-9]+}}
+; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
 
 define float @fmul_fma_reassoc2(float %x) {
@@ -233,7 +232,7 @@ define float @fmul_fma_reassoc2(float %x) {
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; GLOBALDEBUG:         fmul unsafe {{t[0-9]+}}
+; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 
 define float @fmul_fma_fast1(float %x) {
@@ -269,7 +268,7 @@ define float @fmul_fma_fast1(float %x) {
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
-; GLOBALDEBUG:         fmul unsafe {{t[0-9]+}}
+; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 
 define float @fmul_fma_fast2(float %x) {
@@ -305,7 +304,7 @@ define float @fmul_fma_fast2(float %x) {
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
-; GLOBALDEBUG:         fmul unsafe {{t[0-9]+}}
+; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn:'
 
 define float @sqrt_afn(float %x) {
@@ -345,7 +344,7 @@ define float @sqrt_afn(float %x) {
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
-; GLOBALDEBUG:         fmul unsafe {{t[0-9]+}}
+; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast:'
 
 define float @sqrt_fast(float %x) {
diff --git a/llvm/test/CodeGen/X86/fmf-propagation.ll b/llvm/test/CodeGen/X86/fmf-propagation.ll
index 294a2952c76..56e813f371c 100644
--- a/llvm/test/CodeGen/X86/fmf-propagation.ll
+++ b/llvm/test/CodeGen/X86/fmf-propagation.ll
@@ -3,8 +3,6 @@
 
 ; This tests the propagation of fast-math-flags from IR instructions to SDNodeFlags.
 
-; FIXME: 'afn' and 'reassoc' were dropped. With 'fast', 'reassoc' got renamed to 'unsafe'.
-
 ; CHECK-LABEL: Initial selection DAG: %bb.0 'fmf_transfer:'
 
 ; CHECK:         t5: f32 = fadd nsz t2, t4
@@ -12,9 +10,9 @@
 ; CHECK-NEXT:    t7: f32 = fadd nnan t6, t4
 ; CHECK-NEXT:    t8: f32 = fadd ninf t7, t4
 ; CHECK-NEXT:    t9: f32 = fadd contract t8, t4
-; CHECK-NEXT:    t10: f32 = fadd t9, t4
-; CHECK-NEXT:    t11: f32 = fadd t10, t4
-; CHECK-NEXT:    t12: f32 = fadd unsafe nnan ninf nsz arcp contract t11, t4
+; CHECK-NEXT:    t10: f32 = fadd afn t9, t4
+; CHECK-NEXT:    t11: f32 = fadd reassoc t10, t4
+; CHECK-NEXT:    t12: f32 = fadd nnan ninf nsz arcp contract afn reassoc t11, t4
 
 ; CHECK: Optimized lowered selection DAG: %bb.0 'fmf_transfer:'
author	Michael Berg <michael_c_berg@apple.com>	2018-05-04 18:48:20 +0000
committer	Michael Berg <michael_c_berg@apple.com>	2018-05-04 18:48:20 +0000
commit	7acc81b74428660efa5156815eebaee8a2ebe075 (patch)
tree	43d5e80f2e186315ec3bdabe7635c905e792ae5c /llvm
parent	0e51a125ea091955a1f5e7a7390a3b7953a3e314 (diff)
download	bcm5719-llvm-7acc81b74428660efa5156815eebaee8a2ebe075.tar.gz bcm5719-llvm-7acc81b74428660efa5156815eebaee8a2ebe075.zip