| author | David Green <david.green@arm.com> | 2019-11-04 14:06:04 +0000 |
|---|---|---|
| committer | David Green <david.green@arm.com> | 2019-11-04 15:05:41 +0000 |
| commit | 91b0cad8132997060182146b2734065bc807e9fa (patch) | |
| tree | bbc63b21e4ddd0042e7b9f7b50628f33a4b53627 /llvm/lib | |
| parent | b556ce3992709e1f6302ca1d4c296f57e83cd6a7 (diff) | |
[ARM] Use isFMAFasterThanFMulAndFAdd for MVE
The Arm backend usually returns false for isFMAFasterThanFMulAndFAdd,
because for scalar code both the fused VFMA.f32 and the non-fused
VMLA.f32 are usually available. For MVE we don't have the non-fused
version, though, so it makes more sense for isFMAFasterThanFMulAndFAdd
to return true, which also lets us simplify some of the existing ISel
patterns.
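For context on what the hook controls: when isFMAFasterThanFMulAndFAdd returns true for a type, a call to the llvm.fmuladd intrinsic is expanded to a single fused multiply-add rather than to separate fmul and fadd nodes. The sketch below is a standalone model of that decision, not the actual LLVM sources; SimpleVT and both function signatures are illustrative stand-ins.

```cpp
// Standalone model (not the actual LLVM sources) of the decision that
// isFMAFasterThanFMulAndFAdd controls when llvm.fmuladd is expanded.
// SimpleVT and both function signatures are illustrative stand-ins.
#include <cstdio>

enum class SimpleVT { f32, f64, v4f32, v8f16 };

// Mirrors the behaviour this patch adds: true only for the MVE vector
// floating-point types, and only when MVE floating point is available.
bool isFMAFasterThanFMulAndFAdd(bool hasMVEFloatOps, SimpleVT vt) {
  if (!hasMVEFloatOps)
    return false;
  switch (vt) {
  case SimpleVT::v4f32:
  case SimpleVT::v8f16:
    return true;
  default:
    return false;
  }
}

// fmuladd expansion: one fused operation when the target says FMA is
// faster, otherwise a separate multiply and add.
const char *expandFMulAdd(bool hasMVEFloatOps, SimpleVT vt) {
  return isFMAFasterThanFMulAndFAdd(hasMVEFloatOps, vt) ? "fma"
                                                        : "fmul + fadd";
}

int main() {
  // On an MVE target, a v4f32 fmuladd becomes a single fused fma...
  std::printf("v4f32: %s\n", expandFMulAdd(true, SimpleVT::v4f32));
  // ...while scalar f32 keeps the unfused route (VMLA.f32 exists there).
  std::printf("f32:   %s\n", expandFMulAdd(true, SimpleVT::f32));
  return 0;
}
```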
The testing here is that none of the existing tests failed, so we are
still selecting VFMA and VFMS. The one test that did change shows that
we can now select these from fast-math flags, as opposed to relying
solely on the isFMADLegalForFAddFSub option.
Differential Revision: https://reviews.llvm.org/D69115
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 30 |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 11 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 24 |
3 files changed, 35 insertions, 30 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6e511e68d7a..4464fd1be05 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14858,6 +14858,36 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
   return -1;
 }
 
+/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+/// expanded to FMAs when this method returns true, otherwise fmuladd is
+/// expanded to fmul + fadd.
+///
+/// ARM supports both fused and unfused multiply-add operations; we already
+/// lower a pair of fmul and fadd to the latter so it's not clear that there
+/// would be a gain or that the gain would be worthwhile enough to risk
+/// correctness bugs.
+///
+/// For MVE, we set this to true as it helps simplify the need for some
+/// patterns (and we don't have the non-fused floating point instruction).
+bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  if (!Subtarget->hasMVEFloatOps())
+    return false;
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::v4f32:
+  case MVT::v8f16:
+    return true;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   if (V < 0)
     return false;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 53813fad5af..d3caed884a3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -738,16 +738,7 @@ class VectorType;
     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                           SmallVectorImpl<SDNode *> &Created) const override;
 
-    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
-    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
-    /// expanded to FMAs when this method returns true, otherwise fmuladd is
-    /// expanded to fmul + fadd.
-    ///
-    /// ARM supports both fused and unfused multiply-add operations; we already
-    /// lower a pair of fmul and fadd to the latter so it's not clear that there
-    /// would be a gain or that the gain would be worthwhile enough to risk
-    /// correctness bugs.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; }
+    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 
     SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index e43d64393a6..040b6f64832 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2808,31 +2808,15 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
 def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
                                     (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
 
-let Predicates = [HasMVEFloat, UseFusedMAC] in {
-  def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
-                         (fmul (v8f16 MQPR:$src2),
-                               (v8f16 MQPR:$src3)))),
-            (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
-  def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
-                         (fmul (v4f32 MQPR:$src2),
-                               (v4f32 MQPR:$src3)))),
-            (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
-
-  def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
-                         (fmul (v8f16 MQPR:$src2),
-                               (v8f16 MQPR:$src3)))),
-            (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
-  def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
-                         (fmul (v4f32 MQPR:$src2),
-                               (v4f32 MQPR:$src3)))),
-            (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
-}
-
 let Predicates = [HasMVEFloat] in {
   def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
             (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
   def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
             (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+  def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+            (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
+  def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+            (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
 }
 
 multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
```
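Why the old [HasMVEFloat, UseFusedMAC] patterns could be removed: once the hook returns true, the generic combines can turn an fadd/fsub of an fmul into an fma node under the appropriate fusion conditions, with the subtract case arriving as an fma of an fneg, which is exactly the shape the new MVE_VFMSf16/MVE_VFMSf32 patterns match. Below is a minimal scalar model of that fsub-to-fma(fneg) equivalence, illustrative only; the real transform runs on MVE vector types in the SelectionDAG.

```cpp
#include <cmath>
#include <cstdio>

int main() {
  double a = 1.5, b = 2.0, c = 3.25;
  // fsub(a, fmul(b, c)): the shape the removed UseFusedMAC patterns matched.
  double unfused = a - b * c;
  // fma(fneg(b), c, a): the shape the new MVE_VFMSf* patterns match.
  double fused = std::fma(-b, c, a);
  // The two agree up to the rounding of the intermediate product, which
  // is why the fusion is gated on fast-math/contraction in the first place.
  std::printf("unfused = %f, fused = %f\n", unfused, fused);
  return 0;
}
```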

