author    David Green <david.green@arm.com>    2019-11-04 14:06:04 +0000
committer David Green <david.green@arm.com>    2019-11-04 15:05:41 +0000
commit    91b0cad8132997060182146b2734065bc807e9fa (patch)
tree      bbc63b21e4ddd0042e7b9f7b50628f33a4b53627 /llvm/lib
parent    b556ce3992709e1f6302ca1d4c296f57e83cd6a7 (diff)
[ARM] Use isFMAFasterThanFMulAndFAdd for MVE
The Arm backend usually returns false for isFMAFasterThanFMulAndFAdd, since for scalar code both the fused VFMA.f32 and the non-fused VMLA.f32 are generally available. For MVE we do not have the non-fused version, so it makes more sense for isFMAFasterThanFMulAndFAdd to return true, allowing us to simplify some of the existing ISel patterns. The test here is that none of the existing tests failed, so we are still selecting VFMA and VFMS. The one test that did change shows we can now select these from fast-math flags, as opposed to relying only on the isFMADLegalForFAddFSub option.

Differential Revision: https://reviews.llvm.org/D69115
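For illustration only (not part of the commit): a minimal C sketch of the kind of code affected. Assuming a target with MVE floating point (e.g. -march=armv8.1-m.main+mve.fp -mfloat-abi=hard) and fast-math enabled, the loop below is vectorized to v4f32 fma nodes, which this hook now lets ISel select directly as VFMA.F32. The function name and flags are illustrative choices, not taken from the patch.

/* Hedged sketch: with MVE FP and -ffast-math, the multiply-add below is
 * expected to be vectorized and fused into VFMA.F32 rather than a separate
 * vector multiply and add (MVE has no non-fused vector VMLA for floats). */
void mla4(float *restrict acc, const float *a, const float *b, int n) {
  for (int i = 0; i < n; i++)
    acc[i] += a[i] * b[i];
}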
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp  30
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h     11
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td        24
3 files changed, 35 insertions, 30 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6e511e68d7a..4464fd1be05 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14858,6 +14858,36 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
+/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+/// expanded to FMAs when this method returns true, otherwise fmuladd is
+/// expanded to fmul + fadd.
+///
+/// ARM supports both fused and unfused multiply-add operations; we already
+/// lower a pair of fmul and fadd to the latter so it's not clear that there
+/// would be a gain or that the gain would be worthwhile enough to risk
+/// correctness bugs.
+///
+/// For MVE, we set this to true as it helps simplify the need for some
+/// patterns (and we don't have the non-fused floating point instruction).
+bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ if (!Subtarget->hasMVEFloatOps())
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::v4f32:
+ case MVT::v8f16:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 53813fad5af..d3caed884a3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -738,16 +738,7 @@ class VectorType;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
- ///
- /// ARM supports both fused and unfused multiply-add operations; we already
- /// lower a pair of fmul and fadd to the latter so it's not clear that there
- /// would be a gain or that the gain would be worthwhile enough to risk
- /// correctness bugs.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; }
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index e43d64393a6..040b6f64832 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2808,31 +2808,15 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
(ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-let Predicates = [HasMVEFloat, UseFusedMAC] in {
- def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
-
- def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
-}
-
let Predicates = [HasMVEFloat] in {
def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
(v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
(v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+ def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+ (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
+ def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+ (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
}
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
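As a companion illustration for the new fneg/fma patterns above (again hypothetical, not from the patch): under the same MVE + fast-math assumptions, a subtract-of-product is combined into an fma with a negated multiplicand, which these patterns select as VFMS.F32.

/* Hedged sketch: fsub(acc, fmul(a, b)) is expected to be combined into
 * fma(fneg(a), b, acc) under fast-math, matching the fneg patterns above
 * and selecting VFMS.F32. */
void mls4(float *restrict acc, const float *a, const float *b, int n) {
  for (int i = 0; i < n; i++)
    acc[i] -= a[i] * b[i];
}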