Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp  | 30
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h    | 11
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td       | 24
3 files changed, 35 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6e511e68d7a..4464fd1be05 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14858,6 +14858,36 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
+/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+/// expanded to FMAs when this method returns true, otherwise fmuladd is
+/// expanded to fmul + fadd.
+///
+/// ARM supports both fused and unfused multiply-add operations; we already
+/// lower a pair of fmul and fadd to the latter so it's not clear that there
+/// would be a gain or that the gain would be worthwhile enough to risk
+/// correctness bugs.
+///
+/// For MVE, we set this to true as it helps simplify the need for some
+/// patterns (and we don't have the non-fused floating point instruction).
+bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ if (!Subtarget->hasMVEFloatOps())
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::v4f32:
+ case MVT::v8f16:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
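The "risk correctness bugs" remark in the comment above is about rounding: a fused multiply-add rounds once, while fmul followed by fadd rounds twice, so folding the pair can change numeric results. A minimal standalone sketch (plain C++ with std::fma, not part of the patch; compile with -ffp-contract=off so the unfused expression stays unfused, and C++17 for hex float literals) that makes the difference visible:

    #include <cmath>
    #include <cstdio>

    int main() {
      // Chosen so a*b == 1 - 2^-54 exactly, halfway between two doubles.
      double a = 1.0 + 0x1p-27;
      double b = 1.0 - 0x1p-27;
      double c = -1.0;
      double unfused = a * b + c;         // a*b rounds to 1.0, result 0.0
      double fused   = std::fma(a, b, c); // single rounding: exactly -0x1p-54
      std::printf("unfused = %a\nfused   = %a\n", unfused, fused);
    }

On MVE the trade-off flips: as the comment notes, there is no unfused vector floating-point multiply-accumulate to preserve in the first place, which is exactly what the new hook encodes by returning true only for the MVE vector types.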
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 53813fad5af..d3caed884a3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -738,16 +738,7 @@ class VectorType;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
- ///
- /// ARM supports both fused and unfused multiply-add operations; we already
- /// lower a pair of fmul and fadd to the latter so it's not clear that there
- /// would be a gain or that the gain would be worthwhile enough to risk
- /// correctness bugs.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; }
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index e43d64393a6..040b6f64832 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2808,31 +2808,15 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
(ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-let Predicates = [HasMVEFloat, UseFusedMAC] in {
- def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
-
- def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
-}
-
let Predicates = [HasMVEFloat] in {
def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
(v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
(v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+ def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+ (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
+ def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+ (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
}
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
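With isFMAFasterThanFMulAndFAdd now returning true for v4f32 and v8f16, llvm.fmuladd reaches instruction selection as an ISD::FMA node, so the generic fma patterns (plus the new fneg variants for VFMS) cover what the deleted UseFusedMAC fadd/fmul patterns used to match. A hedged C++ sketch of the source-level idioms that should now select to these instructions, assuming the ACLE MVE intrinsics vfmaq_f32/vfmsq_f32 from arm_mve.h and an MVE-enabled target (e.g. -mcpu=cortex-m55 -mfloat-abi=hard):

    #include <arm_mve.h>

    // acc + a*b: matches (fma a, b, acc) -> MVE_VFMAf32 (VFMA.F32)
    float32x4_t mla(float32x4_t acc, float32x4_t a, float32x4_t b) {
      return vfmaq_f32(acc, a, b);
    }

    // acc - a*b: matches (fma (fneg a), b, acc) -> MVE_VFMSf32 (VFMS.F32)
    float32x4_t mls(float32x4_t acc, float32x4_t a, float32x4_t b) {
      return vfmsq_f32(acc, a, b);
    }

Note the operand order in the patterns: the accumulator ($src3) becomes the first instruction operand, i.e. the tied Qd source required by the "$Qd = $Qd_src" constraint on the instruction definitions above.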