| author | David Green <david.green@arm.com> | 2019-11-04 14:06:04 +0000 |
|---|---|---|
| committer | David Green <david.green@arm.com> | 2019-11-04 15:05:41 +0000 |
| commit | 91b0cad8132997060182146b2734065bc807e9fa (patch) | |
| tree | bbc63b21e4ddd0042e7b9f7b50628f33a4b53627 /llvm/lib | |
| parent | b556ce3992709e1f6302ca1d4c296f57e83cd6a7 (diff) | |
[ARM] Use isFMAFasterThanFMulAndFAdd for MVE
The Arm backend usually returns false for isFMAFasterThanFMulAndFAdd,
because for scalar code both the fused VFMA.f32 and the non-fused
VMLA.f32 are usually available. For MVE we don't have the non-fused
version, though, so it makes more sense for isFMAFasterThanFMulAndFAdd
to return true, which also lets us simplify some of the existing ISel
patterns.
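For context on what the hook controls: when isFMAFasterThanFMulAndFAdd returns true for a type, a call to the llvm.fmuladd intrinsic is expanded to a single fused multiply-add rather than to separate fmul and fadd nodes. The sketch below is a standalone model of that decision, not the actual LLVM sources; SimpleVT and both function signatures are illustrative stand-ins.

```cpp
// Standalone model (not the actual LLVM sources) of the decision that
// isFMAFasterThanFMulAndFAdd controls when llvm.fmuladd is expanded.
// SimpleVT and both function signatures are illustrative stand-ins.
#include <cstdio>

enum class SimpleVT { f32, f64, v4f32, v8f16 };

// Mirrors the behaviour this patch adds: true only for the MVE vector
// floating-point types, and only when MVE floating point is available.
bool isFMAFasterThanFMulAndFAdd(bool hasMVEFloatOps, SimpleVT vt) {
  if (!hasMVEFloatOps)
    return false;
  switch (vt) {
  case SimpleVT::v4f32:
  case SimpleVT::v8f16:
    return true;
  default:
    return false;
  }
}

// fmuladd expansion: one fused operation when the target says FMA is
// faster, otherwise a separate multiply and add.
const char *expandFMulAdd(bool hasMVEFloatOps, SimpleVT vt) {
  return isFMAFasterThanFMulAndFAdd(hasMVEFloatOps, vt) ? "fma"
                                                        : "fmul + fadd";
}

int main() {
  // On an MVE target, a v4f32 fmuladd becomes a single fused fma...
  std::printf("v4f32: %s\n", expandFMulAdd(true, SimpleVT::v4f32));
  // ...while scalar f32 keeps the unfused route (VMLA.f32 exists there).
  std::printf("f32:   %s\n", expandFMulAdd(true, SimpleVT::f32));
  return 0;
}
```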
The testing here is that none of the existing tests failed, so we are
still selecting VFMA and VFMS. The one test that did change shows that
we can now select these from fast-math flags, as opposed to relying
solely on the isFMADLegalForFAddFSub option.
Differential Revision: https://reviews.llvm.org/D69115
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 30 |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 11 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 24 |
3 files changed, 35 insertions, 30 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6e511e68d7a..4464fd1be05 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14858,6 +14858,36 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
   return -1;
 }
 
+/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+/// expanded to FMAs when this method returns true, otherwise fmuladd is
+/// expanded to fmul + fadd.
+///
+/// ARM supports both fused and unfused multiply-add operations; we already
+/// lower a pair of fmul and fadd to the latter so it's not clear that there
+/// would be a gain or that the gain would be worthwhile enough to risk
+/// correctness bugs.
+///
+/// For MVE, we set this to true as it helps simplify the need for some
+/// patterns (and we don't have the non-fused floating point instruction).
+bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  if (!Subtarget->hasMVEFloatOps())
+    return false;
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::v4f32:
+  case MVT::v8f16:
+    return true;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   if (V < 0)
     return false;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 53813fad5af..d3caed884a3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -738,16 +738,7 @@ class VectorType;
     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                           SmallVectorImpl<SDNode *> &Created) const override;
 
-    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
-    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
-    /// expanded to FMAs when this method returns true, otherwise fmuladd is
-    /// expanded to fmul + fadd.
-    ///
-    /// ARM supports both fused and unfused multiply-add operations; we already
-    /// lower a pair of fmul and fadd to the latter so it's not clear that there
-    /// would be a gain or that the gain would be worthwhile enough to risk
-    /// correctness bugs.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; }
+    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 
     SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index e43d64393a6..040b6f64832 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2808,31 +2808,15 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
 def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
                                     (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
 
-let Predicates = [HasMVEFloat, UseFusedMAC] in {
-  def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
-                         (fmul (v8f16 MQPR:$src2),
-                               (v8f16 MQPR:$src3)))),
-            (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
-  def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
-                         (fmul (v4f32 MQPR:$src2),
-                               (v4f32 MQPR:$src3)))),
-            (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
-
-  def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
-                         (fmul (v8f16 MQPR:$src2),
-                               (v8f16 MQPR:$src3)))),
-            (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
-  def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
-                         (fmul (v4f32 MQPR:$src2),
-                               (v4f32 MQPR:$src3)))),
-            (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
-}
-
 let Predicates = [HasMVEFloat] in {
   def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
             (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
   def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
             (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+  def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+            (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
+  def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+            (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
 }
 
 multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
```
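Why the old [HasMVEFloat, UseFusedMAC] patterns could be removed: once the hook returns true, the generic combines can turn an fadd/fsub of an fmul into an fma node under the appropriate fusion conditions, with the subtract case arriving as an fma of an fneg, which is exactly the shape the new MVE_VFMSf16/MVE_VFMSf32 patterns match. Below is a minimal scalar model of that fsub-to-fma(fneg) equivalence, illustrative only; the real transform runs on MVE vector types in the SelectionDAG.

```cpp
#include <cmath>
#include <cstdio>

int main() {
  double a = 1.5, b = 2.0, c = 3.25;
  // fsub(a, fmul(b, c)): the shape the removed UseFusedMAC patterns matched.
  double unfused = a - b * c;
  // fma(fneg(b), c, a): the shape the new MVE_VFMSf* patterns match.
  double fused = std::fma(-b, c, a);
  // The two agree up to the rounding of the intermediate product, which
  // is why the fusion is gated on fast-math/contraction in the first place.
  std::printf("unfused = %f, fused = %f\n", unfused, fused);
  return 0;
}
```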

