summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSjoerd Meijer <sjoerd.meijer@arm.com>2018-10-17 10:05:44 +0000
committerSjoerd Meijer <sjoerd.meijer@arm.com>2018-10-17 10:05:44 +0000
commit64cfb74a61e097e64bc393008f44be53f942110e (patch)
treedbfd355945dfe3923309fe1ab6bd366913daf733
parent7c8f91aa60600ffe3df9f6583432fa2cc38c5144 (diff)
downloadbcm5719-llvm-64cfb74a61e097e64bc393008f44be53f942110e.tar.gz
bcm5719-llvm-64cfb74a61e097e64bc393008f44be53f942110e.zip
[ARM] Do not fuse VADD and VMUL, continued (2/2)
This is patch 2/2, following up on D53314, and is the functional change to prevent fusing mul + add sequences into VFMAs. Differential revision: https://reviews.llvm.org/D53315 llvm-svn: 344683
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td6
-rw-r--r--llvm/test/CodeGen/ARM/fusedMAC.ll9
2 files changed, 13 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 529446ce809..fc8ed95ce8b 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -365,12 +365,14 @@ let RecomputePerFunction = 1 in {
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
// Do not use them for Darwin platforms.
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast && "
" Subtarget->hasVFP4()) && "
- "!Subtarget->isTargetDarwin()">;
+ "!Subtarget->isTargetDarwin() &&"
+ "Subtarget->useFPVMLx()">;
def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
diff --git a/llvm/test/CodeGen/ARM/fusedMAC.ll b/llvm/test/CodeGen/ARM/fusedMAC.ll
index 6f6cdc11491..6b922895b00 100644
--- a/llvm/test/CodeGen/ARM/fusedMAC.ll
+++ b/llvm/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,8 @@
; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m7 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m4 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
+; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m33 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
+
; Check generated fused MAC and MLS.
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
@@ -12,6 +16,11 @@ define double @fusedMACTest1(double %d1, double %d2, double %d3) {
define float @fusedMACTest2(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest2:
;CHECK: vfma.f32
+
+;DONT-FUSE-LABEL: fusedMACTest2:
+;DONT-FUSE: vmul.f32
+;DONT-FUSE-NEXT: vadd.f32
+
%1 = fmul float %f1, %f2
%2 = fadd float %1, %f3
ret float %2
OpenPOWER on IntegriCloud