summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-09-22 09:50:52 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-09-22 09:50:52 +0000
commit: 2b1c3bb25daddd3d34a85d021f1b29a91dc932e5 (patch)
tree: 36fa731396db565f32bc29ec92c79975b2cb12e7 /llvm/test/CodeGen
parent: 489604cd1196dd899713cb31c4b7c0f1b546c56b (diff)
download: bcm5719-llvm-2b1c3bb25daddd3d34a85d021f1b29a91dc932e5.tar.gz
          bcm5719-llvm-2b1c3bb25daddd3d34a85d021f1b29a91dc932e5.zip
[ARM] Add missing selection patterns for vnmla
For the following function:

    double fn1(double d0, double d1, double d2) {
      double a = -d0 - d1 * d2;
      return a;
    }

on ARM, LLVM generates code along the lines of

    vneg.f64 d0, d0
    vmls.f64 d0, d1, d2

i.e., a negate and a multiply-subtract. The attached patch adds instruction selection patterns to allow it to generate the single instruction

    vnmla.f64 d0, d1, d2

(multiply-add with negation) instead, like GCC does.

Committed on behalf of @gergo- (Gergö Barany)

Differential Revision: https://reviews.llvm.org/D35911

llvm-svn: 313972
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/ARM/fnmscs.ll 69
1 files changed, 67 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/ARM/fnmscs.ll b/llvm/test/CodeGen/ARM/fnmscs.ll
index 5fa6b219388..0fa878c0c2f 100644
--- a/llvm/test/CodeGen/ARM/fnmscs.ll
+++ b/llvm/test/CodeGen/ARM/fnmscs.ll
@@ -1,7 +1,10 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp2 %s -o - \
; RUN: | FileCheck %s -check-prefix=VFP2
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp3 %s -o - \
+; RUN: | FileCheck %s -check-prefix=VFP3
+
+; RUN: llc -mtriple=arm-eabihf -mattr=+neon %s -o - \
; RUN: | FileCheck %s -check-prefix=NEON
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
@@ -21,6 +24,9 @@ entry:
; VFP2-LABEL: t1:
; VFP2: vnmla.f32
+; VFP3-LABEL: t1:
+; VFP3: vnmla.f32
+
; NEON-LABEL: t1:
; NEON: vnmla.f32
@@ -42,6 +48,9 @@ entry:
; VFP2-LABEL: t2:
; VFP2: vnmla.f32
+; VFP3-LABEL: t2:
+; VFP3: vnmla.f32
+
; NEON-LABEL: t2:
; NEON: vnmla.f32
@@ -63,6 +72,9 @@ entry:
; VFP2-LABEL: t3:
; VFP2: vnmla.f64
+; VFP3-LABEL: t3:
+; VFP3: vnmla.f64
+
; NEON-LABEL: t3:
; NEON: vnmla.f64
@@ -84,6 +96,9 @@ entry:
; VFP2-LABEL: t4:
; VFP2: vnmla.f64
+; VFP3-LABEL: t4:
+; VFP3: vnmla.f64
+
; NEON-LABEL: t4:
; NEON: vnmla.f64
@@ -99,3 +114,53 @@ entry:
%2 = fsub double %1, %acc
ret double %2
}
+
+define double @t5(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2-LABEL: t5:
+; VFP2: vnmla.f64
+
+; VFP3-LABEL: t5:
+; VFP3: vnmla.f64
+
+; NEON-LABEL: t5:
+; NEON: vnmla.f64
+
+; A8U-LABEL: t5:
+; A8U: vmul.f64 d
+; A8U: vsub.f64 d
+
+; A8-LABEL: t5:
+; A8: vmul.f64 d
+; A8: vsub.f64 d
+
+ %0 = fsub double -0.0, %acc
+ %1 = fmul double %a, %b
+ %2 = fsub double %0, %1
+ ret double %2
+}
+
+define float @t6(float %acc, float %a, float %b) nounwind {
+entry:
+; VFP2-LABEL: t6:
+; VFP2: vnmla.f32
+
+; VFP3-LABEL: t6:
+; VFP3: vnmla.f32
+
+; NEON-LABEL: t6:
+; NEON: vnmla.f32
+
+; A8U-LABEL: t6:
+; A8U: vmul.f32 d
+; A8U: vsub.f32 d
+
+; A8-LABEL: t6:
+; A8: vmul.f32 s
+; A8: vsub.f32 s
+
+ %0 = fsub float -0.0, %acc
+ %1 = fmul float %a, %b
+ %2 = fsub float %0, %1
+ ret float %2
+}
OpenPOWER on IntegriCloud