diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-09-22 09:50:52 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-09-22 09:50:52 +0000 |
| commit | 2b1c3bb25daddd3d34a85d021f1b29a91dc932e5 (patch) | |
| tree | 36fa731396db565f32bc29ec92c79975b2cb12e7 /llvm/test/CodeGen | |
| parent | 489604cd1196dd899713cb31c4b7c0f1b546c56b (diff) | |
| download | bcm5719-llvm-2b1c3bb25daddd3d34a85d021f1b29a91dc932e5.tar.gz bcm5719-llvm-2b1c3bb25daddd3d34a85d021f1b29a91dc932e5.zip | |
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns that allow LLVM to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/ARM/fnmscs.ll | 69 |
1 file changed, 67 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/ARM/fnmscs.ll b/llvm/test/CodeGen/ARM/fnmscs.ll
index 5fa6b219388..0fa878c0c2f 100644
--- a/llvm/test/CodeGen/ARM/fnmscs.ll
+++ b/llvm/test/CodeGen/ARM/fnmscs.ll
@@ -1,7 +1,10 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp2 %s -o - \
 ; RUN: | FileCheck %s -check-prefix=VFP2
 
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp3 %s -o - \
+; RUN: | FileCheck %s -check-prefix=VFP3
+
+; RUN: llc -mtriple=arm-eabihf -mattr=+neon %s -o - \
 ; RUN: | FileCheck %s -check-prefix=NEON
 
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
@@ -21,6 +24,9 @@ entry:
 ; VFP2-LABEL: t1:
 ; VFP2: vnmla.f32
 
+; VFP3-LABEL: t1:
+; VFP3: vnmla.f32
+
 ; NEON-LABEL: t1:
 ; NEON: vnmla.f32
 
@@ -42,6 +48,9 @@ entry:
 ; VFP2-LABEL: t2:
 ; VFP2: vnmla.f32
 
+; VFP3-LABEL: t2:
+; VFP3: vnmla.f32
+
 ; NEON-LABEL: t2:
 ; NEON: vnmla.f32
 
@@ -63,6 +72,9 @@ entry:
 ; VFP2-LABEL: t3:
 ; VFP2: vnmla.f64
 
+; VFP3-LABEL: t3:
+; VFP3: vnmla.f64
+
 ; NEON-LABEL: t3:
 ; NEON: vnmla.f64
 
@@ -84,6 +96,9 @@ entry:
 ; VFP2-LABEL: t4:
 ; VFP2: vnmla.f64
 
+; VFP3-LABEL: t4:
+; VFP3: vnmla.f64
+
 ; NEON-LABEL: t4:
 ; NEON: vnmla.f64
 
@@ -99,3 +114,53 @@ entry:
   %2 = fsub double %1, %acc
   ret double %2
 }
+
+define double @t5(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2-LABEL: t5:
+; VFP2: vnmla.f64
+
+; VFP3-LABEL: t5:
+; VFP3: vnmla.f64
+
+; NEON-LABEL: t5:
+; NEON: vnmla.f64
+
+; A8U-LABEL: t5:
+; A8U: vmul.f64 d
+; A8U: vsub.f64 d
+
+; A8-LABEL: t5:
+; A8: vmul.f64 d
+; A8: vsub.f64 d
+
+  %0 = fsub double -0.0, %acc
+  %1 = fmul double %a, %b
+  %2 = fsub double %0, %1
+  ret double %2
+}
+
+define float @t6(float %acc, float %a, float %b) nounwind {
+entry:
+; VFP2-LABEL: t6:
+; VFP2: vnmla.f32
+
+; VFP3-LABEL: t6:
+; VFP3: vnmla.f32
+
+; NEON-LABEL: t6:
+; NEON: vnmla.f32
+
+; A8U-LABEL: t6:
+; A8U: vmul.f32 d
+; A8U: vsub.f32 d
+
+; A8-LABEL: t6:
+; A8: vmul.f32 s
+; A8: vsub.f32 s
+
+  %0 = fsub float -0.0, %acc
+  %1 = fmul float %a, %b
+  %2 = fsub float %0, %1
+  ret float %2
+}

