diff options
| author | Adam Nemet <anemet@apple.com> | 2017-03-30 18:53:04 +0000 |
|---|---|---|
| committer | Adam Nemet <anemet@apple.com> | 2017-03-30 18:53:04 +0000 |
| commit | edaec6de73aa24dee60645f0a8015ae0fe1849f3 (patch) | |
| tree | fa638ef77354e231493dc8ec83d30d6b64fcef7f /llvm/test | |
| parent | 1052b7236121f6934e46988e11b4e08d3dbf3951 (diff) | |
| download | bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.tar.gz bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.zip | |
[DAGCombiner] Initial support for the fast-math flag contract
As an alternative to the global TargetOption.AllowFPOpFusion flag, FMUL->FADD
can now also use the per-operation FMF to allow fusion.
The idea here is not to port everything to the new scheme (e.g. fused
multiply-and-sub will be ported later), but to have this work all the way from
clang.
The transformation is conditional on *both* the FADD and the FMUL having
the FMF contract flag.
Differential Revision: https://reviews.llvm.org/D31169
llvm-svn: 299096
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/neon-fma-FMF.ll | 27 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll | 35 |
2 files changed, 62 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll new file mode 100644 index 00000000000..f1e9d4f0c96 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <2 x float> @fma(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; CHECK-LABEL: fma: +; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = fmul contract <2 x float> %A, %B; + %tmp2 = fadd contract <2 x float> %C, %tmp1; + ret <2 x float> %tmp2 +} + +define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; CHECK-LABEL: no_fma_1: +; CHECK: fmul +; CHECK: fadd + %tmp1 = fmul contract <2 x float> %A, %B; + %tmp2 = fadd <2 x float> %C, %tmp1; + ret <2 x float> %tmp2 +} + +define <2 x float> @no_fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; CHECK-LABEL: no_fma_2: +; CHECK: fmul +; CHECK: fadd + %tmp1 = fmul <2 x float> %A, %B; + %tmp2 = fadd contract <2 x float> %C, %tmp1; + ret <2 x float> %tmp2 +} diff --git a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll new file mode 100644 index 00000000000..8e97115bd1f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-linux-gnu | FileCheck %s + +define float @can_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) { +; CHECK-LABEL: can_fma_with_fewer_uses: +; CHECK: # BB#0: +; CHECK-NEXT: xsmulsp 0, 1, 2 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: xsmaddasp 1, 3, 4 +; CHECK-NEXT: xsdivsp 1, 0, 1 +; CHECK-NEXT: blr + %mul1 = fmul contract float %f1, %f2 + %mul2 = fmul contract float %f3, %f4 + %add = fadd contract float %mul1, %mul2 + %second_use_of_mul1 = fdiv float %mul1, %add + ret float %second_use_of_mul1 +} + +; There is no 
contract on the mul with no extra use so we can't fuse that. +; Since we are fusing with the mul with an extra use, the fmul needs to stick +; around beside the fma. +define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) { +; CHECK-LABEL: no_fma_with_fewer_uses: +; CHECK: # BB#0: +; CHECK-NEXT: xsmulsp 0, 3, 4 +; CHECK-NEXT: xsmulsp 13, 1, 2 +; CHECK-NEXT: xsmaddasp 0, 1, 2 +; CHECK-NEXT: xsdivsp 1, 13, 0 +; CHECK-NEXT: blr + %mul1 = fmul contract float %f1, %f2 + %mul2 = fmul float %f3, %f4 + %add = fadd contract float %mul1, %mul2 + %second_use_of_mul1 = fdiv float %mul1, %add + ret float %second_use_of_mul1 +} |

