summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Adam Nemet <anemet@apple.com> 2017-03-30 18:53:04 +0000
committer: Adam Nemet <anemet@apple.com> 2017-03-30 18:53:04 +0000
commit: edaec6de73aa24dee60645f0a8015ae0fe1849f3 (patch)
tree: fa638ef77354e231493dc8ec83d30d6b64fcef7f /llvm/test
parent: 1052b7236121f6934e46988e11b4e08d3dbf3951 (diff)
download: bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.tar.gz
bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.zip
[DAGCombiner] Initial support for the fast-math flag contract
As an alternative to the global TargetOption.AllowFPOpFusion flag, FMUL->FADD fusion can now also be enabled by the per-operation fast-math flags (FMF). The idea here is not to port everything to the new scheme at once (e.g. fused multiply-and-sub will be ported later) but to have this work all the way from clang. The transformation is conditional on *both* the FADD and the FMUL having the FMF contract flag.

Differential Revision: https://reviews.llvm.org/D31169
llvm-svn: 299096
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AArch64/neon-fma-FMF.ll 27
-rw-r--r-- llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll 35
2 files changed, 62 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
new file mode 100644
index 00000000000..f1e9d4f0c96
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <2 x float> @fma(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp1 = fmul contract <2 x float> %A, %B;
+ %tmp2 = fadd contract <2 x float> %C, %tmp1;
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: no_fma_1:
+; CHECK: fmul
+; CHECK: fadd
+ %tmp1 = fmul contract <2 x float> %A, %B;
+ %tmp2 = fadd <2 x float> %C, %tmp1;
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @no_fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: no_fma_2:
+; CHECK: fmul
+; CHECK: fadd
+ %tmp1 = fmul <2 x float> %A, %B;
+ %tmp2 = fadd contract <2 x float> %C, %tmp1;
+ ret <2 x float> %tmp2
+}
diff --git a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
new file mode 100644
index 00000000000..8e97115bd1f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-linux-gnu | FileCheck %s
+
+define float @can_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
+; CHECK-LABEL: can_fma_with_fewer_uses:
+; CHECK: # BB#0:
+; CHECK-NEXT: xsmulsp 0, 1, 2
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: xsmaddasp 1, 3, 4
+; CHECK-NEXT: xsdivsp 1, 0, 1
+; CHECK-NEXT: blr
+ %mul1 = fmul contract float %f1, %f2
+ %mul2 = fmul contract float %f3, %f4
+ %add = fadd contract float %mul1, %mul2
+ %second_use_of_mul1 = fdiv float %mul1, %add
+ ret float %second_use_of_mul1
+}
+
+; There is no contract on the mul with no extra use so we can't fuse that.
+; Since we are fusing with the mul with an extra use, the fmul needs to stick
+; around beside the fma.
+define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
+; CHECK-LABEL: no_fma_with_fewer_uses:
+; CHECK: # BB#0:
+; CHECK-NEXT: xsmulsp 0, 3, 4
+; CHECK-NEXT: xsmulsp 13, 1, 2
+; CHECK-NEXT: xsmaddasp 0, 1, 2
+; CHECK-NEXT: xsdivsp 1, 13, 0
+; CHECK-NEXT: blr
+ %mul1 = fmul contract float %f1, %f2
+ %mul2 = fmul float %f3, %f4
+ %add = fadd contract float %mul1, %mul2
+ %second_use_of_mul1 = fdiv float %mul1, %add
+ ret float %second_use_of_mul1
+}
OpenPOWER on IntegriCloud