[DAGCombiner] Initial support for the fast-math flag contract

As an alternative to the global TargetOptions.AllowFPOpFusion flag, the FMUL->FADD combine can now also use the per-operation fast-math flags (FMF) to allow fusion. The idea here is not to port everything to the new scheme at once (e.g. fused multiply-and-subtract will be ported later) but to make this work all the way from clang. The transformation is conditional on *both* the FADD and the FMUL having the FMF contract flag. Differential Revision: https://reviews.llvm.org/D31169 llvm-svn: 299096
author: Adam Nemet <anemet@apple.com> 2017-03-30 18:53:04 +0000
committer: Adam Nemet <anemet@apple.com> 2017-03-30 18:53:04 +0000
commit: edaec6de73aa24dee60645f0a8015ae0fe1849f3 (patch)
tree: fa638ef77354e231493dc8ec83d30d6b64fcef7f /llvm/test
parent: 1052b7236121f6934e46988e11b4e08d3dbf3951 (diff)
download: bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.tar.gz
bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.zip
2 files changed, 62 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
new file mode 100644
index 00000000000..f1e9d4f0c96
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <2 x float> @fma(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = fmul contract <2 x float> %A, %B; contract is set on the multiply ...
+	%tmp2 = fadd contract <2 x float> %C, %tmp1; ... and on the add, so a single fmla is expected
+	ret <2 x float> %tmp2
+}
+
+define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: no_fma_1:
+; CHECK: fmul
+; CHECK: fadd
+	%tmp1 = fmul contract <2 x float> %A, %B; contract is set on the multiply only
+	%tmp2 = fadd <2 x float> %C, %tmp1; no contract on the add, so a separate fmul and fadd are expected
+	ret <2 x float> %tmp2
+}
+
+define <2 x float> @no_fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: no_fma_2:
+; CHECK: fmul
+; CHECK: fadd
+	%tmp1 = fmul <2 x float> %A, %B; no contract on the multiply
+	%tmp2 = fadd contract <2 x float> %C, %tmp1; contract is set on the add only, so a separate fmul and fadd are expected
+	ret <2 x float> %tmp2
+}
diff --git a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
new file mode 100644
index 00000000000..8e97115bd1f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-linux-gnu | FileCheck %s
+
+define float @can_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
+; CHECK-LABEL: can_fma_with_fewer_uses:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    xsmulsp 0, 1, 2
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    xsmaddasp 1, 3, 4
+; CHECK-NEXT:    xsdivsp 1, 0, 1
+; CHECK-NEXT:    blr
+  %mul1 = fmul contract float %f1, %f2 ; has two uses: %add and the fdiv below
+  %mul2 = fmul contract float %f3, %f4 ; only used by %add
+  %add = fadd contract float %mul1, %mul2 ; all three ops carry contract; the checks expect one xsmulsp plus one xsmaddasp
+  %second_use_of_mul1 = fdiv float %mul1, %add ; second use of %mul1, so its product is still needed on its own
+  ret float %second_use_of_mul1
+}
+
+; There is no contract on the mul with no extra use so we can't fuse that.
+; Since we are fusing with the mul with an extra use, the fmul needs to stick
+; around beside the fma.
+define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
+; CHECK-LABEL: no_fma_with_fewer_uses:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    xsmulsp 0, 3, 4
+; CHECK-NEXT:    xsmulsp 13, 1, 2
+; CHECK-NEXT:    xsmaddasp 0, 1, 2
+; CHECK-NEXT:    xsdivsp 1, 13, 0
+; CHECK-NEXT:    blr
+  %mul1 = fmul contract float %f1, %f2 ; contract is set; has two uses: %add and the fdiv below
+  %mul2 = fmul float %f3, %f4 ; no contract flag on this multiply
+  %add = fadd contract float %mul1, %mul2 ; contract is set on the add; the checks expect two xsmulsp plus one xsmaddasp
+  %second_use_of_mul1 = fdiv float %mul1, %add ; second use of %mul1
+  ret float %second_use_of_mul1
+}
author	Adam Nemet <anemet@apple.com>	2017-03-30 18:53:04 +0000
committer	Adam Nemet <anemet@apple.com>	2017-03-30 18:53:04 +0000
commit	edaec6de73aa24dee60645f0a8015ae0fe1849f3 (patch)
tree	fa638ef77354e231493dc8ec83d30d6b64fcef7f /llvm/test
parent	1052b7236121f6934e46988e11b4e08d3dbf3951 (diff)
download	bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.tar.gz bcm5719-llvm-edaec6de73aa24dee60645f0a8015ae0fe1849f3.zip