author    Nicolai Haehnle <nhaehnle@gmail.com>  2017-01-31 14:35:37 +0000
committer Nicolai Haehnle <nhaehnle@gmail.com>  2017-01-31 14:35:37 +0000
commit    8813d5d2217117ff7cc60d399015ffc5347c43ee (patch)
tree      097536d920c7d2f1ed7f1f4e4f0362e8c2189824 /llvm
parent    9bf658d5fe3813ebea4f798711ac257c72b1d6b2 (diff)
[DAGCombine] require UnsafeFPMath for re-association of addition
Summary:
The affected transforms all implicitly use associativity of addition, for
which we usually require unsafe math to be enabled. The "Aggressive" flag
is only meant to convey information about the performance of the fused ops
relative to a fmul+fadd sequence.

Fixes Bug 31626.

Reviewers: spatel, hfinkel, mehdi_amini, arsenm, tstellarAMD

Subscribers: jholewinski, nemanjai, wdng, llvm-commits

Differential Revision: https://reviews.llvm.org/D28675

llvm-svn: 293635
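To illustrate the class of pattern being guarded, a minimal sketch in LLVM
IR (not part of the commit; it mirrors the t1_f64 case in the NVPTX test
below, and the function name is hypothetical):

  define double @reassoc(double %x, double %y, double %z,
                         double %u, double %v) {
    %a = fmul double %x, %y
    %b = fmul double %u, %v
    %s = fadd double %a, %b   ; contraction alone may fuse this into an fma
    %t = fadd double %s, %z   ; folding %z into a second fma reassociates
    ret double %t             ; the two additions, so it needs unsafe math
  }

With only -fp-contract=fast this should no longer lower to a chain of two
fma instructions; adding -enable-unsafe-fp-math restores that lowering.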
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  24
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fma-combine.ll       |  26
-rw-r--r--  llvm/test/CodeGen/AMDGPU/mad-combine.ll       |  87
-rw-r--r--  llvm/test/CodeGen/NVPTX/fma-assoc.ll          |  11
-rw-r--r--  llvm/test/CodeGen/PowerPC/fma-assoc.ll        | 126
5 files changed, 176 insertions, 98 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 33863ca16f9..58942469556 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8405,9 +8405,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8419,7 +8422,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL &&
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8649,10 +8655,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8666,7 +8675,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
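In IR terms, the first guarded fold above rewrites roughly as follows (a
sketch under the assumption that the aggressive combine fires; the function
names are hypothetical):

  declare float @llvm.fma.f32(float, float, float)

  ; before: (fadd (fma x, y, (fmul u, v)), z)
  define float @before(float %x, float %y, float %z, float %u, float %v) {
    %mul = fmul float %u, %v
    %fma = call float @llvm.fma.f32(float %x, float %y, float %mul)
    %add = fadd float %fma, %z
    ret float %add
  }

  ; after, now only under unsafe math: (fma x, y, (fma u, v, z))
  define float @after(float %x, float %y, float %z, float %u, float %v) {
    %inner = call float @llvm.fma.f32(float %u, float %v, float %z)
    %outer = call float @llvm.fma.f32(float %x, float %y, float %inner)
    ret float %outer
  }

Reassociating %z into the inner fma changes which rounding errors occur,
which is why the combine is now gated on Options.UnsafeFPMath.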
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
index ab1b6d96a8d..50c5a5abf7f 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be
; beneficial even without fp32 denormals, but they do require no-infs-fp-math
@@ -308,8 +308,14 @@ define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %o
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
-; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
-; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
+
+; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]]
+; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[TMP0]]
+; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP1]], -[[Z]]
+
+; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
+; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
+
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -343,8 +349,14 @@ define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
-; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
-; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
+
+; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]]
+; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[Y]], [[Z]], [[TMP0]]
+; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[TMP1]]
+
+; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
+; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
+
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
index d141281f36b..9caba32cbac 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -1,12 +1,12 @@
; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
; Make sure we don't form mad with denormals
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0
@@ -21,7 +21,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #0
; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-SLOWFMAF-NOT: v_fma
; SI-DENORM-SLOWFMAF-NOT: v_mad
@@ -58,8 +58,8 @@ define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrsp
; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
@@ -100,7 +100,7 @@ define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float a
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
@@ -131,7 +131,7 @@ define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrsp
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
+; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
@@ -164,8 +164,8 @@ define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float a
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
@@ -203,7 +203,7 @@ define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, fl
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
+; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
@@ -235,8 +235,8 @@ define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float a
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
@@ -275,7 +275,7 @@ define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, fl
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
+; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
@@ -309,8 +309,8 @@ define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float a
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], -[[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], -[[B]], -[[D]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
@@ -352,8 +352,8 @@ define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %ou
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
-; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
+; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
@@ -399,12 +399,9 @@ define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %ou
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
-; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]]
+; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
+; SI-DENORM: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
@@ -444,12 +441,9 @@ define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %o
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
-; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]]
-
-; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
-; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
-; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
+; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
+; SI-DENORM: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
@@ -485,19 +479,23 @@ define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %o
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-STD: v_mac_f32_e32 [[TMP]], [[B]], [[A]]
+; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[A]]
+; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP0]]
+
+; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], [[D]], [[E]], -[[C]]
+; SI-STD-UNSAFE: v_mac_f32_e32 [[RESULT]], [[B]], [[A]]
-; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
+; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
+; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
-; SI-DENORM: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -532,11 +530,16 @@ define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %o
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
-; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
-; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
+; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[C]], [[B]]
+; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP0]], [[A]]
+
+; SI-STD-UNSAFE: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
+; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
-; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
-; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
+; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
+; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
index 80a08a86316..df86d476efd 100644
--- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE
define ptx_device float @t1_f32(float %x, float %y, float %z,
float %u, float %v) {
-; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fmul float %x, %y
%b = fmul float %u, %v
@@ -14,8 +15,8 @@ define ptx_device float @t1_f32(float %x, float %y, float %z,
define ptx_device double @t1_f64(double %x, double %y, double %z,
double %u, double %v) {
-; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fmul double %x, %y
%b = fmul double %u, %v
diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
index e44bfc65242..5080e5b250e 100644
--- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SAFE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX -check-prefix=CHECK-VSX-SAFE %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -enable-unsafe-fp-math -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK -check-prefix=CHECK-UNSAFE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -enable-unsafe-fp-math -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX -check-prefix=CHECK-VSX-UNSAFE %s
define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
double %D, double %E) {
@@ -8,16 +10,28 @@ define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
%H = fadd double %F, %G ; <double> [#uses=1]
%I = fadd double %H, %E ; <double> [#uses=1]
ret double %I
-; CHECK-LABEL: test_FMADD_ASSOC1:
-; CHECK: fmadd
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: blr
+; CHECK-SAFE-LABEL: test_FMADD_ASSOC1:
+; CHECK-SAFE: fmul
+; CHECK-SAFE-NEXT: fmadd
+; CHECK-SAFE-NEXT: fadd
+; CHECK-SAFE-NEXT: blr
-; CHECK-VSX-LABEL: test_FMADD_ASSOC1:
-; CHECK-VSX: xsmaddmdp
-; CHECK-VSX-NEXT: xsmaddadp
-; CHECK-VSX-NEXT: fmr
-; CHECK-VSX-NEXT: blr
+; CHECK-UNSAFE-LABEL: test_FMADD_ASSOC1:
+; CHECK-UNSAFE: fmadd
+; CHECK-UNSAFE-NEXT: fmadd
+; CHECK-UNSAFE-NEXT: blr
+
+; CHECK-VSX-SAFE-LABEL: test_FMADD_ASSOC1:
+; CHECK-VSX-SAFE: xsmuldp
+; CHECK-VSX-SAFE-NEXT: xsmaddadp
+; CHECK-VSX-SAFE-NEXT: xsadddp
+; CHECK-VSX-SAFE-NEXT: blr
+
+; CHECK-VSX-UNSAFE-LABEL: test_FMADD_ASSOC1:
+; CHECK-VSX-UNSAFE: xsmaddmdp
+; CHECK-VSX-UNSAFE-NEXT: xsmaddadp
+; CHECK-VSX-UNSAFE-NEXT: fmr
+; CHECK-VSX-UNSAFE-NEXT: blr
}
define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
@@ -27,16 +41,28 @@ define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
%H = fadd double %F, %G ; <double> [#uses=1]
%I = fadd double %E, %H ; <double> [#uses=1]
ret double %I
-; CHECK-LABEL: test_FMADD_ASSOC2:
-; CHECK: fmadd
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: blr
+; CHECK-SAFE-LABEL: test_FMADD_ASSOC2:
+; CHECK-SAFE: fmul
+; CHECK-SAFE-NEXT: fmadd
+; CHECK-SAFE-NEXT: fadd
+; CHECK-SAFE-NEXT: blr
-; CHECK-VSX-LABEL: test_FMADD_ASSOC2:
-; CHECK-VSX: xsmaddmdp
-; CHECK-VSX-NEXT: xsmaddadp
-; CHECK-VSX-NEXT: fmr
-; CHECK-VSX-NEXT: blr
+; CHECK-UNSAFE-LABEL: test_FMADD_ASSOC2:
+; CHECK-UNSAFE: fmadd
+; CHECK-UNSAFE-NEXT: fmadd
+; CHECK-UNSAFE-NEXT: blr
+
+; CHECK-VSX-SAFE-LABEL: test_FMADD_ASSOC2:
+; CHECK-VSX-SAFE: xsmuldp
+; CHECK-VSX-SAFE-NEXT: xsmaddadp
+; CHECK-VSX-SAFE-NEXT: xsadddp
+; CHECK-VSX-SAFE-NEXT: blr
+
+; CHECK-VSX-UNSAFE-LABEL: test_FMADD_ASSOC2:
+; CHECK-VSX-UNSAFE: xsmaddmdp
+; CHECK-VSX-UNSAFE-NEXT: xsmaddadp
+; CHECK-VSX-UNSAFE-NEXT: fmr
+; CHECK-VSX-UNSAFE-NEXT: blr
}
define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
@@ -46,16 +72,28 @@ define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
%H = fadd double %F, %G ; <double> [#uses=1]
%I = fsub double %H, %E ; <double> [#uses=1]
ret double %I
-; CHECK-LABEL: test_FMSUB_ASSOC1:
-; CHECK: fmsub
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: blr
+; CHECK-SAFE-LABEL: test_FMSUB_ASSOC1:
+; CHECK-SAFE: fmul
+; CHECK-SAFE-NEXT: fmadd
+; CHECK-SAFE-NEXT: fsub
+; CHECK-SAFE-NEXT: blr
-; CHECK-VSX-LABEL: test_FMSUB_ASSOC1:
-; CHECK-VSX: xsmsubmdp
-; CHECK-VSX-NEXT: xsmaddadp
-; CHECK-VSX-NEXT: fmr
-; CHECK-VSX-NEXT: blr
+; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC1:
+; CHECK-UNSAFE: fmsub
+; CHECK-UNSAFE-NEXT: fmadd
+; CHECK-UNSAFE-NEXT: blr
+
+; CHECK-VSX-SAFE-LABEL: test_FMSUB_ASSOC1:
+; CHECK-VSX-SAFE: xsmuldp
+; CHECK-VSX-SAFE-NEXT: xsmaddadp
+; CHECK-VSX-SAFE-NEXT: xssubdp
+; CHECK-VSX-SAFE-NEXT: blr
+
+; CHECK-VSX-UNSAFE-LABEL: test_FMSUB_ASSOC1:
+; CHECK-VSX-UNSAFE: xsmsubmdp
+; CHECK-VSX-UNSAFE-NEXT: xsmaddadp
+; CHECK-VSX-UNSAFE-NEXT: fmr
+; CHECK-VSX-UNSAFE-NEXT: blr
}
define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
@@ -65,16 +103,28 @@ define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
%H = fadd double %F, %G ; <double> [#uses=1]
%I = fsub double %E, %H ; <double> [#uses=1]
ret double %I
-; CHECK-LABEL: test_FMSUB_ASSOC2:
-; CHECK: fnmsub
-; CHECK-NEXT: fnmsub
-; CHECK-NEXT: blr
+; CHECK-SAFE-LABEL: test_FMSUB_ASSOC2:
+; CHECK-SAFE: fmul
+; CHECK-SAFE-NEXT: fmadd
+; CHECK-SAFE-NEXT: fsub
+; CHECK-SAFE-NEXT: blr
-; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:
-; CHECK-VSX: xsnmsubmdp
-; CHECK-VSX-NEXT: xsnmsubadp
-; CHECK-VSX-NEXT: fmr
-; CHECK-VSX-NEXT: blr
+; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC2:
+; CHECK-UNSAFE: fnmsub
+; CHECK-UNSAFE-NEXT: fnmsub
+; CHECK-UNSAFE-NEXT: blr
+
+; CHECK-VSX-SAFE-LABEL: test_FMSUB_ASSOC2:
+; CHECK-VSX-SAFE: xsmuldp
+; CHECK-VSX-SAFE-NEXT: xsmaddadp
+; CHECK-VSX-SAFE-NEXT: xssubdp
+; CHECK-VSX-SAFE-NEXT: blr
+
+; CHECK-VSX-UNSAFE-LABEL: test_FMSUB_ASSOC2:
+; CHECK-VSX-UNSAFE: xsnmsubmdp
+; CHECK-VSX-UNSAFE-NEXT: xsnmsubadp
+; CHECK-VSX-UNSAFE-NEXT: fmr
+; CHECK-VSX-UNSAFE-NEXT: blr
}
define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C,