[X86] Specifically limit fmin/fmax commutativity to NoNaNs + NoSignedZeros

The backend UnsafeFPMath flag does not imply the other fast-math flags, so gate the FMIN/FMAX commutation on exactly the flags it requires: NoNaNsFPMath and NoSignedZerosFPMath.
author: Benjamin Kramer <benny.kra@googlemail.com> 2019-11-05 19:12:44 +0100
committer: Benjamin Kramer <benny.kra@googlemail.com> 2019-11-05 19:34:06 +0100
commit: 00e53d912dd768047a4fdc6e0e9b3ac7f0bcc5e5 (patch)
tree: 06b93ec4c0b94d0fe9db986874f7fbf444a7869a
parent: e74c5b96610dfb03825d31035f50813af58beac5 (diff)
download: bcm5719-llvm-00e53d912dd768047a4fdc6e0e9b3ac7f0bcc5e5.tar.gz
bcm5719-llvm-00e53d912dd768047a4fdc6e0e9b3ac7f0bcc5e5.zip
5 files changed, 17 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cec04547e79..ea4d43bbc8e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41986,8 +41986,9 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
 static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
   assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
 
-  // Only perform optimizations if UnsafeMath is used.
-  if (!DAG.getTarget().Options.UnsafeFPMath)
+  // FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
+  if (!DAG.getTarget().Options.NoNaNsFPMath ||
+      !DAG.getTarget().Options.NoSignedZerosFPMath)
     return SDValue();
 
   // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
diff --git a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
index 00c9e4c957c..9282ed1ece4 100644
--- a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
+++ b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK_UNSAFE --check-prefix=AVX512F_UNSAFE
+; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK_UNSAFE --check-prefix=AVX512F_UNSAFE
+; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64 -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 ; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 
 define <16 x float> @test_max_v16f32(<16 x float> * %a_ptr, <16 x float> %b)  {
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
index 162cda8c867..22da63a083f 100644
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
 
 ; Incremental updates of the instruction depths should be enough for this test
 ; case.
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
 
 ; Verify that the first two adds are independent regardless of how the inputs are
 ; commuted. The destination registers are used as source registers for the third add.
diff --git a/llvm/test/CodeGen/X86/sse-minmax.ll b/llvm/test/CodeGen/X86/sse-minmax.ll
index 178a74ba546..0f703d69544 100644
--- a/llvm/test/CodeGen/X86/sse-minmax.ll
+++ b/llvm/test/CodeGen/X86/sse-minmax.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2                                                 | FileCheck %s --check-prefix=ALL --check-prefix=STRICT
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-unsafe-fp-math -enable-no-nans-fp-math  | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=UNSAFE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-nans-fp-math                         | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=FINITE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2                                                          | FileCheck %s --check-prefix=ALL --check-prefix=STRICT
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-signed-zeros-fp-math -enable-no-nans-fp-math  | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=UNSAFE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-nans-fp-math                                  | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=FINITE
 
 ; Some of these patterns can be matched as SSE min or max. Some of
 ; them can be matched provided that the operands are swapped.
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index 39882cde108..9ad8e7b2bb6 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -2051,4 +2051,4 @@ define <16 x float> @stack_fold_permilpsvar_zmm_maskz(<16 x float> %a0, <16 x i3
 }
 
 attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
author	Benjamin Kramer <benny.kra@googlemail.com>	2019-11-05 19:12:44 +0100
committer	Benjamin Kramer <benny.kra@googlemail.com>	2019-11-05 19:34:06 +0100
commit	00e53d912dd768047a4fdc6e0e9b3ac7f0bcc5e5 (patch)
tree	06b93ec4c0b94d0fe9db986874f7fbf444a7869a
parent	e74c5b96610dfb03825d31035f50813af58beac5 (diff)
download	bcm5719-llvm-00e53d912dd768047a4fdc6e0e9b3ac7f0bcc5e5.tar.gz bcm5719-llvm-00e53d912dd768047a4fdc6e0e9b3ac7f0bcc5e5.zip