summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2019-12-18 14:24:20 -0800
committer: Craig Topper <craig.topper@intel.com> 2019-12-18 14:42:56 -0800
commit: f0df4218b67d0abe96867804b8932b9b88998f51 (patch)
tree: 9b5fd2d7f5ca195edc96855901ed5cf68c9dd062
parent: 6bf7c345f82a06107a541db80f1ae128844c307c (diff)
download: bcm5719-llvm-f0df4218b67d0abe96867804b8932b9b88998f51.tar.gz
download: bcm5719-llvm-f0df4218b67d0abe96867804b8932b9b88998f51.zip
[X86] Add a simple hack to IsProfitableToFold to prevent vselect+strict fp operations from being folded into masked instructions.
We really need to update the isel patterns to prevent this, but that requires untangling some of the tablegen definitions first. Until then, this hack preserves correctness in the short term.
-rw-r--r-- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 6
-rw-r--r-- llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll | 3
2 files changed, 8 insertions, 1 deletion
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d60e08c409c..592e6484207 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -580,6 +580,12 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (!N.hasOneUse())
return false;
+ // FIXME: Temporary hack to prevent strict floating point nodes from
+ // folding into masked operations illegally.
+ if (U == Root && Root->getOpcode() == ISD::VSELECT &&
+ N.getOpcode() != ISD::LOAD && N.getOpcode() != X86ISD::VBROADCAST_LOAD)
+ return false;
+
if (N.getOpcode() != ISD::LOAD)
return true;
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 82c798ab170..27bc9c13d4d 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -7317,7 +7317,8 @@ define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i3
; AVX512-LABEL: vpaddd_mask_test:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestmd %zmm2, %zmm2, %k1
-; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1}
+; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %i, <16 x float> %j, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
OpenPOWER on IntegriCloud