[X86] Make getZeroVector return floating point vectors in their native type on SSE2 and later.

Instruction selection (isel) used to require zero vectors to be canonicalized to a single type to minimize the number of patterns needed to match them. This is no longer required. I plan to make the same change for integer vectors too, but floating point was simpler to start with. Integer vectors have a complication: v32i16/v64i8 aren't always legal when the other 512-bit integer types are.

llvm-svn: 371325
author: Craig Topper <craig.topper@intel.com> 2019-09-08 00:43:52 +0000
committer: Craig Topper <craig.topper@intel.com> 2019-09-08 00:43:52 +0000
commit: 37dd59298fd46e28ae2b2569465c5195d5708a0a (patch)
tree: bbcaaaeff89f42ad2c185432251944116dc9e576 /llvm/lib
parent: 5bd4a4806aafc31c67a55c7d8b2993c879dc5bc2 (diff)
download: bcm5719-llvm-37dd59298fd46e28ae2b2569465c5195d5708a0a.tar.gz
bcm5719-llvm-37dd59298fd46e28ae2b2569465c5195d5708a0a.zip
3 files changed, 23 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8e45651bcab..552c91703a7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5403,6 +5403,8 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
   SDValue Vec;
   if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
     Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
+  } else if (VT.isFloatingPoint()) {
+    Vec = DAG.getConstantFP(+0.0, dl, VT);
   } else if (VT.getVectorElementType() == MVT::i1) {
     assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
            "Unexpected vector type");
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 31a46e68a86..94108402ac9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -412,6 +412,11 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
 }
 
+let Predicates = [HasAVX512] in {
+def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
+}
+
 // Alias instructions that allow VPTERNLOG to be used with a mask to create
 // a mix of all ones and all zeros elements. This is done this way to force
 // the same register to be used as input for all three sources.
@@ -436,6 +441,13 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
 }
 
+let Predicates = [HasAVX512] in {
+def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
+}
+
 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
 // This is expanded by ExpandPostRAPseudos.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index b3c88982a39..1626228b06b 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -128,13 +128,15 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-zeros value if folding it would be beneficial.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isPseudo = 1, SchedRW = [WriteZero] in {
+    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
 def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                [(set VR128:$dst, (v4f32 immAllZerosV))]>;
 }
 
-let Predicates = [NoAVX512] in
+let Predicates = [NoAVX512] in {
 def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+}
 
 
 // The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
@@ -147,6 +149,11 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                  [(set VR256:$dst, (v8i32 immAllZerosV))]>;
 }
 
+let Predicates = [NoAVX512] in {
+def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
+}
+
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-ones value if folding it would be beneficial.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
author	Craig Topper <craig.topper@intel.com>	2019-09-08 00:43:52 +0000
committer	Craig Topper <craig.topper@intel.com>	2019-09-08 00:43:52 +0000
commit	37dd59298fd46e28ae2b2569465c5195d5708a0a (patch)
tree	bbcaaaeff89f42ad2c185432251944116dc9e576 /llvm/lib
parent	5bd4a4806aafc31c67a55c7d8b2993c879dc5bc2 (diff)
download	bcm5719-llvm-37dd59298fd46e28ae2b2569465c5195d5708a0a.tar.gz bcm5719-llvm-37dd59298fd46e28ae2b2569465c5195d5708a0a.zip