diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-09-08 00:43:52 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-09-08 00:43:52 +0000 |
| commit | 37dd59298fd46e28ae2b2569465c5195d5708a0a (patch) | |
| tree | bbcaaaeff89f42ad2c185432251944116dc9e576 /llvm/lib | |
| parent | 5bd4a4806aafc31c67a55c7d8b2993c879dc5bc2 (diff) | |
| download | bcm5719-llvm-37dd59298fd46e28ae2b2569465c5195d5708a0a.tar.gz bcm5719-llvm-37dd59298fd46e28ae2b2569465c5195d5708a0a.zip | |
[X86] Make getZeroVector return floating point vectors in their native type on SSE2 and later.
isel used to require zero vectors to be canonicalized to a single
type to minimize the number of patterns needed to match. This is
no longer required.
I plan to do this to integers too, but floating point was simpler
to start with. Integer has a complication where v32i16/v64i8 aren't
legal when the other 512-bit integer types are.
llvm-svn: 371325
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 11 |
3 files changed, 23 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8e45651bcab..552c91703a7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5403,6 +5403,8 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, SDValue Vec; if (!Subtarget.hasSSE2() && VT.is128BitVector()) { Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); + } else if (VT.isFloatingPoint()) { + Vec = DAG.getConstantFP(+0.0, dl, VT); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 31a46e68a86..94108402ac9 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -412,6 +412,11 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllOnesV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; +} + // Alias instructions that allow VPTERNLOG to be used with a mask to create // a mix of all ones and all zeros elements. This is done this way to force // the same register to be used as input for all three sources. @@ -436,6 +441,13 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", [(set VR256X:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>; +} + // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index b3c88982a39..1626228b06b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -128,13 +128,15 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, SchedRW = [WriteZero] in { + isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } -let Predicates = [NoAVX512] in +let Predicates = [NoAVX512] in { def : Pat<(v4i32 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +} // The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI, @@ -147,6 +149,11 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [NoAVX512] in { +def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; +} + // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, |

