diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-11-10 00:26:42 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-11-10 00:26:42 +0000 |
| commit | 0364085281468a436c4d287ffd1565249402e5f9 (patch) | |
| tree | 7b765f3a2d54cad270a6ebfbf0fb8c39c38c6fc6 /llvm/lib/Target | |
| parent | ff6a4edc26b59dca90ec89a3ff5bd8c1ac1e22c7 (diff) | |
| download | bcm5719-llvm-0364085281468a436c4d287ffd1565249402e5f9.tar.gz bcm5719-llvm-0364085281468a436c4d287ffd1565249402e5f9.zip | |
[X86] In LowerHorizontalByteSum, emit vector_shuffle nodes instead of directly using X86ISD::UNPCKL/X86ISD::UNPCKH.
This gives shuffle lowering the freedom to use zero_extend_vector_inreg for the unpckl shuffle. Shuffle combining usually makes this swap later, but apparently not when AVX512 is enabled.
While there, also use DAG.getConstant to create a 0 vector instead of using the helper that forces a specific BUILD_VECTOR. I don't think that helper is usually needed. We're basically free to create a constant build_vector anytime and it will be legalized on its own.
llvm-svn: 346574
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index edefa158376..20faa8e3132 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25038,7 +25038,7 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, // PSADBW instruction horizontally add all bytes and leave the result in i64 // chunks, thus directly computes the pop count for v2i64 and v4i64. if (EltVT == MVT::i64) { - SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); + SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT); MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros); return DAG.getBitcast(VT, V); @@ -25050,13 +25050,13 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, // this is that it lines up the results of two PSADBW instructions to be // two v2i64 vectors which concatenated are the 4 population counts. We can // then use PACKUSWB to shrink and concatenate them into a v4i32 again. - SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL); + SDValue Zeros = DAG.getConstant(0, DL, VT); SDValue V32 = DAG.getBitcast(VT, V); - SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros); - SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros); + SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros); + SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros); // Do the horizontal sums into two v2i64s. - Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); + Zeros = DAG.getConstant(0, DL, ByteVecVT); MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, DAG.getBitcast(ByteVecVT, Low), Zeros); |

