diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-popcnt-128.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-tzcnt-128.ll | 4 | 
3 files changed, 8 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index edefa158376..20faa8e3132 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25038,7 +25038,7 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,    // PSADBW instruction horizontally add all bytes and leave the result in i64    // chunks, thus directly computes the pop count for v2i64 and v4i64.    if (EltVT == MVT::i64) { -    SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); +    SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);      MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);      V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);      return DAG.getBitcast(VT, V); @@ -25050,13 +25050,13 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,      // this is that it lines up the results of two PSADBW instructions to be      // two v2i64 vectors which concatenated are the 4 population counts. We can      // then use PACKUSWB to shrink and concatenate them into a v4i32 again. -    SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL); +    SDValue Zeros = DAG.getConstant(0, DL, VT);      SDValue V32 = DAG.getBitcast(VT, V); -    SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros); -    SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros); +    SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros); +    SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);      // Do the horizontal sums into two v2i64s. -    Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); +    Zeros = DAG.getConstant(0, DL, ByteVecVT);      MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);      Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,                        DAG.getBitcast(ByteVecVT, Low), Zeros); diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll index 16539f1b2d4..c91b2e111b6 100644 --- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll @@ -308,7 +308,7 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {  ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]  ; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero  ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0  ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0  ; BITALG-NEXT:    retq diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll index 21142ff3970..8c32bf86700 100644 --- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll @@ -633,7 +633,7 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {  ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]  ; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero  ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0  ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0  ; BITALG-NEXT:    retq @@ -876,7 +876,7 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {  ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1  ; BITALG-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]  ; BITALG-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; BITALG-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero  ; BITALG-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0  ; BITALG-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0  ; BITALG-NEXT:    retq  | 

