diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/phaddsub-undef.ll | 24 |
2 files changed, 14 insertions, 27 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d4c2b108537..ed74d429606 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7183,9 +7183,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, } // Find all zeroable elements. - std::bitset<4> Zeroable; - for (int i=0; i < 4; ++i) { - SDValue Elt = Op->getOperand(i); + std::bitset<4> Zeroable, Undefs; + for (int i = 0; i < 4; ++i) { + SDValue Elt = Op.getOperand(i); + Undefs[i] = Elt.isUndef(); Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt)); } assert(Zeroable.size() - Zeroable.count() > 1 && @@ -7195,10 +7196,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, // zeroable or extract_vector_elt with constant index. SDValue FirstNonZero; unsigned FirstNonZeroIdx; - for (unsigned i=0; i < 4; ++i) { + for (unsigned i = 0; i < 4; ++i) { if (Zeroable[i]) continue; - SDValue Elt = Op->getOperand(i); + SDValue Elt = Op.getOperand(i); if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa<ConstantSDNode>(Elt.getOperand(1))) return SDValue(); @@ -7237,10 +7238,12 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, if (EltIdx == 4) { // Let the shuffle legalizer deal with blend operations. - SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op)); + SDValue VZeroOrUndef = (Zeroable == Undefs) + ? DAG.getUNDEF(VT) + : getZeroVector(VT, Subtarget, DAG, SDLoc(Op)); if (V1.getSimpleValueType() != VT) V1 = DAG.getBitcast(VT, V1); - return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask); + return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask); } // See if we can lower this build_vector to a INSERTPS. diff --git a/llvm/test/CodeGen/X86/phaddsub-undef.ll b/llvm/test/CodeGen/X86/phaddsub-undef.ll index fed7ec80d81..b0be5c7eede 100644 --- a/llvm/test/CodeGen/X86/phaddsub-undef.ll +++ b/llvm/test/CodeGen/X86/phaddsub-undef.ll @@ -160,26 +160,10 @@ define <16 x i32> @test16_v16i32_undef(<16 x i32> %a, <16 x i32> %b) { ; SSE-NEXT: phaddd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX-SLOW-LABEL: test16_v16i32_undef: -; AVX-SLOW: # %bb.0: -; AVX-SLOW-NEXT: vphaddd %xmm0, %xmm0, %xmm0 -; AVX-SLOW-NEXT: retq -; -; AVX1-FAST-LABEL: test16_v16i32_undef: -; AVX1-FAST: # %bb.0: -; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 -; AVX1-FAST-NEXT: retq -; -; AVX2-FAST-LABEL: test16_v16i32_undef: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 -; AVX2-FAST-NEXT: retq -; -; AVX512-FAST-LABEL: test16_v16i32_undef: -; AVX512-FAST: # %bb.0: -; AVX512-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 -; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; AVX512-FAST-NEXT: retq +; AVX-LABEL: test16_v16i32_undef: +; AVX: # %bb.0: +; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq %vecext = extractelement <16 x i32> %a, i32 0 %vecext1 = extractelement <16 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 |