summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 17
-rw-r--r-- llvm/test/CodeGen/X86/phaddsub-undef.ll 24
2 files changed, 14 insertions, 27 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d4c2b108537..ed74d429606 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7183,9 +7183,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
}
// Find all zeroable elements.
- std::bitset<4> Zeroable;
- for (int i=0; i < 4; ++i) {
- SDValue Elt = Op->getOperand(i);
+ std::bitset<4> Zeroable, Undefs;
+ for (int i = 0; i < 4; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ Undefs[i] = Elt.isUndef();
Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
}
assert(Zeroable.size() - Zeroable.count() > 1 &&
@@ -7195,10 +7196,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
// zeroable or extract_vector_elt with constant index.
SDValue FirstNonZero;
unsigned FirstNonZeroIdx;
- for (unsigned i=0; i < 4; ++i) {
+ for (unsigned i = 0; i < 4; ++i) {
if (Zeroable[i])
continue;
- SDValue Elt = Op->getOperand(i);
+ SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Elt.getOperand(1)))
return SDValue();
@@ -7237,10 +7238,12 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
if (EltIdx == 4) {
// Let the shuffle legalizer deal with blend operations.
- SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
+ SDValue VZeroOrUndef = (Zeroable == Undefs)
+ ? DAG.getUNDEF(VT)
+ : getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
if (V1.getSimpleValueType() != VT)
V1 = DAG.getBitcast(VT, V1);
- return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
+ return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask);
}
// See if we can lower this build_vector to a INSERTPS.
diff --git a/llvm/test/CodeGen/X86/phaddsub-undef.ll b/llvm/test/CodeGen/X86/phaddsub-undef.ll
index fed7ec80d81..b0be5c7eede 100644
--- a/llvm/test/CodeGen/X86/phaddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/phaddsub-undef.ll
@@ -160,26 +160,10 @@ define <16 x i32> @test16_v16i32_undef(<16 x i32> %a, <16 x i32> %b) {
; SSE-NEXT: phaddd %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX-SLOW-LABEL: test16_v16i32_undef:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX-SLOW-NEXT: retq
-;
-; AVX1-FAST-LABEL: test16_v16i32_undef:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-FAST-LABEL: test16_v16i32_undef:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: retq
-;
-; AVX512-FAST-LABEL: test16_v16i32_undef:
-; AVX512-FAST: # %bb.0:
-; AVX512-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX512-FAST-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; AVX512-FAST-NEXT: retq
+; AVX-LABEL: test16_v16i32_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
%vecext = extractelement <16 x i32> %a, i32 0
%vecext1 = extractelement <16 x i32> %a, i32 1
%add = add i32 %vecext, %vecext1
OpenPOWER on IntegriCloud