diff options
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll | 22 |
2 files changed, 27 insertions, 16 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6bee966a327..310f5ef5dc7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3794,20 +3794,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { - // If the input vector is not legal, it is likely that we will not find a - // legal vector of the same size. Replace the concatenate vector with a - // nasty build vector. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); + EVT InVT = N->getOperand(0).getValueType(); SDLoc dl(N); + + // If the widen width for this operand is the same as the width of the concat + // and all but the first operand is undef, just use the widened operand. + unsigned NumOperands = N->getNumOperands(); + if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { + unsigned i; + for (i = 1; i < NumOperands; ++i) + if (!N->getOperand(i).isUndef()) + break; + + if (i == NumOperands) + return GetWidenedVector(N->getOperand(0)); + } + + // Otherwise, fall back to a nasty build vector. unsigned NumElts = VT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(NumElts); - EVT InVT = N->getOperand(0).getValueType(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned Idx = 0; - unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); assert(getTypeAction(InOp.getValueType()) == diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 47649a54e80..1ccd586c453 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -1320,10 +1320,10 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { ; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero ; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] ; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] -; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] -; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: shufps $0, %xmm1, %xmm2 # encoding: [0x0f,0xc6,0xd1,0x00] +; X86-SSE-NEXT: # xmm2 = xmm2[0,0],xmm1[0,0] +; X86-SSE-NEXT: shufps $36, %xmm2, %xmm0 # encoding: [0x0f,0xc6,0xc2,0x24] +; X86-SSE-NEXT: # xmm0 = xmm0[0,1],xmm2[2,0] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_loadh_pi: @@ -1378,14 +1378,14 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { ; X86-SSE-LABEL: test_mm_loadl_pi: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] -; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] +; X86-SSE-NEXT: movss (%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x10] ; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] -; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] -; X86-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3] +; X86-SSE-NEXT: movss 4(%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x48,0x04] +; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: shufps $0, %xmm2, %xmm1 # encoding: [0x0f,0xc6,0xca,0x00] +; X86-SSE-NEXT: # xmm1 = xmm1[0,0],xmm2[0,0] +; X86-SSE-NEXT: shufps $226, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe2] +; X86-SSE-NEXT: # xmm1 = xmm1[2,0],xmm0[2,3] ; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; |