diff options
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-concatvectors.ll | 2 |
2 files changed, 12 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 166577e5faf..d9834cf298c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16518,11 +16518,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); - // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). - if (In->getOpcode() == ISD::BITCAST && - !In->getOperand(0).getValueType().isVector()) { - SDValue Scalar = In->getOperand(0); + SDValue Scalar = peekThroughOneUseBitcasts(In); + // concat_vectors(scalar_to_vector(scalar), undef) -> + // scalar_to_vector(scalar) + if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR && + Scalar.hasOneUse()) { + EVT SVT = Scalar.getValueType().getVectorElementType(); + if (SVT == Scalar.getOperand(0).getValueType()) + Scalar = Scalar.getOperand(0); + } + + // concat_vectors(scalar, undef) -> scalar_to_vector(scalar) + if (!Scalar.getValueType().isVector()) { // If the bitcast type isn't legal, it might be a trunc of a legal type; // look through the trunc so we can still do the transform: // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) diff --git a/llvm/test/CodeGen/X86/combine-concatvectors.ll b/llvm/test/CodeGen/X86/combine-concatvectors.ll index 89e8c03eca3..29cf6148203 100644 --- a/llvm/test/CodeGen/X86/combine-concatvectors.ll +++ b/llvm/test/CodeGen/X86/combine-concatvectors.ll @@ -5,8 +5,6 @@ define void @PR32957(<2 x float>* %in, <8 x float>* %out) { ; CHECK-LABEL: PR32957: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; CHECK-NEXT: vmovaps %ymm0, (%rsi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq |