diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/insertelement-zero.ll | 2 |
3 files changed, 12 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f7fe3e8add4..80f1afe310f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35750,6 +35750,18 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, getZeroVector(OpVT, Subtarget, DAG, dl), SubVec2, Vec.getOperand(2)); + + // If we are inserting into both halves of the vector, the starting + // vector should be undef. If it isn't, make it so. Only do this if the + // the early insert has no other uses. + // TODO: Should this be a generic DAG combine? + if (!Vec.getOperand(0).isUndef() && Vec.hasOneUse()) { + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), + SubVec2, Vec.getOperand(2)); + DCI.AddToWorklist(Vec.getNode()); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, Idx); + + } } } diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll index f2b41f7c29e..a687d30d873 100644 --- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -758,7 +758,6 @@ define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind { ; AVX: # BB#0: ; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX-NEXT: vandps %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7] ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 diff --git a/llvm/test/CodeGen/X86/insertelement-zero.ll b/llvm/test/CodeGen/X86/insertelement-zero.ll index 8179758c4c0..dff7a69dc50 100644 --- a/llvm/test/CodeGen/X86/insertelement-zero.ll +++ b/llvm/test/CodeGen/X86/insertelement-zero.ll @@ -473,7 +473,6 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) { ; AVX1-NEXT: xorl %eax, %eax ; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] @@ -485,7 +484,6 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) { ; AVX2-NEXT: xorl %eax, %eax ; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] |