diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 |
| -rw-r--r-- | llvm/test/CodeGen/X86/haddsub-undef.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/phaddsub-undef.ll | 56 |
3 files changed, 31 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 950e0f4c8e7..f7f3d38f68e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8581,6 +8581,22 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,    else if (V1.getValueSizeInBits() < Width)      V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width); +  unsigned NumElts = VT.getVectorNumElements(); +  APInt DemandedElts = APInt::getAllOnesValue(NumElts); +  for (unsigned i = 0; i != NumElts; ++i) +    if (BV->getOperand(i).isUndef()) +      DemandedElts.clearBit(i); + +  // If we don't need the upper xmm, then perform as a xmm hop. +  unsigned HalfNumElts = NumElts / 2; +  if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) { +    MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts); +    V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128); +    V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128); +    SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1); +    return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256); +  } +    return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);  } diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll index a5bfaaf566c..c8eb9d7d1c3 100644 --- a/llvm/test/CodeGen/X86/haddsub-undef.ll +++ b/llvm/test/CodeGen/X86/haddsub-undef.ll @@ -247,7 +247,7 @@ define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) {  ;  ; AVX-LABEL: test10_undef:  ; AVX:       # %bb.0: -; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 +; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0  ; AVX-NEXT:    retq    %vecext = extractelement <8 x float> %a, i32 0    %vecext1 = extractelement <8 x float> %a, i32 1 @@ -300,7 +300,7 @@ define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) {  ;  ; AVX-LABEL: test12_undef:  ; AVX:       # %bb.0: -; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0 +; AVX-NEXT:    
vhaddps %xmm0, %xmm0, %xmm0  ; AVX-NEXT:    retq    %vecext = extractelement <8 x float> %a, i32 0    %vecext1 = extractelement <8 x float> %a, i32 1 diff --git a/llvm/test/CodeGen/X86/phaddsub-undef.ll b/llvm/test/CodeGen/X86/phaddsub-undef.ll index 6fffbefa427..b0be5c7eede 100644 --- a/llvm/test/CodeGen/X86/phaddsub-undef.ll +++ b/llvm/test/CodeGen/X86/phaddsub-undef.ll @@ -16,20 +16,10 @@ define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) {  ; SSE-NEXT:    phaddd %xmm2, %xmm0  ; SSE-NEXT:    retq  ; -; AVX1-LABEL: test14_undef: -; AVX1:       # %bb.0: -; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT:    retq -; -; AVX2-LABEL: test14_undef: -; AVX2:       # %bb.0: -; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT:    retq -; -; AVX512-LABEL: test14_undef: -; AVX512:       # %bb.0: -; AVX512-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT:    retq +; AVX-LABEL: test14_undef: +; AVX:       # %bb.0: +; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT:    retq    %vecext = extractelement <8 x i32> %a, i32 0    %vecext1 = extractelement <8 x i32> %a, i32 1    %add = add i32 %vecext, %vecext1 @@ -149,20 +139,10 @@ define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) {  ; SSE-NEXT:    phaddd %xmm0, %xmm0  ; SSE-NEXT:    retq  ; -; AVX1-LABEL: test16_undef: -; AVX1:       # %bb.0: -; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT:    retq -; -; AVX2-LABEL: test16_undef: -; AVX2:       # %bb.0: -; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0 -; AVX2-NEXT:    retq -; -; AVX512-LABEL: test16_undef: -; AVX512:       # %bb.0: -; AVX512-NEXT:    vphaddd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT:    retq +; AVX-LABEL: test16_undef: +; AVX:       # %bb.0: +; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0 +; AVX-NEXT:    retq    %vecext = extractelement <8 x i32> %a, i32 0    %vecext1 = extractelement <8 x i32> %a, i32 1    %add = add i32 %vecext, %vecext1 @@ -180,20 +160,10 @@ define <16 x i32> @test16_v16i32_undef(<16 x i32> %a, <16 x i32> %b) {  
; SSE-NEXT:    phaddd %xmm0, %xmm0  ; SSE-NEXT:    retq  ; -; AVX1-LABEL: test16_v16i32_undef: -; AVX1:       # %bb.0: -; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT:    retq -; -; AVX2-LABEL: test16_v16i32_undef: -; AVX2:       # %bb.0: -; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0 -; AVX2-NEXT:    retq -; -; AVX512-LABEL: test16_v16i32_undef: -; AVX512:       # %bb.0: -; AVX512-NEXT:    vphaddd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT:    retq +; AVX-LABEL: test16_v16i32_undef: +; AVX:       # %bb.0: +; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0 +; AVX-NEXT:    retq    %vecext = extractelement <16 x i32> %a, i32 0    %vecext1 = extractelement <16 x i32> %a, i32 1    %add = add i32 %vecext, %vecext1 @@ -268,7 +238,7 @@ define <16 x i32> @test17_v16i32_undef(<16 x i32> %a, <16 x i32> %b) {  ; AVX512-LABEL: test17_v16i32_undef:  ; AVX512:       # %bb.0:  ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT:    vphaddd %xmm1, %xmm0, %xmm0  ; AVX512-NEXT:    retq    %vecext = extractelement <16 x i32> %a, i32 0    %vecext1 = extractelement <16 x i32> %a, i32 1  | 

