summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-05-10 15:46:04 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-05-10 15:46:04 +0000
commita0b1518a4a5738765619fd09a58d12573163ec79 (patch)
treec62046eb659ea07eb136c783f67ee91f2cd48be0
parent7a41cd5b88849bc18b9d0a3971b4f9f551a6bd72 (diff)
downloadbcm5719-llvm-a0b1518a4a5738765619fd09a58d12573163ec79.tar.gz
bcm5719-llvm-a0b1518a4a5738765619fd09a58d12573163ec79.zip
[X86][SSE] Add getHopForBuildVector vector splitting
If we only use the lower xmm of a ymm hop, then extract the xmm's (for free), perform the xmm hop and then insert back into a ymm (for free). Fixes some of the regressions noted in D61782.

llvm-svn: 360435
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp16
-rw-r--r--llvm/test/CodeGen/X86/haddsub-undef.ll4
-rw-r--r--llvm/test/CodeGen/X86/phaddsub-undef.ll56
3 files changed, 31 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 950e0f4c8e7..f7f3d38f68e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8581,6 +8581,22 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
else if (V1.getValueSizeInBits() < Width)
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
+ unsigned NumElts = VT.getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (BV->getOperand(i).isUndef())
+ DemandedElts.clearBit(i);
+
+ // If we don't need the upper xmm, then perform as a xmm hop.
+ unsigned HalfNumElts = NumElts / 2;
+ if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts);
+ V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
+ V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
+ SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
+ return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256);
+ }
+
return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
}
diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll
index a5bfaaf566c..c8eb9d7d1c3 100644
--- a/llvm/test/CodeGen/X86/haddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -247,7 +247,7 @@ define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) {
;
; AVX-LABEL: test10_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %a, i32 1
@@ -300,7 +300,7 @@ define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) {
;
; AVX-LABEL: test12_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vhaddps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %a, i32 1
diff --git a/llvm/test/CodeGen/X86/phaddsub-undef.ll b/llvm/test/CodeGen/X86/phaddsub-undef.ll
index 6fffbefa427..b0be5c7eede 100644
--- a/llvm/test/CodeGen/X86/phaddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/phaddsub-undef.ll
@@ -16,20 +16,10 @@ define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-NEXT: phaddd %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: test14_undef:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test14_undef:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test14_undef:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: test14_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%vecext = extractelement <8 x i32> %a, i32 0
%vecext1 = extractelement <8 x i32> %a, i32 1
%add = add i32 %vecext, %vecext1
@@ -149,20 +139,10 @@ define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-NEXT: phaddd %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: test16_undef:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test16_undef:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test16_undef:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: test16_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
%vecext = extractelement <8 x i32> %a, i32 0
%vecext1 = extractelement <8 x i32> %a, i32 1
%add = add i32 %vecext, %vecext1
@@ -180,20 +160,10 @@ define <16 x i32> @test16_v16i32_undef(<16 x i32> %a, <16 x i32> %b) {
; SSE-NEXT: phaddd %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: test16_v16i32_undef:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test16_v16i32_undef:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test16_v16i32_undef:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: test16_v16i32_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
%vecext = extractelement <16 x i32> %a, i32 0
%vecext1 = extractelement <16 x i32> %a, i32 1
%add = add i32 %vecext, %vecext1
@@ -268,7 +238,7 @@ define <16 x i32> @test17_v16i32_undef(<16 x i32> %a, <16 x i32> %b) {
; AVX512-LABEL: test17_v16i32_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%vecext = extractelement <16 x i32> %a, i32 0
%vecext1 = extractelement <16 x i32> %a, i32 1
OpenPOWER on IntegriCloud