diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-12-16 19:31:36 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-12-16 19:31:36 +0000 |
| commit | 1260a4e826ecff1ba6a9076336fb735cc90dcd76 (patch) | |
| tree | 5bf3626d9e3f991fd45d00079f09b428ea1d1e0b | |
| parent | 5029d676f82782c36b2217a542286eb1773756d5 (diff) | |
| download | bcm5719-llvm-1260a4e826ecff1ba6a9076336fb735cc90dcd76.tar.gz bcm5719-llvm-1260a4e826ecff1ba6a9076336fb735cc90dcd76.zip | |
[X86] When using vpopcntdq for ctpop of v8i16 vectors, only promote to v8i32.
Previously we promoted to v8i64, but we don't need to go all the way to 512 bits. If we have VLX, we can use the 256-bit instruction. Even if we don't have VLX, we can widen v8i32 to v16i32 and drop the upper half.
llvm-svn: 320926
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-popcnt-128.ll | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-tzcnt-128.ll | 26 |
3 files changed, 28 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index de6e721c76c..ffbcd4537ec 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23765,14 +23765,13 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, // TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions. if (Subtarget.hasVPOPCNTDQ()) { - if (VT == MVT::v8i16) { - Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i64, Op0); - Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op); - return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); - } - if (VT == MVT::v16i8 || VT == MVT::v16i16) { - Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v16i32, Op0); - Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op); + unsigned NumElems = VT.getVectorNumElements(); + assert((VT.getVectorElementType() == MVT::i8 || + VT.getVectorElementType() == MVT::i16) && "Unexpected type"); + if (NumElems <= 16) { + MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); + Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, NewVT, Op); return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); } } diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll index 07d1188b6a4..e3cb8f5b46a 100644 --- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll @@ -453,17 +453,18 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind { ; ; AVX512VPOPCNTDQ-LABEL: testv8i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: testv8i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll index d1f8cb2bed8..dfb0adefe1d 100644 --- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll @@ -1131,9 +1131,10 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind { ; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; @@ -1144,9 +1145,9 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind { ; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; @@ -1326,9 +1327,10 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind { ; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; 
AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; @@ -1339,9 +1341,9 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind { ; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; |

