| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-05-14 11:46:26 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-05-14 11:46:26 +0000 |
| commit | f3ee9c699796336bc17934e20ebc84a089d1a7e8 (patch) | |
| tree | 57f947cfa06c23551d53ddf31f568cec66199669 | |
| parent | f3e87ac5f0849a82a7b19d03ddb5e087f8850cd2 (diff) | |
[X86][AVX] Allow 32-bit targets to peek through subvectors to extract constant splats for vXi64 shifts.
llvm-svn: 303009
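
For reference: the case this change targets is a uniform constant shift of a vXi64 vector on a 32-bit AVX target, where i64 legalization turns the splatted shift amount into a v8i32 constant and AVX1 op splitting hides it behind an EXTRACT_SUBVECTOR. Below is a minimal sketch of a reproducer in the spirit of the splatconstant_shift_v4i64 tests updated by this patch; the llc invocation in the comment is an assumption inferred from the X32-AVX1 check prefix, not taken from the patch.

```llvm
; Hypothetical reproducer mirroring the updated splatconstant_shift_v4i64
; tests. Assumed invocation (not from the patch):
;   llc < %s -mtriple=i686-unknown-unknown -mattr=+avx

define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
  ; On i686/AVX1 the old lowering materialized the amount as the v8i32
  ; constant [7,0,7,0,7,0,7,0] and used variable-amount vpsrlq; after this
  ; patch the splat is recognized and lowered as two immediate vpsrlq $7 ops.
  %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}
```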
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 11 |
| -rw-r--r-- | llvm/test/CodeGen/X86/known-signbits-vector.ll | 9 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shift-ashr-256.ll | 21 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shift-lshr-256.ll | 10 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 10 |
5 files changed, 27 insertions, 34 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1f5b94ff8fa..155564ef3ac 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21888,10 +21888,19 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
   }
 
   // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+  // TODO: Replace constant extraction with getTargetConstantBitsFromNode.
   if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
       (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
        (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
+
+    // AVX1 targets maybe extracting a 128-bit vector from a 256-bit constant.
+    unsigned SubVectorScale = 1;
+    if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+      SubVectorScale =
+          Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
+      Amt = Amt.getOperand(0);
+    }
+
     // Peek through any splat that was introduced for i64 shift vectorization.
     int SplatIndex = -1;
     if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
@@ -21908,7 +21917,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
       Amt = Amt.getOperand(0);
 
     unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
-                     VT.getVectorNumElements();
+                     (SubVectorScale * VT.getVectorNumElements());
     unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
     uint64_t ShiftAmt = 0;
     unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index d526b5dd786..ec620b8ce87 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -176,16 +176,9 @@ define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4
 ; X32-NEXT:    vpsrad $16, %xmm0, %xmm1
 ; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
 ; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [16,0,16,0,16,0,16,0]
-; X32-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
-; X32-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X32-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index eba53997615..09e143ddcd4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1719,19 +1719,14 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
 ;
 ; X32-AVX1-LABEL: splatconstant_shift_v4i64:
 ; X32-AVX1:       # BB#0:
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,0,7,0,7,0,7,0]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
-; X32-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
-; X32-AVX1-NEXT:    vpsrlq %xmm2, %xmm5, %xmm2
-; X32-AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsubq %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X32-AVX1-NEXT:    vpsrad $7, %xmm1, %xmm2
+; X32-AVX1-NEXT:    vpsrlq $7, %xmm1, %xmm1
+; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X32-AVX1-NEXT:    vpsrad $7, %xmm0, %xmm2
+; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0
+; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
 ; X32-AVX2-LABEL: splatconstant_shift_v4i64:
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index d342292f253..46be36b76e9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -1355,12 +1355,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
 ;
 ; X32-AVX1-LABEL: splatconstant_shift_v4i64:
 ; X32-AVX1:       # BB#0:
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,0,7,0,7,0,7,0]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm2
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm1
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
 ; X32-AVX2-LABEL: splatconstant_shift_v4i64:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 4e5ff0e9cee..4a134f440a7 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -1195,12 +1195,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
 ;
 ; X32-AVX1-LABEL: splatconstant_shift_v4i64:
 ; X32-AVX1:       # BB#0:
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,0,7,0,7,0,7,0]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
-; X32-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X32-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X32-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
 ; X32-AVX2-LABEL: splatconstant_shift_v4i64:
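
Why the Ratio rescale in the X86ISelLowering.cpp hunk is needed, as I read the patch: for the v4i64 tests above, the splatted amount <7,7,7,7> legalizes on i686 to the v8i32 constant [7,0,7,0,7,0,7,0] (visible in the removed vmovdqa lines), and AVX1 splitting hands each half's amount to LowerScalarImmediateShift as an EXTRACT_SUBVECTOR of that 256-bit constant while VT is v2i64. Peeking through the extract gives SubVectorScale = 256 / 128 = 2, so Ratio = 8 / (2 * 2) = 2 i32 elements per i64 amount, the same stride the plain v2i64 path already used; without the rescale the code would compute Ratio = 4 and index the wrong constant elements. The ashr test also shows the payoff once the immediate is known, since AVX1 lacks a 64-bit arithmetic shift. A hedged standalone copy of that case (function name is mine, same assumed llc invocation as above):

```llvm
; Hypothetical standalone copy of the ashr case; the name and invocation
; are assumptions. Assumed: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx

define <4 x i64> @splatconstant_ashr_v4i64(<4 x i64> %a) nounwind {
  ; With the amount visible as the immediate 7, each 128-bit half is lowered
  ; as vpsrad $7 (arithmetic shift supplying the high dwords) + vpsrlq $7
  ; (logical shift supplying the low dwords) + vpblendw to merge them,
  ; matching the new X32-AVX1 check lines above.
  %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}
```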