summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-03-10 11:23:51 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-03-10 11:23:51 +0000
commit16d11785a5fbd982cf81b1585603d576d743f576 (patch)
treed43c294f1622ccda5f8fb474123ba8c1e9d17b33
parent1ecd740cf0b99a7c2ac79fdfdf08338588296910 (diff)
downloadbcm5719-llvm-16d11785a5fbd982cf81b1585603d576d743f576.tar.gz
bcm5719-llvm-16d11785a5fbd982cf81b1585603d576d743f576.zip
[X86][SSE] Basic combining of unary target shuffles of binary target shuffles.
This patch reorders the combining of target shuffle masks so that when a unary shuffle takes a binary shuffle as its input but only references one of its inputs it can correctly combine into a unary shuffle mask. This is starting to encroach on the purpose of resolveTargetShuffleInputs, but I don't want to remove it until we definitely know we won't need it for full binary shuffle combining. There is a lot more work before we can properly support binary target shuffle masks but this was an easy case to add support for. Differential Revision: http://reviews.llvm.org/D17858 llvm-svn: 263102
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp30
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll12
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll9
3 files changed, 25 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b4823718a77..3ae5136fe90 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5266,8 +5266,7 @@ static bool setTargetShuffleZeroElements(SDValue N,
/// remaining input indices in case we now have a unary shuffle and adjust the
/// Op0/Op1 inputs accordingly.
/// Returns true if the target shuffle mask was decoded.
-static bool resolveTargetShuffleInputs(SDValue Op, bool &IsUnary, SDValue &Op0,
- SDValue &Op1,
+static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1,
SmallVectorImpl<int> &Mask) {
SmallVector<SDValue, 2> Ops;
if (!setTargetShuffleZeroElements(Op, Mask, Ops))
@@ -5282,10 +5281,6 @@ static bool resolveTargetShuffleInputs(SDValue Op, bool &IsUnary, SDValue &Op0,
Op0 = Op0InUse ? Ops[0] : SDValue();
Op1 = Op1InUse ? Ops[1] : SDValue();
- IsUnary = !(Op0InUse && Op1InUse);
-
- if (!IsUnary)
- return true;
// We're only using Op1 - commute the mask and inputs.
if (!Op0InUse && Op1InUse) {
@@ -24036,14 +24031,9 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
- bool IsUnary;
SDValue Input0, Input1;
SmallVector<int, 16> OpMask;
- if (!resolveTargetShuffleInputs(Op, IsUnary, Input0, Input1, OpMask))
- return false;
-
- // At the moment we can only combine target shuffle unary cases.
- if (!IsUnary)
+ if (!resolveTargetShuffleInputs(Op, Input0, Input1, OpMask))
return false;
assert(VT.getVectorNumElements() == OpMask.size() &&
@@ -24103,8 +24093,24 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
Subtarget, DAG, SDLoc(Root)));
return true;
}
+
+ int MaskSize = Mask.size();
+ bool UseInput0 = std::any_of(Mask.begin(), Mask.end(),
+ [MaskSize](int Idx) { return 0 <= Idx && Idx < MaskSize; });
+ bool UseInput1 = std::any_of(Mask.begin(), Mask.end(),
+ [MaskSize](int Idx) { return MaskSize <= Idx; });
+
+ // At the moment we can only combine unary shuffle mask cases.
+ if (UseInput0 && UseInput1)
+ return false;
+ else if (UseInput1) {
+ std::swap(Input0, Input1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ }
+
assert(Input0 && "Shuffle with no inputs detected");
+ // TODO - generalize this to support any variable mask shuffle.
HasPSHUFB |= (Op.getOpcode() == X86ISD::PSHUFB);
// See if we can recurse into Input0 (if it's a target shuffle).
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 5d970e11e5b..03973661ce0 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -929,18 +929,14 @@ define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41: # BB#0:
-; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index aecd45ac3f5..f66f0d4dc0a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -63,13 +63,11 @@ define <4 x float> @combine_pshufb_movsldup(<4 x float> %a0) {
define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_unpckl_arg0_pshufb:
; SSE: # BB#0:
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: combine_unpckl_arg0_pshufb:
; AVX: # BB#0:
-; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
; AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -80,14 +78,13 @@ define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_unpckl_arg1_pshufb:
; SSE: # BB#0:
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_unpckl_arg1_pshufb:
; AVX: # BB#0:
-; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero
; AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1>)
OpenPOWER on IntegriCloud