summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp270
-rw-r--r--llvm/test/CodeGen/X86/avx-basic.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx-intrinsics-x86.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx-splat.ll2
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll728
5 files changed, 716 insertions, 288 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2555132edc3..ff598835404 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18696,6 +18696,265 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Combine an arbitrary chain of shuffles into a single instruction if
+/// possible.
+///
+/// This is the leaf of the recursive combinine below. When we have found some
+/// chain of single-use x86 shuffle instructions and accumulated the combined
+/// shuffle mask represented by them, this will try to pattern match that mask
+/// into either a single instruction if there is a special purpose instruction
+/// for this operation, or into a PSHUFB instruction which is a fully general
+/// instruction but should only be used to replace chains over a certain depth.
+static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
+ int Depth, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
+
+ // Find the operand that enters the chain. Note that multiple uses are OK
+ // here, we're not going to remove the operand we find.
+ SDValue Input = Op.getOperand(0);
+ while (Input.getOpcode() == ISD::BITCAST)
+ Input = Input.getOperand(0);
+
+ MVT VT = Input.getSimpleValueType();
+ MVT RootVT = Root.getSimpleValueType();
+ SDLoc DL(Root);
+
+ // Just remove no-op shuffle masks.
+ if (Mask.size() == 1) {
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Use the float domain if the operand type is a floatingc point type.
+ bool FloatDomain = VT.isFloatingPoint();
+
+ // If we don't have access to VEX encodings, the generic PSHUF instructions
+ // are preferable to some of the specialized forms despite requiring one more
+ // byte to encode because they can implicitly copy.
+ //
+ // IF we *do* have VEX encodings, than we can use shorter, more specific
+ // shuffle instructions freely as they can copy due to the extra register
+ // operand.
+ if (Subtarget->hasAVX()) {
+ // We have both floatincg point and integer variants of shuffles that dup
+ // either tho low or high half of the vector.
+ if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
+ bool Lo = Mask.equals(0, 0);
+ unsigned Shuffle = FloatDomain ? (Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS)
+ : (Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH);
+ MVT ShuffleVT = FloatDomain ? MVT::v4f32 : MVT::v2i64;
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // FIXME: We should match UNPCKLPS and UNPCKHPS here.
+
+ // For the integer domain we have specialized instructions for duplicating
+ // any element size from the low or high half.
+ if (!FloatDomain &&
+ (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3) ||
+ Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
+ Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
+ Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
+ Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15,
+ 15))) {
+ bool Lo = Mask[0] == 0;
+ MVT ShuffleVT;
+ switch (Mask.size()) {
+ case 4: ShuffleVT = MVT::v4i32; break;
+ case 8: ShuffleVT = MVT::v8i32; break;
+ case 16: ShuffleVT = MVT::v16i32; break;
+ };
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, ShuffleVT, Op,
+ Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+ }
+
+ // Bail if we have fewer than 3 shuffle instructions in the chain.
+ if (Depth < 3)
+ return false;
+
+ // If we have 3 or more shuffle instructions, we can replace them with
+ // a single PSHUFB instruction profitably. Intel's manuals suggest only using
+ // PSHUFB if doing so replacing 5 instructions, but in practice PSHUFB tends
+ // to be *very* fast so we're more aggressive.
+ if (Subtarget->hasSSSE3()) {
+ SmallVector<SDValue, 16> PSHUFBMask;
+ assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
+ int Ratio = 16 / Mask.size();
+ for (unsigned i = 0; i < 16; ++i) {
+ int M = Ratio * Mask[i / Ratio] + i % Ratio;
+ PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
+ }
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
+ DCI.AddToWorklist(Op.getNode());
+ SDValue PSHUFBMaskOp =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
+ DCI.AddToWorklist(PSHUFBMaskOp.getNode());
+ Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Failed to find any combines.
+ return false;
+}
+
+/// \brief Fully generic combining of x86 shuffle instructions.
+///
+/// This should be the last combine run over the x86 shuffle instructions. Once
+/// they have been fully optimized, this will recursively consdier all chains
+/// of single-use shuffle instructions, build a generic model of the cumulative
+/// shuffle operation, and check for simpler instructions which implement this
+/// operation. We use this primarily for two purposes:
+///
+/// 1) Collapse generic shuffles to specialized single instructions when
+/// equivalent. In most cases, this is just an encoding size win, but
+/// sometimes we will collapse multiple generic shuffles into a single
+/// special-purpose shuffle.
+/// 2) Look for sequences of shuffle instructions with 3 or more total
+/// instructions, and replace them with the slightly more expensive SSSE3
+/// PSHUFB instruction if available. We do this as the last combining step
+/// to ensure we avoid using PSHUFB if we can implement the shuffle with
+/// a suitable short sequence of other instructions. The PHUFB will either
+/// use a register or have to read from memory and so is slightly (but only
+/// slightly) more expensive than the other shuffle instructions.
+///
+/// Because this is inherently a quadratic operation (for each shuffle in
+/// a chain, we recurse up the chain), the depth is limited to 8 instructions.
+/// This should never be an issue in practice as the shuffle lowering doesn't
+/// produce sequences of more than 8 instructions.
+///
+/// FIXME: Currently, we don't collapse instructions *into* PSHUFB. We should,
+/// and we should do so more aggressively than we form PSHUFB because once we
+/// have a PSHUFB, we might as well do as much shuffling as we can.
+///
+/// FIXME: We will currently miss some cases where the redundant shuffling
+/// would simplify under the threshold for PSHUFB formation because of
+/// combine-ordering. To fix this, we should do the redundant instruction
+/// combining in this recursive walk.
+static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
+ ArrayRef<int> IncomingMask, int Depth,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // Bound the depth of our recursive combine because this is ultimately
+ // quadratic in nature.
+ if (Depth > 8)
+ return false;
+
+ // Directly rip through bitcasts to find the underlying operand.
+ while (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).hasOneUse())
+ Op = Op.getOperand(0);
+
+ MVT VT = Op.getSimpleValueType();
+ if (!VT.isVector())
+ return false; // Bail if we hit a non-vector.
+ // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit
+ // version should be added.
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ MVT RootVT = Root.getSimpleValueType();
+ assert(RootVT.isVector() && "Shuffles operate on vector types!");
+ assert(VT.getSizeInBits() == RootVT.getSizeInBits() &&
+ "Can only combine shuffles of the same vector register size.");
+
+ if (!isTargetShuffle(Op.getOpcode()))
+ return false;
+ SmallVector<int, 16> OpMask;
+ bool IsUnary;
+ bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary);
+ // We only can combine unary shuffles which we can decode the mask for.
+ if (!HaveMask || !IsUnary)
+ return false;
+
+ assert(VT.getVectorNumElements() == OpMask.size() &&
+ "Different mask size from vector size!");
+
+ SmallVector<int, 16> Mask;
+ Mask.reserve(std::max(OpMask.size(), IncomingMask.size()));
+
+ // Merge this shuffle operation's mask into our accumulated mask. This is
+ // a bit tricky as the shuffle may have a different size from the root.
+ if (OpMask.size() == IncomingMask.size()) {
+ for (int M : IncomingMask)
+ Mask.push_back(OpMask[M]);
+ } else if (OpMask.size() < IncomingMask.size()) {
+ assert(IncomingMask.size() % OpMask.size() == 0 &&
+ "The smaller number of elements must divide the larger.");
+ int Ratio = IncomingMask.size() / OpMask.size();
+ for (int M : IncomingMask)
+ Mask.push_back(Ratio * OpMask[M / Ratio] + M % Ratio);
+ } else {
+ assert(OpMask.size() > IncomingMask.size() && "All other cases handled!");
+ assert(OpMask.size() % IncomingMask.size() == 0 &&
+ "The smaller number of elements must divide the larger.");
+ int Ratio = OpMask.size() / IncomingMask.size();
+ for (int i = 0, e = OpMask.size(); i < e; ++i)
+ Mask.push_back(OpMask[Ratio * IncomingMask[i / Ratio] + i % Ratio]);
+ }
+
+ // See if we can recurse into the operand to combine more things.
+ switch (Op.getOpcode()) {
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ if (Op.getOperand(0).hasOneUse() &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ DAG, DCI, Subtarget))
+ return true;
+ break;
+
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!");
+ // We can't check for single use, we have to check that this shuffle is the only user.
+ if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ DAG, DCI, Subtarget))
+ return true;
+ break;
+ }
+
+ // Minor canonicalization of the accumulated shuffle mask to make it easier
+ // to match below. All this does is detect masks with squential pairs of
+ // elements, and shrink them to the half-width mask. It does this in a loop
+ // so it will reduce the size of the mask to the minimal width mask which
+ // performs an equivalent shuffle.
+ while (Mask.size() > 1) {
+ SmallVector<int, 16> NewMask;
+ for (int i = 0, e = Mask.size()/2; i < e; ++i) {
+ if (Mask[2*i] % 2 != 0 || Mask[2*i] != Mask[2*i + 1] + 1) {
+ NewMask.clear();
+ break;
+ }
+ NewMask.push_back(Mask[2*i] / 2);
+ }
+ if (NewMask.empty())
+ break;
+ Mask.swap(NewMask);
+ }
+
+ return combineX86ShuffleChain(Op, Root, Mask, Depth, DAG, DCI, Subtarget);
+}
+
/// \brief Get the PSHUF-style mask from PSHUF node.
///
/// This is a very minor wrapper around getTargetShuffleMask to easy forming v4
@@ -19113,6 +19372,17 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
if (Shuffle.getNode())
return Shuffle;
+
+ // Try recursively combining arbitrary sequences of x86 shuffle
+ // instructions into higher-order shuffles. We do this after combining
+ // specific PSHUF instruction sequences into their minimal form so that we
+ // can evaluate how many specialized shuffle instructions are involved in
+ // a particular chain.
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ if (combineX86ShufflesRecursively(SDValue(N, 0), SDValue(N, 0), NonceMask,
+ /*Depth*/ 1, DAG, DCI, Subtarget))
+ return SDValue(); // This routine will use CombineTo to replace N.
}
return SDValue();
diff --git a/llvm/test/CodeGen/X86/avx-basic.ll b/llvm/test/CodeGen/X86/avx-basic.ll
index 1fd9085838d..6d9b11792c7 100644
--- a/llvm/test/CodeGen/X86/avx-basic.ll
+++ b/llvm/test/CodeGen/X86/avx-basic.ll
@@ -83,7 +83,7 @@ entry:
}
; CHECK: vpshufd $-96
-; CHECK: vpshufd $-6
+; CHECK: vpunpckhdq
; CHECK: vinsertf128 $1
define <8 x i32> @D(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index ce31161dbbc..7382ddcd740 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2309,7 +2309,7 @@ declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) noun
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
; CHECK: vpermilpd
- %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/avx-splat.ll b/llvm/test/CodeGen/X86/avx-splat.ll
index b1b2f8b97a7..8c1b7b63850 100644
--- a/llvm/test/CodeGen/X86/avx-splat.ll
+++ b/llvm/test/CodeGen/X86/avx-splat.ll
@@ -32,7 +32,7 @@ entry:
ret <4 x i64> %vecinit6.i
}
-; CHECK: vpermilpd $0
+; CHECK: vmovlhps %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 5d1922a3483..575dc25f2ec 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1,493 +1,651 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_01012323
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_01012323
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_67452301
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_67452301
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_456789AB
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_456789AB
+; ALL: # BB#0:
+; ALL: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_00000000
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_00000000
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_00000000
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_00004444
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_00004444
+; ALL: # BB#0:
+; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; ALL-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_31206745
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_31206745
+; ALL: # BB#0:
+; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_44440000
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,0,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_44440000
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,0,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_44440000
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_75643120
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_75643120
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_75643120
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_10545410
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_10545410
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_10545410
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_54105410
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_54105410
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_54105410
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_54101054
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_54101054
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_54101054
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04400440
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,4,4,6]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_04400440
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,4,4,6]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_04400440
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_40044004
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,0,2,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_40044004
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,0,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_40044004
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_26405173
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_26405173
+; SSE2: # BB#0:
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_26405173
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_20645173
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_20645173
+; SSE2: # BB#0:
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_20645173
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_26401375
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_26401375
+; SSE2: # BB#0:
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_26401375
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,8,9,12,13,2,3,6,7,14,15,10,11]
+; SSSE3-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_00444444
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_00444444
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_00444444
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_44004444
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_44004444
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_44004444
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04404444
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_04404444
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_04404444
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04400000
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_04400000
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_04400000
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04404567
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_04404567
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0X444444
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_0X444444
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_0X444444
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,{{[0-9]+,[0-9]+}},8,9,8,9,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_44X04444
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_44X04444
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_44X04444
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,{{[0-9]+,[0-9]+}},0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_X4404444
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_X4404444
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_X4404444
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+}},8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0127XXXX
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_0127XXXX
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_0127XXXX
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,2,3,4,5,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXX4563
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,0]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_XXXX4563
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_XXXX4563
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},8,9,10,11,12,13,6,7]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_4563XXXX
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,2,3]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_4563XXXX
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,2,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_4563XXXX
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_01274563
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_01274563
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_01274563
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_45630127
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,1,2,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,1,3]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,5,4]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_45630127
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,1,2,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,1,3]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,5,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_45630127
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_08192a3b
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_08192a3b
+; ALL: # BB#0:
+; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0c1d2e3f
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_0c1d2e3f
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_4c5d6e7f
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_4c5d6e7f
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
+; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_48596a7b
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_48596a7b
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
+; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_08196e7f
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_08196e7f
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
+; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0c1d6879
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,0,2,3]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_0c1d6879
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,0,2,3]
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
+; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_109832ba
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm0[2,0,3,1,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7]
-; CHECK-SSE2-NEXT: punpcklqdq %xmm0, %xmm1
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_109832ba
+; ALL: # BB#0:
+; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: pshuflw {{.*}} # xmm1 = xmm0[2,0,3,1,4,5,6,7]
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
+; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; ALL-NEXT: punpcklqdq %xmm0, %xmm1
+; ALL-NEXT: movdqa %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_8091a2b3
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: punpcklwd %xmm0, %xmm1
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_8091a2b3
+; ALL: # BB#0:
+; ALL-NEXT: punpcklwd %xmm0, %xmm1
+; ALL-NEXT: movdqa %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_c4d5e6f7
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_c4d5e6f7
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm2 = xmm0[2,3,2,3]
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
+; ALL-NEXT: punpcklwd %xmm2, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0213cedf
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT: punpcklqdq %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_0213cedf
+; ALL: # BB#0:
+; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; ALL-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; ALL-NEXT: punpcklqdq %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_032dXXXX
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_032dXXXX
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
+; SSE2-NEXT: punpcklwd %xmm1, %xmm0
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_032dXXXX
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
+; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,12,13,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XXXcXXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXcXXXX
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,1,4,5,6,7]
-; CHECK-SSE2-NEXT: retq
+; ALL-LABEL: @shuffle_v8i16_XXXcXXXX
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,1,2,3]
+; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_012dXXXX
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_012dXXXX
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
+; SSE2-NEXT: punpcklwd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_012dXXXX
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
+; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXXcde3
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
-; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm1
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,2]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_XXXXcde3
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT: punpckhwd %xmm0, %xmm1
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,2]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_XXXXcde3
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; SSSE3-NEXT: punpckhwd %xmm0, %xmm1 # xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},0,1,4,5,8,9,14,15]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_cde3XXXX
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
-; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm1
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_cde3XXXX
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT: punpckhwd %xmm0, %xmm1
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_cde3XXXX
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
+; SSSE3-NEXT: punpckhwd %xmm0, %xmm1 # xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[0,1,4,5,8,9,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_012dcde3
-; CHECK-SSE2: # BB#0:
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm0[0,1,2,1]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm3 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT: punpckhwd %xmm2, %xmm1
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: punpcklwd %xmm3, %xmm0
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
-; CHECK-SSE2-NEXT: punpcklqdq %xmm1, %xmm0
-; CHECK-SSE2-NEXT: retq
+; SSE2-LABEL: @shuffle_v8i16_012dcde3
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm0[0,1,2,1]
+; SSE2-NEXT: pshufd {{.*}} # xmm3 = xmm1[2,1,2,3]
+; SSE2-NEXT: punpckhwd %xmm2, %xmm1
+; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpcklwd %xmm3, %xmm0
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; SSE2-NEXT: punpcklqdq %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v8i16_012dcde3
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*}} # xmm2 = xmm0[0,1,2,1]
+; SSSE3-NEXT: pshufd {{.*}} # xmm3 = xmm1[2,1,2,3]
+; SSSE3-NEXT: punpckhwd %xmm2, %xmm1 # xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[0,1,4,5,8,9,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: punpcklwd %xmm3, %xmm0 # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: punpcklqdq %xmm1, %xmm0
+; SSSE3-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
}
OpenPOWER on IntegriCloud