author     Simon Pilgrim <llvm-dev@redking.me.uk>   2017-06-09 17:29:52 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>   2017-06-09 17:29:52 +0000
commit     3d37b1a277da44b9a39cf4daad3b14d7b69243ec (patch)
tree       25fd771ae9c7638cfa1da372a5bfac5390ab82a8
parent     8933ffbb1246cc6c7b3e09ca888e9bf5da0088da (diff)
[X86][SSE] Add support for PACKSS nodes to faux shuffle extraction
If the inputs won't saturate during packing, then we can treat the PACKSS as a truncation shuffle.

llvm-svn: 305091
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         |  38
-rw-r--r--  llvm/test/CodeGen/X86/vector-compare-results.ll | 538
2 files changed, 297 insertions, 279 deletions
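
A note on the reasoning in the commit message: when neither PACKSS input can saturate, the signed-saturating pack degenerates into a plain truncation of each wide lane, which is exactly the even-half shuffle the new getFauxShuffleMask case below emits. Below is a minimal standalone sketch of that equivalence (plain C++, not LLVM code; helper name and lane values are arbitrary illustrations):

    // Sketch only: models PACKSSWD-style packing of i32 lanes into i16 with
    // signed saturation, and checks that it matches plain truncation when
    // every input lane already fits in i16 (i.e. has > 16 sign bits as i32).
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Signed saturation of one 32-bit lane down to 16 bits (hypothetical helper).
    static int16_t packSS(int32_t v) {
      if (v > INT16_MAX) return INT16_MAX;
      if (v < INT16_MIN) return INT16_MIN;
      return static_cast<int16_t>(v);
    }

    int main() {
      // Two operands whose lanes all fit in i16, so saturation cannot trigger.
      std::vector<int32_t> a = {1, -2, 30000, -30000};
      std::vector<int32_t> b = {0, 42, -1, 12345};
      for (size_t i = 0; i != a.size(); ++i) {
        assert(packSS(a[i]) == static_cast<int16_t>(a[i]));  // pack == truncate
        assert(packSS(b[i]) == static_cast<int16_t>(b[i]));
      }
      // Truncating each 32-bit lane keeps its low 16-bit half, i.e. the even
      // half-indices on little-endian x86, matching the Mask.push_back(i * 2)
      // pattern in the PACKSS case added below.
      return 0;
    }

If either input might saturate (too few known sign bits), the patch simply returns false and leaves the PACKSS node alone.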
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 759ddf58deb..831e9bdab0e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1,4 +1,4 @@
-
+
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
@@ -5816,7 +5816,8 @@ static bool setTargetShuffleZeroElements(SDValue N,
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
- SmallVectorImpl<SDValue> &Ops) {
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAG &DAG) {
Mask.clear();
Ops.clear();
@@ -5924,6 +5925,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
return true;
}
+ case X86ISD::PACKSS: {
+ // If we know input saturation won't happen we can treat this
+ // as a truncation shuffle.
+ if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
+ DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
+ return false;
+
+ Ops.push_back(N.getOperand(0));
+ Ops.push_back(N.getOperand(1));
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(i * 2);
+ return true;
+ }
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
uint64_t ShiftVal = N.getConstantOperandVal(1);
@@ -5998,9 +6012,10 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
/// Returns true if the target shuffle mask was decoded.
static bool resolveTargetShuffleInputs(SDValue Op,
SmallVectorImpl<SDValue> &Inputs,
- SmallVectorImpl<int> &Mask) {
+ SmallVectorImpl<int> &Mask,
+ SelectionDAG &DAG) {
if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
- if (!getFauxShuffleMask(Op, Mask, Inputs))
+ if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
return false;
resolveTargetShuffleInputsAndMask(Inputs, Mask);
@@ -26760,6 +26775,17 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
return Tmp;
}
+ case X86ISD::VSHLI: {
+ SDValue Src = Op.getOperand(0);
+ unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+ APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ if (ShiftVal.uge(VTBits))
+ return VTBits; // Shifted all bits out --> zero.
+ if (ShiftVal.uge(Tmp))
+ return 1; // Shifted all sign bits out --> unknown.
+ return Tmp - ShiftVal.getZExtValue();
+ }
+
case X86ISD::VSRAI: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
@@ -27908,7 +27934,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// Extract target shuffle mask and resolve sentinels and inputs.
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
- if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
+ if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return false;
assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
@@ -29450,7 +29476,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
- if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask))
+ if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
return SDValue();
// Attempt to narrow/widen the shuffle mask to the correct size.
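
The X86ISD::VSHLI addition above supports the PACKSS check: ComputeNumSignBits has to look through shift-left-by-immediate nodes for the no-saturation condition to fire in practice. A minimal sketch of the arithmetic it applies, on a single 16-bit lane in plain C++ rather than LLVM's APInt/SelectionDAG API (the helper and the value are arbitrary illustrations):

    // Sketch only: a left shift by C discards C of the known sign bits
    // (bottoming out at 1 once the sign bits are gone), and shifting the whole
    // width out leaves a known zero, i.e. VTBits sign bits.
    #include <cassert>
    #include <cstdint>

    // Redundant sign bits of a 16-bit lane, counting the sign bit itself
    // (hypothetical helper; for a known constant this is the exact count).
    static unsigned numSignBits16(uint16_t v) {
      unsigned sign = (v >> 15) & 1;
      unsigned n = 1;
      for (int bit = 14; bit >= 0 && ((v >> bit) & 1) == sign; --bit)
        ++n;
      return n;
    }

    int main() {
      const unsigned VTBits = 16;
      uint16_t v = 0xFFFD;                 // -3 as i16: 14 sign bits
      unsigned tmp = numSignBits16(v);
      assert(tmp == 14);

      unsigned shift = 3;                  // arbitrary immediate
      if (shift >= VTBits)
        tmp = VTBits;                      // everything shifted out -> zero
      else if (shift >= tmp)
        tmp = 1;                           // sign bits shifted out -> unknown
      else
        tmp -= shift;                      // Tmp - ShiftVal, as in the patch

      uint16_t shifted = static_cast<uint16_t>(v << 3);  // 0xFFE8 == -24 as i16
      assert(tmp == 11 && numSignBits16(shifted) == 11);
      return 0;
    }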
diff --git a/llvm/test/CodeGen/X86/vector-compare-results.ll b/llvm/test/CodeGen/X86/vector-compare-results.ll
index 4fa9596192a..ce0b067f504 100644
--- a/llvm/test/CodeGen/X86/vector-compare-results.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-results.ll
@@ -5345,217 +5345,213 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
;
; AVX1-LABEL: test_cmp_v64i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9
-; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
-; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpacksswb %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vpextrb $15, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $14, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $12, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $10, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $8, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $6, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $4, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $2, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $0, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $14, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $12, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $10, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $8, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $0, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $6, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $4, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $2, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $0, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $14, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $12, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $10, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $8, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $6, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $4, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $2, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $0, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $14, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $12, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $10, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $8, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $6, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $4, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $2, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $14, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $12, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $10, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $8, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $0, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $6, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $4, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $2, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $14, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $12, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $10, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $8, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $6, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $4, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $2, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $0, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: vzeroupper
@@ -5565,207 +5561,203 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
-; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm6
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpextrb $15, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm7
+; AVX2-NEXT: vpextrb $14, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
; AVX2-NEXT: vpextrb $0, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
; AVX2-NEXT: vpextrb $0, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vzeroupper