author     Simon Pilgrim <llvm-dev@redking.me.uk>    2017-01-24 11:21:57 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2017-01-24 11:21:57 +0000
commit     6340e548615c82de1d38a0c5dca6c82026e575d3 (patch)
tree       fbc499833cb395c6e81c50137ef8dbd9379f1e43 /llvm
parent     dcbd614c6c0a6962d28656b7b04bebedc262bbd2 (diff)
[X86][SSE] Add support for constant folding vector logical shift by immediates
llvm-svn: 292915
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         24
-rw-r--r--  llvm/test/CodeGen/X86/vec_shift5.ll             12
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-128.ll      60
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-256.ll      48
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-128.ll  12
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-256.ll  12
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-512.ll   5
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-lshr-128.ll  34
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-lshr-256.ll  78
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-lshr-512.ll   5
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-shl-128.ll   32
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-shl-256.ll   72
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-shl-512.ll    5
13 files changed, 186 insertions, 213 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2168022d8cb..834a04c61d3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30463,9 +30463,11 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- assert((X86ISD::VSHLI == N->getOpcode() || X86ISD::VSRLI == N->getOpcode()) &&
- "Unexpected opcode");
+ unsigned Opcode = N->getOpcode();
+ assert((X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode) &&
+ "Unexpected shift opcode");
EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
// This fails for mask register (vXi1) shifts.
@@ -30477,12 +30479,14 @@ static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG,
if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ SDValue N0 = N->getOperand(0);
+
// Shift N0 by zero -> N0.
if (!ShiftVal)
- return N->getOperand(0);
+ return N0;
// Shift zero -> zero.
- if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
// We can decode 'whole byte' logical bit shifts as shuffles.
@@ -30496,6 +30500,18 @@ static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG,
return SDValue(); // This routine will use CombineTo to replace N.
}
+ // Constant Folding.
+ SmallBitVector UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (N->isOnlyUserOf(N0.getNode()) &&
+ getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
+ assert(EltBits.size() == NumElts && "Unexpected shift value type");
+ for (APInt &Elt : EltBits)
+ Elt = X86ISD::VSHLI == Opcode ? Elt.shl(ShiftVal.getZExtValue())
+ : Elt.lshr(ShiftVal.getZExtValue());
+ return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
+ }
+
return SDValue();
}
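
The hunk above is the core of the patch: when the shifted operand is a build vector of constants (and the shift is its only user), the per-element VSHLI/VSRLI shift is evaluated at compile time via getTargetConstantBitsFromNode and replaced by a constant vector. The following is a minimal standalone C++ sketch of that fold, using plain integers instead of the APInt/SelectionDAG machinery from the patch; the helper name foldVectorShift and the sample values (taken from the vec_shift5.ll tests below) are illustrative only.

// Standalone illustration of the new constant fold (not LLVM code).
#include <cstdint>
#include <iostream>
#include <vector>

enum class ShiftOp { VSHLI, VSRLI }; // mirrors the X86ISD opcodes handled above

// Evaluate a per-element logical shift by immediate over a constant vector.
std::vector<uint64_t> foldVectorShift(ShiftOp Op,
                                      const std::vector<uint64_t> &Elts,
                                      unsigned ShiftAmt, unsigned EltBits) {
  const uint64_t Mask = EltBits == 64 ? ~0ULL : ((1ULL << EltBits) - 1);
  std::vector<uint64_t> Out;
  Out.reserve(Elts.size());
  for (uint64_t Elt : Elts) {
    uint64_t Res = (Op == ShiftOp::VSHLI) ? (Elt << ShiftAmt)
                                          : ((Elt & Mask) >> ShiftAmt);
    Out.push_back(Res & Mask); // keep the result inside the element lane
  }
  return Out;
}

int main() {
  // test7/test8 below: <2 x i64> [1,2] << 3 folds to [8,16], and
  // [8,16] >> 3 folds back to [1,2], so no psllq/psrlq remains at run time.
  for (uint64_t V : foldVectorShift(ShiftOp::VSHLI, {1, 2}, 3, 64))
    std::cout << V << ' ';
  std::cout << '\n';
  for (uint64_t V : foldVectorShift(ShiftOp::VSRLI, {8, 16}, 3, 64))
    std::cout << V << ' ';
  std::cout << '\n';
}

This prints 8 16 and 1 2, matching the new movaps constants in the X32 checks below (where each i64 element is printed as two i32 halves).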
diff --git a/llvm/test/CodeGen/X86/vec_shift5.ll b/llvm/test/CodeGen/X86/vec_shift5.ll
index cba2b5d0504..c0226d0a4c0 100644
--- a/llvm/test/CodeGen/X86/vec_shift5.ll
+++ b/llvm/test/CodeGen/X86/vec_shift5.ll
@@ -93,8 +93,7 @@ define <4 x i32> @test6() {
define <2 x i64> @test7() {
; X32-LABEL: test7:
; X32: # BB#0:
-; X32-NEXT: movdqa {{.*#+}} xmm0 = [1,0,2,0]
-; X32-NEXT: psllq $3, %xmm0
+; X32-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0]
; X32-NEXT: retl
;
; X64-LABEL: test7:
@@ -108,8 +107,7 @@ define <2 x i64> @test7() {
define <2 x i64> @test8() {
; X32-LABEL: test8:
; X32: # BB#0:
-; X32-NEXT: movdqa {{.*#+}} xmm0 = [8,0,16,0]
-; X32-NEXT: psrlq $3, %xmm0
+; X32-NEXT: movaps {{.*#+}} xmm0 = [1,0,2,0]
; X32-NEXT: retl
;
; X64-LABEL: test8:
@@ -151,8 +149,7 @@ define <4 x i32> @test10() {
define <2 x i64> @test11() {
; X32-LABEL: test11:
; X32: # BB#0:
-; X32-NEXT: movdqa {{.*#+}} xmm0 = <u,u,31,0>
-; X32-NEXT: psrlq $3, %xmm0
+; X32-NEXT: movaps {{.*#+}} xmm0 = <u,u,3,0>
; X32-NEXT: retl
;
; X64-LABEL: test11:
@@ -222,8 +219,7 @@ define <4 x i32> @test15() {
define <2 x i64> @test16() {
; X32-LABEL: test16:
; X32: # BB#0:
-; X32-NEXT: movdqa {{.*#+}} xmm0 = <u,u,31,0>
-; X32-NEXT: psllq $3, %xmm0
+; X32-NEXT: movaps {{.*#+}} xmm0 = <u,u,248,0>
; X32-NEXT: retl
;
; X64-LABEL: test16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index fbb67ebbf60..eda893fc942 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -1044,8 +1044,7 @@ define <8 x i16> @constant_rotate_v8i16(<8 x i16> %a) nounwind {
define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_rotate_v16i8:
; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
-; SSE2-NEXT: psllw $5, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,57600,41152,24704,8256]
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpgtb %xmm3, %xmm1
@@ -1071,8 +1070,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: paddb %xmm1, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
-; SSE2-NEXT: psllw $5, %xmm4
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [57600,41152,24704,8256,8192,24640,41088,57536]
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: pcmpgtb %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm5, %xmm6
@@ -1105,11 +1103,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
; SSE41-LABEL: constant_rotate_v16i8:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
-; SSE41-NEXT: psllw $5, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psllw $4, %xmm3
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,57600,41152,24704,8256]
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: pblendvb %xmm3, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm3
@@ -1121,11 +1118,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: paddb %xmm3, %xmm3
; SSE41-NEXT: paddb %xmm0, %xmm0
; SSE41-NEXT: pblendvb %xmm3, %xmm2
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
-; SSE41-NEXT: psllw $5, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psrlw $4, %xmm3
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [57600,41152,24704,8256,8192,24640,41088,57536]
; SSE41-NEXT: pblendvb %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psrlw $2, %xmm3
@@ -1143,31 +1139,29 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
;
; AVX-LABEL: constant_rotate_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
-; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vpsllw $2, %xmm2, %xmm3
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm3
-; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
-; AVX-NEXT: vpsllw $5, %xmm2, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $2, %xmm0, %xmm3
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,57600,41152,24704,8256]
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm3
; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $1, %xmm0, %xmm3
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
+; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm3
; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [57600,41152,24704,8256,8192,24640,41088,57536]
+; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
@@ -1182,8 +1176,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
;
; X32-SSE-LABEL: constant_rotate_v16i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
-; X32-SSE-NEXT: psllw $5, %xmm3
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,57600,41152,24704,8256]
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; X32-SSE-NEXT: pcmpgtb %xmm3, %xmm1
@@ -1209,8 +1202,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
; X32-SSE-NEXT: pandn %xmm1, %xmm3
; X32-SSE-NEXT: paddb %xmm1, %xmm1
; X32-SSE-NEXT: pand %xmm4, %xmm1
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
-; X32-SSE-NEXT: psllw $5, %xmm4
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [57600,41152,24704,8256,8192,24640,41088,57536]
; X32-SSE-NEXT: pxor %xmm5, %xmm5
; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm5
; X32-SSE-NEXT: movdqa %xmm5, %xmm6
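
For reference on the new immediates in these rotate/shift tests: the old code materialized a byte vector such as [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] and applied psllw $5 at run time; the fold now performs that shift at compile time, and the resulting constant is printed as eight 16-bit lanes (8192, 24640, ...). Below is a small sketch of how those values arise, in plain C++ under a little-endian byte-packing assumption; the helper name foldPsllw5 is hypothetical.

#include <cstdint>
#include <cstdio>
#include <vector>

// Reinterpret 16 bytes as eight little-endian 16-bit lanes and shift each
// lane left by 5, i.e. what psllw $5 did to the old byte constant.
std::vector<uint16_t> foldPsllw5(const uint8_t (&Bytes)[16]) {
  std::vector<uint16_t> Words;
  for (int I = 0; I < 16; I += 2) {
    uint16_t W = uint16_t(Bytes[I] | (Bytes[I + 1] << 8));
    Words.push_back(uint16_t(W << 5)); // truncates to 16 bits, like psllw
  }
  return Words;
}

int main() {
  // Old operand of the first psllw $5 in constant_rotate_v16i8 above.
  const uint8_t Rot[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1};
  for (uint16_t W : foldPsllw5(Rot))
    std::printf("%u ", unsigned(W)); // 8192 24640 41088 57536 57600 41152 24704 8256
  std::printf("\n");
}

The same computation explains the 8192,24640,41088,57536,49376,32928,16480,32 constants in the vector-shift tests further down, which start from the byte vector [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0].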
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index af1755e1431..2e357660ee2 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -582,8 +582,7 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpand %xmm8, %xmm2, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
-; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8192,24640,41088,57536,57600,41152,24704,8256]
; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@@ -605,8 +604,7 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [57600,41152,24704,8256,8192,24640,41088,57536]
; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
@@ -633,31 +631,29 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind {
;
; AVX2-LABEL: constant_rotate_v32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsllw $2, %ymm2, %ymm3
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm3
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm2, %ymm1
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
-; AVX2-NEXT: vpsllw $5, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
-; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm3
+; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsllw $2, %ymm1, %ymm3
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm3
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm3
; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536]
+; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpaddb %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpaddb %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 77a25b311e4..acad1216978 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1309,8 +1309,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; SSE2-NEXT: psllw $5, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm5, %xmm5
@@ -1368,8 +1367,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE41-LABEL: constant_shift_v16i8:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; SSE41-NEXT: psllw $5, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE41-NEXT: movdqa %xmm2, %xmm4
@@ -1404,8 +1402,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
;
; AVX-LABEL: constant_shift_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [8192,24640,41088,57536,49376,32928,16480,32]
; AVX-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
@@ -1455,8 +1452,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; X32-SSE-LABEL: constant_shift_v16i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; X32-SSE-NEXT: psllw $5, %xmm3
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [8192,24640,41088,57536,49376,32928,16480,32]
; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pxor %xmm5, %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 27ed4592943..c09e6b2bc8d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -988,8 +988,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8192,24640,41088,57536,49376,32928,16480,32]
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
@@ -1036,8 +1035,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX2-LABEL: constant_shift_v32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
@@ -1085,8 +1083,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX512DQ-LABEL: constant_shift_v32i8:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT: vpsraw $4, %ymm3, %ymm4
@@ -1121,8 +1118,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX512DQVL-LABEL: constant_shift_v32i8:
; AVX512DQVL: # BB#0:
-; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQVL-NEXT: vpsraw $4, %ymm3, %ymm4
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 6cc98b5f3ee..4d4b7f4e822 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -309,8 +309,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
@@ -357,7 +356,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpsraw $4, %zmm1, %zmm2
-; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm3
+; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm3[8],zmm0[9],zmm3[9],zmm0[10],zmm3[10],zmm0[11],zmm3[11],zmm0[12],zmm3[12],zmm0[13],zmm3[13],zmm0[14],zmm3[14],zmm0[15],zmm3[15],zmm0[24],zmm3[24],zmm0[25],zmm3[25],zmm0[26],zmm3[26],zmm0[27],zmm3[27],zmm0[28],zmm3[28],zmm0[29],zmm3[29],zmm0[30],zmm3[30],zmm0[31],zmm3[31],zmm0[40],zmm3[40],zmm0[41],zmm3[41],zmm0[42],zmm3[42],zmm0[43],zmm3[43],zmm0[44],zmm3[44],zmm0[45],zmm3[45],zmm0[46],zmm3[46],zmm0[47],zmm3[47],zmm0[56],zmm3[56],zmm0[57],zmm3[57],zmm0[58],zmm3[58],zmm0[59],zmm3[59],zmm0[60],zmm3[60],zmm0[61],zmm3[61],zmm0[62],zmm3[62],zmm0[63],zmm3[63]
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index 9b8c0def455..23c8f8997f7 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -1069,8 +1069,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; SSE2-NEXT: psllw $5, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: pcmpgtb %xmm2, %xmm3
@@ -1102,11 +1101,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE41-LABEL: constant_shift_v16i8:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; SSE41-NEXT: psllw $5, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlw $4, %xmm2
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlw $2, %xmm2
@@ -1123,19 +1121,18 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
;
; AVX-LABEL: constant_shift_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $2, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: constant_shift_v16i8:
@@ -1161,8 +1158,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
;
; X32-SSE-LABEL: constant_shift_v16i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; X32-SSE-NEXT: psllw $5, %xmm2
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; X32-SSE-NEXT: pxor %xmm3, %xmm3
; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 58bb8f3e6ec..60575250d71 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -825,8 +825,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm8, %xmm2, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8192,24640,41088,57536,49376,32928,16480,32]
; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
@@ -852,19 +851,18 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX2-LABEL: constant_shift_v32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
@@ -889,19 +887,18 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX512DQ-LABEL: constant_shift_v32i8:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm1
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm1
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i8:
@@ -913,19 +910,18 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX512DQVL-LABEL: constant_shift_v32i8:
; AVX512DQVL: # BB#0:
-; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
+; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpsrlw $2, %ymm0, %ymm1
+; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpsrlw $1, %ymm0, %ymm1
+; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 905445f3016..c269f815951 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -244,8 +244,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
@@ -270,7 +269,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1
+; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index 32334420f8b..d6088906ef8 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -926,8 +926,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; SSE2-NEXT: psllw $5, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: pcmpgtb %xmm2, %xmm3
@@ -958,11 +957,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE41-LABEL: constant_shift_v16i8:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; SSE41-NEXT: psllw $5, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psllw $4, %xmm2
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psllw $2, %xmm2
@@ -978,18 +976,17 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
;
; AVX-LABEL: constant_shift_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsllw $2, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: constant_shift_v16i8:
@@ -1013,8 +1010,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
;
; X32-SSE-LABEL: constant_shift_v16i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; X32-SSE-NEXT: psllw $5, %xmm2
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; X32-SSE-NEXT: pxor %xmm3, %xmm3
; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 104fa089c74..7f534050b6a 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -730,8 +730,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8192,24640,41088,57536,49376,32928,16480,32]
; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $2, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@@ -754,18 +753,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX2-LABEL: constant_shift_v32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
@@ -788,18 +786,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX512DQ-LABEL: constant_shift_v32i8:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i8:
@@ -811,18 +808,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
;
; AVX512DQVL-LABEL: constant_shift_v32i8:
; AVX512DQVL: # BB#0:
-; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
+; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm1
+; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm1
+; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index 180d6f3a3b0..39f8fe2f05d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -230,8 +230,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
@@ -253,7 +252,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1
+; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2