| field | value | date |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-05-01 13:51:09 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-05-01 13:51:09 +0000 |
| commit | 3d6899e3699d58c93900dc81874a58f5c8aaf877 (patch) | |
| tree | 57bbfc8ebbedba2fb5dde79a4a175f27502fcb15 | |
| parent | 4e701ab17756f7fc3461b35edde0f333ce87d1c0 (diff) | |
| download | bcm5719-llvm-3d6899e3699d58c93900dc81874a58f5c8aaf877.tar.gz bcm5719-llvm-3d6899e3699d58c93900dc81874a58f5c8aaf877.zip | |
[X86][SSE] Add SSE vector shift support to SimplifyDemandedVectorEltsForTargetNode vector splitting
llvm-svn: 359680
20 files changed, 340 insertions, 334 deletions
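For context, the functional change is the new set of switch cases added to `X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode` in the first hunk below: when only the low subvector of a wide SSE/AVX shift result is demanded, the shift is rebuilt at the narrower width and reinserted into an undef vector of the original type. The sketch below is the added case reformatted from the patch for readability (names and helpers exactly as in the diff):

```cpp
  // Byte shifts by immediate.
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
  // Shift by uniform.
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA:
  // Shift by immediate.
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI: {
  SDLoc DL(Op);
  // Narrow the shift input to the demanded (low) subvector width.
  SDValue Ext0 =
      extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
  SDValue ExtOp =
      TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1));
  // Reinsert the narrowed result into an undef vector of the original type.
  SDValue UndefVec = TLO.DAG.getUNDEF(VT);
  SDValue Insert =
      insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
  return TLO.CombineTo(Op, Insert);
}
```

The test updates that follow are the downstream effect of this: ymm/zmm shifts whose upper elements are never used are shrunk to xmm shifts (for example, `vpsrlw %xmm4, %ymm5, %ymm4` becomes `vpsrlw %xmm4, %xmm5, %xmm4`), which in turn lets the all-ones constant be materialized with `vpcmpeqd` on an xmm register instead of a wider `vpternlogd`/`vpcmpeqd` ymm/zmm.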
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 657832ab74b..0cc7c157b74 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33413,6 +33413,27 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(        ExtSizeInBits = SizeInBits / 4;      switch (Opc) { +      // Byte shifts by immediate. +    case X86ISD::VSHLDQ: +    case X86ISD::VSRLDQ: +      // Shift by uniform. +    case X86ISD::VSHL: +    case X86ISD::VSRL: +    case X86ISD::VSRA: +      // Shift by immediate. +    case X86ISD::VSHLI: +    case X86ISD::VSRLI: +    case X86ISD::VSRAI: { +      SDLoc DL(Op); +      SDValue Ext0 = +          extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); +      SDValue ExtOp = +          TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1)); +      SDValue UndefVec = TLO.DAG.getUNDEF(VT); +      SDValue Insert = +          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); +      return TLO.CombineTo(Op, Insert); +    }         // Target Shuffles.      case X86ISD::PSHUFB:      case X86ISD::UNPCKL: diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index 9da6a988e77..bb7583b4a33 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -679,36 +679,21 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {  ; SSE41-NEXT:    movdqa %xmm2, %xmm0  ; SSE41-NEXT:    retq  ; -; AVX1-LABEL: combine_vec_udiv_nonuniform4: -; AVX1:       # %bb.0: -; AVX1-NEXT:    movl $171, %eax -; AVX1-NEXT:    vmovd %eax, %xmm1 -; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT:    vpmullw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT:    vpsllw $1, %xmm1, %xmm2 -; AVX1-NEXT:    vpsllw $8, %xmm1, %xmm1 -; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7] -; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT:    vpackuswb %xmm0, %xmm1, %xmm1 -; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT:    retq -; -; AVX2-LABEL: combine_vec_udiv_nonuniform4: -; AVX2:       # %bb.0: -; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT:    movl $171, %eax -; AVX2-NEXT:    vmovd %eax, %xmm2 -; AVX2-NEXT:    vpmullw %ymm2, %ymm1, %ymm1 -; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1 -; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm1, %ymm1 -; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1 -; AVX2-NEXT:    vpackuswb %xmm0, %xmm1, %xmm1 -; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-NEXT:    vzeroupper -; AVX2-NEXT:    retq +; AVX-LABEL: combine_vec_udiv_nonuniform4: +; AVX:       # %bb.0: +; AVX-NEXT:    movl $171, %eax +; AVX-NEXT:    vmovd %eax, %xmm1 +; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-NEXT:    vpmullw %xmm1, %xmm2, %xmm1 +; AVX-NEXT:    vpsrlw $8, %xmm1, %xmm1 +; AVX-NEXT:    vpsllw $1, 
%xmm1, %xmm2 +; AVX-NEXT:    vpsllw $8, %xmm1, %xmm1 +; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7] +; AVX-NEXT:    vpsrlw $8, %xmm1, %xmm1 +; AVX-NEXT:    vpackuswb %xmm0, %xmm1, %xmm1 +; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT:    retq  ;  ; XOP-LABEL: combine_vec_udiv_nonuniform4:  ; XOP:       # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 349e5455345..dd9028dd92b 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -1491,16 +1491,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %  ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsllw %xmm3, %ymm0, %ymm4 -; AVX2-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX2-NEXT:    vpsllw %xmm3, %ymm5, %ymm3 +; AVX2-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX2-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX2-NEXT:    vpbroadcastb %xmm3, %ymm3  ; AVX2-NEXT:    vpand %ymm3, %ymm4, %ymm3  ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX2-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1 -; AVX2-NEXT:    vpsrlw %xmm4, %ymm5, %ymm4 -; AVX2-NEXT:    vpsrlw $8, %ymm4, %ymm4 +; AVX2-NEXT:    vpsrlw %xmm4, %xmm5, %xmm4 +; AVX2-NEXT:    vpsrlw $8, %xmm4, %xmm4  ; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4  ; AVX2-NEXT:    vpand %ymm4, %ymm1, %ymm1  ; AVX2-NEXT:    vpor %ymm1, %ymm3, %ymm1 @@ -1515,16 +1515,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4 -; AVX512F-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT:    vpsllw %xmm3, %ymm5, %ymm3 +; AVX512F-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512F-NEXT:    vpbroadcastb %xmm3, %ymm3  ; AVX512F-NEXT:    vpand %ymm3, %ymm4, %ymm3  ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512F-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1 -; AVX512F-NEXT:    vpsrlw %xmm4, %ymm5, %ymm4 -; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4 +; AVX512F-NEXT:    vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512F-NEXT:    vpsrlw $8, %xmm4, %xmm4  ; AVX512F-NEXT:    vpbroadcastb %xmm4, %ymm4  ; AVX512F-NEXT:    vpand %ymm4, %ymm1, %ymm1  ; AVX512F-NEXT:    vpor %ymm1, %ymm3, %ymm1 @@ -1539,16 +1539,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4 -; AVX512VL-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT:    vpsllw %xmm3, %ymm5, %ymm3 +; 
AVX512VL-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512VL-NEXT:    vpbroadcastb %xmm3, %ymm3  ; AVX512VL-NEXT:    vpand %ymm3, %ymm4, %ymm3  ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VL-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1 -; AVX512VL-NEXT:    vpsrlw %xmm4, %ymm5, %ymm4 -; AVX512VL-NEXT:    vpsrlw $8, %ymm4, %ymm4 +; AVX512VL-NEXT:    vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VL-NEXT:    vpsrlw $8, %xmm4, %xmm4  ; AVX512VL-NEXT:    vpbroadcastb %xmm4, %ymm4  ; AVX512VL-NEXT:    vpand %ymm4, %ymm1, %ymm1  ; AVX512VL-NEXT:    vpor %ymm1, %ymm3, %ymm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index 648948085f3..7e01a5999c6 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -799,27 +799,27 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsllw %xmm5, %ymm0, %ymm6 -; AVX512F-NEXT:    vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512F-NEXT:    vpsllw %xmm5, %ymm9, %ymm8 -; AVX512F-NEXT:    vpbroadcastb %xmm8, %ymm8 -; AVX512F-NEXT:    vpand %ymm8, %ymm6, %ymm6 -; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT:    vpsubb %xmm4, %xmm7, %xmm7 -; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT:    vpsrlw %xmm7, %ymm2, %ymm2 -; AVX512F-NEXT:    vpsrlw %xmm7, %ymm9, %ymm9 -; AVX512F-NEXT:    vpsrlw $8, %ymm9, %ymm9 -; AVX512F-NEXT:    vpbroadcastb %xmm9, %ymm9 -; AVX512F-NEXT:    vpand %ymm9, %ymm2, %ymm2 -; AVX512F-NEXT:    vpor %ymm2, %ymm6, %ymm2 -; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6 -; AVX512F-NEXT:    vpcmpeqb %ymm6, %ymm4, %ymm4 -; AVX512F-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT:    vpsllw %xmm5, %ymm1, %ymm2 -; AVX512F-NEXT:    vpand %ymm8, %ymm2, %ymm2 -; AVX512F-NEXT:    vpsrlw %xmm7, %ymm3, %ymm3 -; AVX512F-NEXT:    vpand %ymm9, %ymm3, %ymm3 -; AVX512F-NEXT:    vpor %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8 +; AVX512F-NEXT:    vpsllw %xmm5, %xmm8, %xmm7 +; AVX512F-NEXT:    vpbroadcastb %xmm7, %ymm7 +; AVX512F-NEXT:    vpand %ymm7, %ymm6, %ymm9 +; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT:    vpsubb %xmm4, %xmm6, %xmm6 +; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT:    vpsrlw %xmm6, %ymm2, %ymm10 +; AVX512F-NEXT:    vpsrlw %xmm6, %xmm8, %xmm2 +; AVX512F-NEXT:    vpsrlw $8, %xmm2, %xmm2 +; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2 +; AVX512F-NEXT:    vpand %ymm2, %ymm10, %ymm8 +; AVX512F-NEXT:    vpor %ymm8, %ymm9, %ymm8 +; AVX512F-NEXT:    vpxor %xmm9, %xmm9, %xmm9 +; AVX512F-NEXT:    vpcmpeqb %ymm9, %ymm4, %ymm4 +; AVX512F-NEXT:    vpblendvb %ymm4, %ymm0, %ymm8, %ymm0 +; AVX512F-NEXT:    vpsllw %xmm5, %ymm1, %ymm5 +; AVX512F-NEXT:    vpand %ymm7, %ymm5, %ymm5 +; AVX512F-NEXT:    vpsrlw %xmm6, %ymm3, %ymm3 +; AVX512F-NEXT:    vpand %ymm2, %ymm3, %ymm2 +; 
AVX512F-NEXT:    vpor %ymm2, %ymm5, %ymm2  ; AVX512F-NEXT:    vpblendvb %ymm4, %ymm1, %ymm2, %ymm1  ; AVX512F-NEXT:    retq  ; @@ -829,27 +829,27 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsllw %xmm5, %ymm0, %ymm6 -; AVX512VL-NEXT:    vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512VL-NEXT:    vpsllw %xmm5, %ymm9, %ymm8 -; AVX512VL-NEXT:    vpbroadcastb %xmm8, %ymm8 -; AVX512VL-NEXT:    vpand %ymm8, %ymm6, %ymm6 -; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT:    vpsubb %xmm4, %xmm7, %xmm7 -; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT:    vpsrlw %xmm7, %ymm2, %ymm2 -; AVX512VL-NEXT:    vpsrlw %xmm7, %ymm9, %ymm9 -; AVX512VL-NEXT:    vpsrlw $8, %ymm9, %ymm9 -; AVX512VL-NEXT:    vpbroadcastb %xmm9, %ymm9 -; AVX512VL-NEXT:    vpand %ymm9, %ymm2, %ymm2 -; AVX512VL-NEXT:    vpor %ymm2, %ymm6, %ymm2 -; AVX512VL-NEXT:    vpxor %xmm6, %xmm6, %xmm6 -; AVX512VL-NEXT:    vpcmpeqb %ymm6, %ymm4, %ymm4 -; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT:    vpsllw %xmm5, %ymm1, %ymm2 -; AVX512VL-NEXT:    vpand %ymm8, %ymm2, %ymm2 -; AVX512VL-NEXT:    vpsrlw %xmm7, %ymm3, %ymm3 -; AVX512VL-NEXT:    vpand %ymm9, %ymm3, %ymm3 -; AVX512VL-NEXT:    vpor %ymm3, %ymm2, %ymm2 +; AVX512VL-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8 +; AVX512VL-NEXT:    vpsllw %xmm5, %xmm8, %xmm7 +; AVX512VL-NEXT:    vpbroadcastb %xmm7, %ymm7 +; AVX512VL-NEXT:    vpand %ymm7, %ymm6, %ymm9 +; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT:    vpsubb %xmm4, %xmm6, %xmm6 +; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT:    vpsrlw %xmm6, %ymm2, %ymm10 +; AVX512VL-NEXT:    vpsrlw %xmm6, %xmm8, %xmm2 +; AVX512VL-NEXT:    vpsrlw $8, %xmm2, %xmm2 +; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2 +; AVX512VL-NEXT:    vpand %ymm2, %ymm10, %ymm8 +; AVX512VL-NEXT:    vpor %ymm8, %ymm9, %ymm8 +; AVX512VL-NEXT:    vpxor %xmm9, %xmm9, %xmm9 +; AVX512VL-NEXT:    vpcmpeqb %ymm9, %ymm4, %ymm4 +; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm0, %ymm8, %ymm0 +; AVX512VL-NEXT:    vpsllw %xmm5, %ymm1, %ymm5 +; AVX512VL-NEXT:    vpand %ymm7, %ymm5, %ymm5 +; AVX512VL-NEXT:    vpsrlw %xmm6, %ymm3, %ymm3 +; AVX512VL-NEXT:    vpand %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT:    vpor %ymm2, %ymm5, %ymm2  ; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm1, %ymm2, %ymm1  ; AVX512VL-NEXT:    retq  ; @@ -859,16 +859,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512BW-NEXT:    vpsllw %xmm3, %zmm0, %zmm4 -; AVX512BW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT:    vpsllw %xmm3, %zmm5, %zmm3 +; AVX512BW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512BW-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512BW-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512BW-NEXT:    vpsubb %xmm2, 
%xmm4, %xmm4  ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512BW-NEXT:    vpsrlw %xmm4, %zmm1, %zmm1 -; AVX512BW-NEXT:    vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512BW-NEXT:    vpsrlw $8, %zmm4, %zmm4 +; AVX512BW-NEXT:    vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512BW-NEXT:    vpsrlw $8, %xmm4, %xmm4  ; AVX512BW-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512BW-NEXT:    vpandq %zmm4, %zmm1, %zmm1  ; AVX512BW-NEXT:    vporq %zmm1, %zmm3, %zmm1 @@ -883,16 +883,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VBMI2-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VBMI2-NEXT:    vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VBMI2-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VBMI2-NEXT:    vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VBMI2-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VBMI2-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512VBMI2-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512VBMI2-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VBMI2-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VBMI2-NEXT:    vpsrlw %xmm4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT:    vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512VBMI2-NEXT:    vpsrlw $8, %zmm4, %zmm4 +; AVX512VBMI2-NEXT:    vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VBMI2-NEXT:    vpsrlw $8, %xmm4, %xmm4  ; AVX512VBMI2-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512VBMI2-NEXT:    vpandq %zmm4, %zmm1, %zmm1  ; AVX512VBMI2-NEXT:    vporq %zmm1, %zmm3, %zmm1 @@ -907,16 +907,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLBW-NEXT:    vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VLBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLBW-NEXT:    vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VLBW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512VLBW-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512VLBW-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VLBW-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLBW-NEXT:    vpsrlw %xmm4, %zmm1, %zmm1 -; AVX512VLBW-NEXT:    vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512VLBW-NEXT:    vpsrlw $8, %zmm4, %zmm4 +; AVX512VLBW-NEXT:    vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VLBW-NEXT:    vpsrlw $8, %xmm4, %xmm4  ; AVX512VLBW-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512VLBW-NEXT:    vpandq %zmm4, %zmm1, %zmm1  ; AVX512VLBW-NEXT:    vporq %zmm1, %zmm3, %zmm1 @@ -931,16 +931,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VLVBMI2-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLVBMI2-NEXT:    vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VLVBMI2-NEXT:    
vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLVBMI2-NEXT:    vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VLVBMI2-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLVBMI2-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512VLVBMI2-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512VLVBMI2-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512VLVBMI2-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VLVBMI2-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLVBMI2-NEXT:    vpsrlw %xmm4, %zmm1, %zmm1 -; AVX512VLVBMI2-NEXT:    vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512VLVBMI2-NEXT:    vpsrlw $8, %zmm4, %zmm4 +; AVX512VLVBMI2-NEXT:    vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VLVBMI2-NEXT:    vpsrlw $8, %xmm4, %xmm4  ; AVX512VLVBMI2-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512VLVBMI2-NEXT:    vpandq %zmm4, %zmm1, %zmm1  ; AVX512VLVBMI2-NEXT:    vporq %zmm1, %zmm3, %zmm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index b56b55553fc..65fa6f20fee 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -793,16 +793,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind  ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm3 -; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX2-NEXT:    vpsllw %xmm2, %ymm4, %ymm2 +; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX2-NEXT:    vpsllw %xmm2, %xmm4, %xmm2  ; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2  ; AVX2-NEXT:    vpand %ymm2, %ymm3, %ymm2  ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX2-NEXT:    vpsubb %xmm1, %xmm3, %xmm1  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 -; AVX2-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1 -; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1 +; AVX2-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1 +; AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1  ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0  ; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0 @@ -814,16 +814,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsllw %xmm2, %ymm0, %ymm3 -; AVX512F-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512F-NEXT:    vpsllw %xmm2, %ymm4, %ymm2 +; AVX512F-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512F-NEXT:    vpsllw %xmm2, %xmm4, %xmm2  ; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2  ; AVX512F-NEXT:    vpand %ymm2, %ymm3, %ymm2  ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512F-NEXT:    vpsubb %xmm1, %xmm3, %xmm1  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1 +; AVX512F-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512F-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX512F-NEXT:    
vpbroadcastb %xmm1, %ymm1  ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0  ; AVX512F-NEXT:    vpor %ymm0, %ymm2, %ymm0 @@ -835,16 +835,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsllw %xmm2, %ymm0, %ymm3 -; AVX512VL-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512VL-NEXT:    vpsllw %xmm2, %ymm4, %ymm2 +; AVX512VL-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512VL-NEXT:    vpsllw %xmm2, %xmm4, %xmm2  ; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2  ; AVX512VL-NEXT:    vpand %ymm2, %ymm3, %ymm2  ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm3, %xmm1  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1 +; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX512VL-NEXT:    vpbroadcastb %xmm1, %ymm1  ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0  ; AVX512VL-NEXT:    vpor %ymm0, %ymm2, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index c096a6cb86e..de7959d6b5a 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -387,16 +387,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4 -; AVX512F-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT:    vpsllw %xmm3, %ymm5, %ymm6 +; AVX512F-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT:    vpsllw %xmm3, %xmm5, %xmm6  ; AVX512F-NEXT:    vpbroadcastb %xmm6, %ymm6  ; AVX512F-NEXT:    vpand %ymm6, %ymm4, %ymm4  ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512F-NEXT:    vpsubb %xmm2, %xmm7, %xmm2  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512F-NEXT:    vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5 +; AVX512F-NEXT:    vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512F-NEXT:    vpsrlw $8, %xmm5, %xmm5  ; AVX512F-NEXT:    vpbroadcastb %xmm5, %ymm5  ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0  ; AVX512F-NEXT:    vpor %ymm0, %ymm4, %ymm0 @@ -413,16 +413,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4 -; AVX512VL-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT:    vpsllw %xmm3, %ymm5, %ymm6 +; AVX512VL-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT:    vpsllw %xmm3, %xmm5, %xmm6  ; AVX512VL-NEXT:    vpbroadcastb %xmm6, %ymm6  ; AVX512VL-NEXT:    vpand %ymm6, %ymm4, %ymm4  ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm7 = 
[8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VL-NEXT:    vpsubb %xmm2, %xmm7, %xmm2  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512VL-NEXT:    vpsrlw $8, %ymm5, %ymm5 +; AVX512VL-NEXT:    vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512VL-NEXT:    vpsrlw $8, %xmm5, %xmm5  ; AVX512VL-NEXT:    vpbroadcastb %xmm5, %ymm5  ; AVX512VL-NEXT:    vpand %ymm5, %ymm0, %ymm0  ; AVX512VL-NEXT:    vpor %ymm0, %ymm4, %ymm0 @@ -440,8 +440,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3  ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512BW-NEXT:    vpsllw %xmm3, %zmm0, %zmm4 -; AVX512BW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT:    vpsllw %xmm3, %zmm5, %zmm3 +; AVX512BW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512BW-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512BW-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512BW-NEXT:    vpxor %xmm4, %xmm4, %xmm4 @@ -449,8 +449,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm1  ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm5, %zmm1 -; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm5, %xmm1 +; AVX512BW-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1  ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0  ; AVX512BW-NEXT:    vporq %zmm0, %zmm3, %zmm0 @@ -463,8 +463,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3  ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLBW-NEXT:    vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VLBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLBW-NEXT:    vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VLBW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT:    vpsllw %xmm3, %xmm5, %xmm3  ; AVX512VLBW-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512VLBW-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512VLBW-NEXT:    vpxor %xmm4, %xmm4, %xmm4 @@ -472,8 +472,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm1  ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm5, %zmm1 -; AVX512VLBW-NEXT:    vpsrlw $8, %zmm1, %zmm1 +; AVX512VLBW-NEXT:    vpsrlw %xmm1, %xmm5, %xmm1 +; AVX512VLBW-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %zmm1  ; AVX512VLBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0  ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm3, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index c27828b0e76..61b7c55e557 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -1492,16 +1492,16 @@ define <32 x 
i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %  ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsrlw %xmm3, %ymm1, %ymm4 -; AVX2-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX2-NEXT:    vpsrlw %xmm3, %ymm5, %ymm3 -; AVX2-NEXT:    vpsrlw $8, %ymm3, %ymm3 +; AVX2-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX2-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3 +; AVX2-NEXT:    vpsrlw $8, %xmm3, %xmm3  ; AVX2-NEXT:    vpbroadcastb %xmm3, %ymm3  ; AVX2-NEXT:    vpand %ymm3, %ymm4, %ymm3  ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX2-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsllw %xmm4, %ymm0, %ymm0 -; AVX2-NEXT:    vpsllw %xmm4, %ymm5, %ymm4 +; AVX2-NEXT:    vpsllw %xmm4, %xmm5, %xmm4  ; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4  ; AVX2-NEXT:    vpand %ymm4, %ymm0, %ymm0  ; AVX2-NEXT:    vpor %ymm3, %ymm0, %ymm0 @@ -1516,16 +1516,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsrlw %xmm3, %ymm1, %ymm4 -; AVX512F-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT:    vpsrlw %xmm3, %ymm5, %ymm3 -; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3 +; AVX512F-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512F-NEXT:    vpsrlw $8, %xmm3, %xmm3  ; AVX512F-NEXT:    vpbroadcastb %xmm3, %ymm3  ; AVX512F-NEXT:    vpand %ymm3, %ymm4, %ymm3  ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512F-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsllw %xmm4, %ymm0, %ymm0 -; AVX512F-NEXT:    vpsllw %xmm4, %ymm5, %ymm4 +; AVX512F-NEXT:    vpsllw %xmm4, %xmm5, %xmm4  ; AVX512F-NEXT:    vpbroadcastb %xmm4, %ymm4  ; AVX512F-NEXT:    vpand %ymm4, %ymm0, %ymm0  ; AVX512F-NEXT:    vpor %ymm3, %ymm0, %ymm0 @@ -1540,16 +1540,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsrlw %xmm3, %ymm1, %ymm4 -; AVX512VL-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT:    vpsrlw %xmm3, %ymm5, %ymm3 -; AVX512VL-NEXT:    vpsrlw $8, %ymm3, %ymm3 +; AVX512VL-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VL-NEXT:    vpsrlw $8, %xmm3, %xmm3  ; AVX512VL-NEXT:    vpbroadcastb %xmm3, %ymm3  ; AVX512VL-NEXT:    vpand %ymm3, %ymm4, %ymm3  ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VL-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsllw %xmm4, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpsllw %xmm4, %ymm5, %ymm4 +; AVX512VL-NEXT:    vpsllw %xmm4, %xmm5, %xmm4  ; AVX512VL-NEXT:    vpbroadcastb 
%xmm4, %ymm4  ; AVX512VL-NEXT:    vpand %ymm4, %ymm0, %ymm0  ; AVX512VL-NEXT:    vpor %ymm3, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index b73724b77e9..f2b31d4b80d 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -787,26 +787,26 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsrlw %xmm5, %ymm2, %ymm6 -; AVX512F-NEXT:    vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512F-NEXT:    vpsrlw %xmm5, %ymm9, %ymm8 -; AVX512F-NEXT:    vpsrlw $8, %ymm8, %ymm8 -; AVX512F-NEXT:    vpbroadcastb %xmm8, %ymm8 -; AVX512F-NEXT:    vpand %ymm8, %ymm6, %ymm6 -; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT:    vpsubb %xmm4, %xmm7, %xmm7 -; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT:    vpsllw %xmm7, %ymm0, %ymm0 -; AVX512F-NEXT:    vpsllw %xmm7, %ymm9, %ymm9 -; AVX512F-NEXT:    vpbroadcastb %xmm9, %ymm9 -; AVX512F-NEXT:    vpand %ymm9, %ymm0, %ymm0 -; AVX512F-NEXT:    vpor %ymm6, %ymm0, %ymm0 -; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6 -; AVX512F-NEXT:    vpcmpeqb %ymm6, %ymm4, %ymm4 +; AVX512F-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8 +; AVX512F-NEXT:    vpsrlw %xmm5, %xmm8, %xmm7 +; AVX512F-NEXT:    vpsrlw $8, %xmm7, %xmm7 +; AVX512F-NEXT:    vpbroadcastb %xmm7, %ymm7 +; AVX512F-NEXT:    vpand %ymm7, %ymm6, %ymm9 +; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT:    vpsubb %xmm4, %xmm6, %xmm6 +; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT:    vpsllw %xmm6, %ymm0, %ymm10 +; AVX512F-NEXT:    vpsllw %xmm6, %xmm8, %xmm0 +; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm8 +; AVX512F-NEXT:    vpand %ymm8, %ymm10, %ymm0 +; AVX512F-NEXT:    vpor %ymm9, %ymm0, %ymm0 +; AVX512F-NEXT:    vpxor %xmm9, %xmm9, %xmm9 +; AVX512F-NEXT:    vpcmpeqb %ymm9, %ymm4, %ymm4  ; AVX512F-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0  ; AVX512F-NEXT:    vpsrlw %xmm5, %ymm3, %ymm2 -; AVX512F-NEXT:    vpand %ymm8, %ymm2, %ymm2 -; AVX512F-NEXT:    vpsllw %xmm7, %ymm1, %ymm1 -; AVX512F-NEXT:    vpand %ymm9, %ymm1, %ymm1 +; AVX512F-NEXT:    vpand %ymm7, %ymm2, %ymm2 +; AVX512F-NEXT:    vpsllw %xmm6, %ymm1, %ymm1 +; AVX512F-NEXT:    vpand %ymm8, %ymm1, %ymm1  ; AVX512F-NEXT:    vpor %ymm2, %ymm1, %ymm1  ; AVX512F-NEXT:    vpblendvb %ymm4, %ymm3, %ymm1, %ymm1  ; AVX512F-NEXT:    retq @@ -817,26 +817,26 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsrlw %xmm5, %ymm2, %ymm6 -; AVX512VL-NEXT:    vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512VL-NEXT:    vpsrlw %xmm5, %ymm9, %ymm8 -; AVX512VL-NEXT:    vpsrlw $8, %ymm8, %ymm8 -; AVX512VL-NEXT:    vpbroadcastb %xmm8, %ymm8 -; AVX512VL-NEXT:    vpand %ymm8, %ymm6, %ymm6 -; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT:    vpsubb %xmm4, %xmm7, %xmm7 -; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} 
xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT:    vpsllw %xmm7, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpsllw %xmm7, %ymm9, %ymm9 -; AVX512VL-NEXT:    vpbroadcastb %xmm9, %ymm9 -; AVX512VL-NEXT:    vpand %ymm9, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpor %ymm6, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpxor %xmm6, %xmm6, %xmm6 -; AVX512VL-NEXT:    vpcmpeqb %ymm6, %ymm4, %ymm4 +; AVX512VL-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8 +; AVX512VL-NEXT:    vpsrlw %xmm5, %xmm8, %xmm7 +; AVX512VL-NEXT:    vpsrlw $8, %xmm7, %xmm7 +; AVX512VL-NEXT:    vpbroadcastb %xmm7, %ymm7 +; AVX512VL-NEXT:    vpand %ymm7, %ymm6, %ymm9 +; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT:    vpsubb %xmm4, %xmm6, %xmm6 +; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT:    vpsllw %xmm6, %ymm0, %ymm10 +; AVX512VL-NEXT:    vpsllw %xmm6, %xmm8, %xmm0 +; AVX512VL-NEXT:    vpbroadcastb %xmm0, %ymm8 +; AVX512VL-NEXT:    vpand %ymm8, %ymm10, %ymm0 +; AVX512VL-NEXT:    vpor %ymm9, %ymm0, %ymm0 +; AVX512VL-NEXT:    vpxor %xmm9, %xmm9, %xmm9 +; AVX512VL-NEXT:    vpcmpeqb %ymm9, %ymm4, %ymm4  ; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0  ; AVX512VL-NEXT:    vpsrlw %xmm5, %ymm3, %ymm2 -; AVX512VL-NEXT:    vpand %ymm8, %ymm2, %ymm2 -; AVX512VL-NEXT:    vpsllw %xmm7, %ymm1, %ymm1 -; AVX512VL-NEXT:    vpand %ymm9, %ymm1, %ymm1 +; AVX512VL-NEXT:    vpand %ymm7, %ymm2, %ymm2 +; AVX512VL-NEXT:    vpsllw %xmm6, %ymm1, %ymm1 +; AVX512VL-NEXT:    vpand %ymm8, %ymm1, %ymm1  ; AVX512VL-NEXT:    vpor %ymm2, %ymm1, %ymm1  ; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm3, %ymm1, %ymm1  ; AVX512VL-NEXT:    retq @@ -847,16 +847,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512BW-NEXT:    vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512BW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT:    vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512BW-NEXT:    vpsrlw $8, %zmm3, %zmm3 +; AVX512BW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512BW-NEXT:    vpsrlw $8, %xmm3, %xmm3  ; AVX512BW-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512BW-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512BW-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512BW-NEXT:    vpsllw %xmm4, %zmm0, %zmm0 -; AVX512BW-NEXT:    vpsllw %xmm4, %zmm5, %zmm4 +; AVX512BW-NEXT:    vpsllw %xmm4, %xmm5, %xmm4  ; AVX512BW-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512BW-NEXT:    vpandq %zmm4, %zmm0, %zmm0  ; AVX512BW-NEXT:    vporq %zmm3, %zmm0, %zmm0 @@ -870,16 +870,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VBMI2-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VBMI2-NEXT:    vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512VBMI2-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VBMI2-NEXT:    vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512VBMI2-NEXT:    vpsrlw $8, %zmm3, 
%zmm3 +; AVX512VBMI2-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VBMI2-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VBMI2-NEXT:    vpsrlw $8, %xmm3, %xmm3  ; AVX512VBMI2-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512VBMI2-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VBMI2-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VBMI2-NEXT:    vpsllw %xmm4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT:    vpsllw %xmm4, %zmm5, %zmm4 +; AVX512VBMI2-NEXT:    vpsllw %xmm4, %xmm5, %xmm4  ; AVX512VBMI2-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512VBMI2-NEXT:    vpandq %zmm4, %zmm0, %zmm0  ; AVX512VBMI2-NEXT:    vporq %zmm3, %zmm0, %zmm0 @@ -893,16 +893,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLBW-NEXT:    vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512VLBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLBW-NEXT:    vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512VLBW-NEXT:    vpsrlw $8, %zmm3, %zmm3 +; AVX512VLBW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLBW-NEXT:    vpsrlw $8, %xmm3, %xmm3  ; AVX512VLBW-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512VLBW-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VLBW-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLBW-NEXT:    vpsllw %xmm4, %zmm0, %zmm0 -; AVX512VLBW-NEXT:    vpsllw %xmm4, %zmm5, %zmm4 +; AVX512VLBW-NEXT:    vpsllw %xmm4, %xmm5, %xmm4  ; AVX512VLBW-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512VLBW-NEXT:    vpandq %zmm4, %zmm0, %zmm0  ; AVX512VLBW-NEXT:    vporq %zmm3, %zmm0, %zmm0 @@ -916,16 +916,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %  ; AVX512VLVBMI2-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2  ; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLVBMI2-NEXT:    vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512VLVBMI2-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLVBMI2-NEXT:    vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512VLVBMI2-NEXT:    vpsrlw $8, %zmm3, %zmm3 +; AVX512VLVBMI2-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLVBMI2-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLVBMI2-NEXT:    vpsrlw $8, %xmm3, %xmm3  ; AVX512VLVBMI2-NEXT:    vpbroadcastb %xmm3, %zmm3  ; AVX512VLVBMI2-NEXT:    vpandq %zmm3, %zmm4, %zmm3  ; AVX512VLVBMI2-NEXT:    vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VLVBMI2-NEXT:    vpsubb %xmm2, %xmm4, %xmm4  ; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VLVBMI2-NEXT:    vpsllw %xmm4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT:    vpsllw %xmm4, %zmm5, %zmm4 +; AVX512VLVBMI2-NEXT:    vpsllw %xmm4, %xmm5, %xmm4  ; AVX512VLVBMI2-NEXT:    vpbroadcastb %xmm4, %zmm4  ; AVX512VLVBMI2-NEXT:    vpandq %zmm4, %zmm0, %zmm0  ; AVX512VLVBMI2-NEXT:    vporq %zmm3, %zmm0, %zmm0 diff --git 
a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index bc477c2e7bb..6e17724bdc1 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -863,16 +863,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind  ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm3 -; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX2-NEXT:    vpsllw %xmm2, %ymm4, %ymm2 +; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX2-NEXT:    vpsllw %xmm2, %xmm4, %xmm2  ; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2  ; AVX2-NEXT:    vpand %ymm2, %ymm3, %ymm2  ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX2-NEXT:    vpsubb %xmm1, %xmm3, %xmm1  ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 -; AVX2-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1 -; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1 +; AVX2-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1 +; AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1  ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0  ; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0 @@ -886,16 +886,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsllw %xmm2, %ymm0, %ymm3 -; AVX512F-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512F-NEXT:    vpsllw %xmm2, %ymm4, %ymm2 +; AVX512F-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512F-NEXT:    vpsllw %xmm2, %xmm4, %xmm2  ; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2  ; AVX512F-NEXT:    vpand %ymm2, %ymm3, %ymm2  ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512F-NEXT:    vpsubb %xmm1, %xmm3, %xmm1  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1 +; AVX512F-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512F-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX512F-NEXT:    vpbroadcastb %xmm1, %ymm1  ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0  ; AVX512F-NEXT:    vpor %ymm0, %ymm2, %ymm0 @@ -909,16 +909,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsllw %xmm2, %ymm0, %ymm3 -; AVX512VL-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512VL-NEXT:    vpsllw %xmm2, %ymm4, %ymm2 +; AVX512VL-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512VL-NEXT:    vpsllw %xmm2, %xmm4, %xmm2  ; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2  ; AVX512VL-NEXT:    vpand %ymm2, %ymm3, %ymm2  ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm3, %xmm1  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero  ; 
AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1 +; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT:    vpsrlw $8, %xmm1, %xmm1  ; AVX512VL-NEXT:    vpbroadcastb %xmm1, %ymm1  ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0  ; AVX512VL-NEXT:    vpor %ymm0, %ymm2, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index ee8e2a988f0..2a25efd50ff 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -405,16 +405,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4 -; AVX512F-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT:    vpsllw %xmm3, %ymm5, %ymm6 +; AVX512F-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT:    vpsllw %xmm3, %xmm5, %xmm6  ; AVX512F-NEXT:    vpbroadcastb %xmm6, %ymm6  ; AVX512F-NEXT:    vpand %ymm6, %ymm4, %ymm4  ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512F-NEXT:    vpsubb %xmm2, %xmm7, %xmm2  ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512F-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512F-NEXT:    vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5 +; AVX512F-NEXT:    vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512F-NEXT:    vpsrlw $8, %xmm5, %xmm5  ; AVX512F-NEXT:    vpbroadcastb %xmm5, %ymm5  ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0  ; AVX512F-NEXT:    vpor %ymm0, %ymm4, %ymm0 @@ -433,16 +433,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4 -; AVX512VL-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT:    vpsllw %xmm3, %ymm5, %ymm6 +; AVX512VL-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT:    vpsllw %xmm3, %xmm5, %xmm6  ; AVX512VL-NEXT:    vpbroadcastb %xmm6, %ymm6  ; AVX512VL-NEXT:    vpand %ymm6, %ymm4, %ymm4  ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]  ; AVX512VL-NEXT:    vpsubb %xmm2, %xmm7, %xmm2  ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512VL-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512VL-NEXT:    vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512VL-NEXT:    vpsrlw $8, %ymm5, %ymm5 +; AVX512VL-NEXT:    vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512VL-NEXT:    vpsrlw $8, %xmm5, %xmm5  ; AVX512VL-NEXT:    vpbroadcastb %xmm5, %ymm5  ; AVX512VL-NEXT:    vpand %ymm5, %ymm0, %ymm0  ; AVX512VL-NEXT:    vpor %ymm0, %ymm4, %ymm0 @@ -460,9 +460,9 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind  ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3  ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero  ; AVX512BW-NEXT:    vpsrlw %xmm3, %zmm0, %zmm4 -; AVX512BW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT:    vpsrlw %xmm3, %zmm5, %zmm3 -; 
AVX512BW-NEXT:    vpsrlw $8, %zmm3, %zmm3
+; AVX512BW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512BW-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3
+; AVX512BW-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX512BW-NEXT:    vpbroadcastb %xmm3, %zmm3
 ; AVX512BW-NEXT:    vpandq %zmm3, %zmm4, %zmm3
 ; AVX512BW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
@@ -470,7 +470,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpsllw %xmm1, %zmm5, %zmm1
+; AVX512BW-NEXT:    vpsllw %xmm1, %xmm5, %xmm1
 ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vporq %zmm3, %zmm0, %zmm0
@@ -483,9 +483,9 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsrlw %xmm3, %zmm0, %zmm4
-; AVX512VLBW-NEXT:    vpternlogd $255, %zmm5, %zmm5, %zmm5
-; AVX512VLBW-NEXT:    vpsrlw %xmm3, %zmm5, %zmm3
-; AVX512VLBW-NEXT:    vpsrlw $8, %zmm3, %zmm3
+; AVX512VLBW-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512VLBW-NEXT:    vpsrlw %xmm3, %xmm5, %xmm3
+; AVX512VLBW-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX512VLBW-NEXT:    vpbroadcastb %xmm3, %zmm3
 ; AVX512VLBW-NEXT:    vpandq %zmm3, %zmm4, %zmm3
 ; AVX512VLBW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
@@ -493,7 +493,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpsllw %xmm1, %zmm5, %zmm1
+; AVX512VLBW-NEXT:    vpsllw %xmm1, %xmm5, %xmm1
 ; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %zmm1
 ; AVX512VLBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT:    vporq %zmm3, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
index 7a7d96d2d58..4c366a61f39 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
@@ -157,8 +157,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, %rax
@@ -181,8 +181,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BW-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX512BW-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX512BW-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
@@ -205,8 +205,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BWVL-NEXT:    vpmuludq %ymm1, %ymm2, %ymm2
 ; AVX512BWVL-NEXT:    vpsrlq $32, %ymm1, %ymm3
 ; AVX512BWVL-NEXT:    vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BWVL-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BWVL-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vmovq %xmm0, %rax
@@ -349,8 +349,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, %rax
@@ -382,8 +382,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
@@ -415,8 +415,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BWVL-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BWVL-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vmovq %xmm0, %rax
@@ -652,8 +652,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, %rax
@@ -693,8 +693,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
@@ -734,8 +734,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BWVL-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BWVL-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vmovq %xmm0, %rax
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index a533959b8f2..12cef44b3e2 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -157,8 +157,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, %rax
@@ -181,8 +181,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BW-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX512BW-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX512BW-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
@@ -205,8 +205,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BWVL-NEXT:    vpmuludq %ymm1, %ymm2, %ymm2
 ; AVX512BWVL-NEXT:    vpsrlq $32, %ymm1, %ymm3
 ; AVX512BWVL-NEXT:    vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BWVL-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BWVL-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vmovq %xmm0, %rax
@@ -349,8 +349,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, %rax
@@ -382,8 +382,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
@@ -415,8 +415,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BWVL-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BWVL-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vmovq %xmm0, %rax
@@ -652,8 +652,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, %rax
@@ -693,8 +693,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
@@ -734,8 +734,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BWVL-NEXT:    vpsrlq $32, %zmm1, %zmm3
 ; AVX512BWVL-NEXT:    vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT:    vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT:    vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vmovq %xmm0, %rax
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index d7ca7442799..aa3c647cc2f 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -783,16 +783,16 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
-; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX2-NEXT:    vpsllw %xmm2, %ymm4, %ymm2
+; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX2-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
 ; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
 ; AVX2-NEXT:    vpand %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX2-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1
-; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX2-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
+; AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
@@ -804,16 +804,16 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
-; AVX512F-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX512F-NEXT:    vpsllw %xmm2, %ymm4, %ymm2
+; AVX512F-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512F-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2
 ; AVX512F-NEXT:    vpand %ymm2, %ymm3, %ymm2
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512F-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1
-; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512F-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512F-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpor %ymm0, %ymm2, %ymm0
@@ -825,16 +825,16 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX512VL-NEXT:    vpsllw %xmm2, %ymm4, %ymm2
+; AVX512VL-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2
 ; AVX512VL-NEXT:    vpand %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm4, %ymm1
-; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512VL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpor %ymm0, %ymm2, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 54725e32d39..b7793ed6c92 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -377,16 +377,16 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
-; AVX512F-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5
-; AVX512F-NEXT:    vpsllw %xmm3, %ymm5, %ymm6
+; AVX512F-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512F-NEXT:    vpsllw %xmm3, %xmm5, %xmm6
 ; AVX512F-NEXT:    vpbroadcastb %xmm6, %ymm6
 ; AVX512F-NEXT:    vpand %ymm6, %ymm4, %ymm4
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512F-NEXT:    vpsubb %xmm2, %xmm7, %xmm2
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT:    vpsrlw %xmm2, %ymm5, %ymm5
-; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5
+; AVX512F-NEXT:    vpsrlw %xmm2, %xmm5, %xmm5
+; AVX512F-NEXT:    vpsrlw $8, %xmm5, %xmm5
 ; AVX512F-NEXT:    vpbroadcastb %xmm5, %ymm5
 ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpor %ymm0, %ymm4, %ymm0
@@ -403,16 +403,16 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpcmpeqd %ymm5, %ymm5, %ymm5
-; AVX512VL-NEXT:    vpsllw %xmm3, %ymm5, %ymm6
+; AVX512VL-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512VL-NEXT:    vpsllw %xmm3, %xmm5, %xmm6
 ; AVX512VL-NEXT:    vpbroadcastb %xmm6, %ymm6
 ; AVX512VL-NEXT:    vpand %ymm6, %ymm4, %ymm4
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512VL-NEXT:    vpsubb %xmm2, %xmm7, %xmm2
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpsrlw %xmm2, %ymm5, %ymm5
-; AVX512VL-NEXT:    vpsrlw $8, %ymm5, %ymm5
+; AVX512VL-NEXT:    vpsrlw %xmm2, %xmm5, %xmm5
+; AVX512VL-NEXT:    vpsrlw $8, %xmm5, %xmm5
 ; AVX512VL-NEXT:    vpbroadcastb %xmm5, %ymm5
 ; AVX512VL-NEXT:    vpand %ymm5, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpor %ymm0, %ymm4, %ymm0
@@ -430,13 +430,13 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm3
-; AVX512BW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT:    vpsllw %xmm2, %zmm4, %zmm2
+; AVX512BW-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512BW-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512BW-NEXT:    vpbroadcastb %xmm2, %zmm2
 ; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm2
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm4, %zmm1
-; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512BW-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vporq %zmm0, %zmm2, %zmm0
@@ -449,13 +449,13 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm3
-; AVX512VLBW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm4, %zmm2
+; AVX512VLBW-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VLBW-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512VLBW-NEXT:    vpbroadcastb %xmm2, %zmm2
 ; AVX512VLBW-NEXT:    vpandq %zmm2, %zmm3, %zmm2
 ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm4, %zmm1
-; AVX512VLBW-NEXT:    vpsrlw $8, %zmm1, %zmm1
+; AVX512VLBW-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512VLBW-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %zmm1
 ; AVX512VLBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm2, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index fdf7f4aa109..7f6d49e7660 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -883,9 +883,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT:    vpsrlw %xmm1, %ymm2, %ymm2
-; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpsrlw $8, %xmm2, %xmm2
 ; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
 ; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -922,9 +922,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vpsrlw $8, %xmm2, %xmm2
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -946,9 +946,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQVL:       # %bb.0:
 ; AVX512DQVL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQVL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX512DQVL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpbroadcastb %xmm2, %ymm2
 ; AVX512DQVL-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -990,9 +990,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; X32-AVX2:       # %bb.0:
 ; X32-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; X32-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X32-AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; X32-AVX2-NEXT:    vpsrlw $8, %xmm2, %xmm2
 ; X32-AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
 ; X32-AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -1185,7 +1185,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmulhw {{.*}}(%rip), %ymm0, %ymm1
 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
-; AVX2-NEXT:    vpsraw $1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsraw $1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT:    retq
@@ -1248,7 +1248,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; X32-AVX2:       # %bb.0:
 ; X32-AVX2-NEXT:    vpmulhw {{\.LCPI.*}}, %ymm0, %ymm1
 ; X32-AVX2-NEXT:    vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
-; X32-AVX2-NEXT:    vpsraw $1, %ymm0, %ymm0
+; X32-AVX2-NEXT:    vpsraw $1, %xmm0, %xmm0
 ; X32-AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
 ; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; X32-AVX2-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 5635f1ea3dd..e4f676cecd6 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -183,9 +183,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT:    vpsrlw %xmm2, %xmm3, %xmm3
+; AVX512DQ-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm3, %ymm3
 ; AVX512DQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -202,9 +202,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpsrlw $8, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpbroadcastb %xmm2, %zmm2
 ; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index ae087ba3618..698a45fad4d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -720,9 +720,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT:    vpsrlw %xmm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
@@ -755,9 +755,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpsrlw %xmm1, %ymm2, %ymm1
-; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    retq
@@ -775,9 +775,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQVL:       # %bb.0:
 ; AVX512DQVL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQVL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpsrlw %xmm1, %ymm2, %ymm1
-; AVX512DQVL-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512DQVL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQVL-NEXT:    retq
@@ -809,9 +809,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; X32-AVX2:       # %bb.0:
 ; X32-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm2, %ymm1
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; X32-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X32-AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm1
+; X32-AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; X32-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; X32-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 9a44fc31820..f32b56d6035 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -147,9 +147,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT:    vpsrlw %xmm2, %xmm3, %xmm3
+; AVX512DQ-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm3, %ymm3
 ; AVX512DQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
@@ -160,9 +160,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm2, %zmm1
-; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index bcd24b16795..273ff325f26 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -653,8 +653,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT:    vpsllw %xmm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
@@ -683,8 +683,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm2, %ymm1
+; AVX512DQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    retq
@@ -702,8 +702,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQVL:       # %bb.0:
 ; AVX512DQVL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQVL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpsllw %xmm1, %ymm2, %ymm1
+; AVX512DQVL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
 ; AVX512DQVL-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; AVX512DQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQVL-NEXT:    retq
@@ -736,8 +736,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; X32-AVX2:       # %bb.0:
 ; X32-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; X32-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsllw %xmm1, %ymm2, %ymm1
+; X32-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X32-AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
 ; X32-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
 ; X32-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index f1e8515b927..f63e1ab8d94 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -142,8 +142,8 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT:    vpsllw %xmm2, %xmm3, %xmm3
 ; AVX512DQ-NEXT:    vpbroadcastb %xmm3, %ymm3
 ; AVX512DQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
@@ -154,8 +154,8 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsllw %xmm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
 ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
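Illustrative sketch, not part of this commit's tests: the vector-reduce-mul diffs above show a 256-bit vpsllq being narrowed to an xmm operation once only the low element of the shifted vector is still demanded. IR of roughly the following shape (hypothetical function and value names, assuming an AVX2 target such as llc -mtriple=x86_64-- -mattr=+avx2) is the kind of input that exercises that path:

; Only lane 0 of %shift is used, so the demanded-elements simplification can
; perform the X86ISD shift node on the low 128-bit subvector instead of the
; full 256-bit vector.
define i64 @demanded_low_shift(<4 x i64> %a) {
  %shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
  %elt = extractelement <4 x i64> %shift, i32 0
  ret i64 %elt
}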

