author      Craig Topper <craig.topper@gmail.com>    2017-01-09 02:44:34 +0000
committer   Craig Topper <craig.topper@gmail.com>    2017-01-09 02:44:34 +0000
commit      6393afce978a30737e170150b7c34de4986cd2a8 (patch)
tree        7baf83460bb948fc151904c6784dfc3095b2176c /llvm
parent      3e6490399e0ac56d25819470ed7e53017f25351e (diff)
[AVX-512] Add patterns to use a zero masked VPTERNLOG instruction for vselects of all ones and all zeros.
Previously we emitted a VPTERNLOG and a separate masked move.

llvm-svn: 291415
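As a rough illustration (not part of the commit), the shape of IR these new patterns match is sketched below; the function name is made up, but it mirrors the sext_16i1_16i32 test updated in this patch:

define <16 x i32> @select_ones_zeros(<16 x i32> %a1, <16 x i32> %a2) {
  ; Compare produces a <16 x i1> mask register on AVX-512.
  %cmp = icmp slt <16 x i32> %a1, %a2
  ; Select between all-ones and all-zeros vectors under that mask.
  %sel = select <16 x i1> %cmp, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

Before this change, KNL lowered such a select to a vpternlogd that materialized an all-ones register followed by a separate zero-masked vmovdqa32; with these patterns the two collapse into a single zero-masked vpternlogd, as the test diffs below show.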
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td            |  16
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp             |  15
-rw-r--r--  llvm/test/CodeGen/X86/avx512-calling-conv.ll     |  24
-rw-r--r--  llvm/test/CodeGen/X86/avx512-cvt.ll              |  14
-rw-r--r--  llvm/test/CodeGen/X86/avx512-ext.ll              |   3
-rw-r--r--  llvm/test/CodeGen/X86/avx512-insert-extract.ll   |  56
-rw-r--r--  llvm/test/CodeGen/X86/avx512-mask-op.ll          | 110
-rw-r--r--  llvm/test/CodeGen/X86/avx512-vbroadcast.ll       |   3
-rw-r--r--  llvm/test/CodeGen/X86/avx512-vec-cmp.ll          |   6
-rw-r--r--  llvm/test/CodeGen/X86/sse-fsignum.ll             |  11
-rw-r--r--  llvm/test/CodeGen/X86/vector-compare-results.ll  |  24
-rw-r--r--  llvm/test/CodeGen/X86/vector-sext.ll             |  45
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-v1.ll       |  74
13 files changed, 184 insertions, 217 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 774a904a826..75a265bf8a1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -443,6 +443,22 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
+// Alias instructions that allow VPTERNLOG to be used with a mask to create
+// a mix of all ones and all zeros elements. This is done this way to force
+// the same register to be used as input for all three sources.
+let isPseudo = 1, Predicates = [HasAVX512] in {
+def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
+ (ins VK16WM:$mask), "",
+ [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
+ (v16i32 immAllOnesV),
+ (v16i32 immAllZerosV)))]>;
+def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
+ (ins VK8WM:$mask), "",
+ [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
+ (bc_v8i64 (v16i32 immAllOnesV)),
+ (bc_v8i64 (v16i32 immAllZerosV))))]>;
+}
+
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 189c6f89be3..e3484d062bc 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6864,6 +6864,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(Reg, RegState::Undef).addImm(0xff);
return true;
}
+ case X86::AVX512_512_SEXT_MASK_32:
+ case X86::AVX512_512_SEXT_MASK_64: {
+ unsigned Reg = MIB->getOperand(0).getReg();
+ unsigned MaskReg = MIB->getOperand(1).getReg();
+ unsigned MaskState = getRegState(MIB->getOperand(1));
+ unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
+ X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
+ MI.RemoveOperand(1);
+ MIB->setDesc(get(Opc));
+ // VPTERNLOG needs 3 register inputs and an immediate.
+ // 0xff will return 1s for any input.
+ MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
+ .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
+ return true;
+ }
case X86::VMOVAPSZ128rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index 532678ae72f..1a91bc1dee9 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -25,8 +25,7 @@ define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -48,8 +47,7 @@ define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_X32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_X32-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <16 x i1>%a, %b
@@ -65,8 +63,7 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -88,8 +85,7 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
; KNL_X32-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL_X32-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <8 x i1>%a, %b
@@ -180,8 +176,7 @@ define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
; KNL-NEXT: Lcfi1:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: callq _func16xi1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
@@ -210,8 +205,7 @@ define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
; KNL_X32-NEXT: Lcfi1:
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT: calll _func16xi1
; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
@@ -285,8 +279,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL-NEXT: movb $85, %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
@@ -322,8 +315,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL_X32-NEXT: movb $85, %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
; KNL_X32-NEXT: addl $12, %esp
; KNL_X32-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index c2eb19d1665..5e50a3aef2f 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -740,8 +740,7 @@ define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vcvtdq2ps %zmm0, %zmm0
; KNL-NEXT: retq
;
@@ -805,11 +804,10 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: vcmpltpd %zmm1, %zmm2, %k1
; KNL-NEXT: vcmpltpd %zmm0, %zmm2, %k2
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k2} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm1, %ymm1
; KNL-NEXT: vcvtdq2pd %ymm1, %zmm1
; KNL-NEXT: retq
@@ -834,8 +832,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0
; KNL-NEXT: retq
@@ -858,8 +855,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1
; KNL-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vcvtdq2ps %ymm0, %ymm0
; KNL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 32bd0804d63..358225b6fe0 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1457,8 +1457,7 @@ define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
; KNL-LABEL: sext_16i1_16i32:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: sext_16i1_16i32:
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 26d14fa0840..cb8ed0e59a3 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -365,11 +365,10 @@ define i16 @test16(i1 *%addr, i16 %a) {
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %esi, %k2
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
-; KNL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
@@ -402,11 +401,10 @@ define i8 @test17(i1 *%addr, i8 %a) {
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %esi, %k2
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
-; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
@@ -1242,30 +1240,29 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-NEXT: vpextrd $1, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k2
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k2} {z}
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vmovd %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k2
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k2} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,8,2,3,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm2, %zmm3, %zmm4
-; KNL-NEXT: vpsllq $63, %zmm4, %zmm2
-; KNL-NEXT: vptestmq %zmm2, %zmm2, %k2
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k2} {z}
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,8,3,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm3, %zmm2, %zmm4
-; KNL-NEXT: vpsllq $63, %zmm4, %zmm2
-; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
+; KNL-NEXT: vptestmq %zmm1, %zmm1, %k2
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
+; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpextrd $3, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,8,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm1
-; KNL-NEXT: vpsllq $63, %zmm1, %zmm0
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
@@ -1306,11 +1303,10 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k2
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index d48f63536e0..b127585dc87 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -344,8 +344,7 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-NEXT: LBB17_1:
; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT: LBB17_3:
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -382,8 +381,7 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-NEXT: LBB18_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -472,8 +470,7 @@ define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-NEXT: movw $1, %cx
; KNL-NEXT: cmovgw %ax, %cx
; KNL-NEXT: kmovw %ecx, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -510,28 +507,27 @@ define <64 x i8> @test16(i64 %x) {
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k2} {z}
-; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: movl $1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
+; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
-; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
-; KNL-NEXT: vpsllw $7, %ymm2, %ymm0
-; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
@@ -574,30 +570,29 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; KNL-NEXT: movl %edi, (%rsp)
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: kmovw (%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z}
-; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %edx, %esi
; KNL-NEXT: setg %al
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
@@ -635,18 +630,17 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kshiftlw $6, %k2, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k2} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,2,3,4,5,8,7]
-; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
-; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
+; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kshiftlw $7, %k0, %k0
; KNL-NEXT: korw %k0, %k1, %k1
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: retq
;
@@ -1387,8 +1381,7 @@ define <8 x i64> @load_8i1(<8 x i1>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_8i1:
@@ -1405,8 +1398,7 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) {
; KNL-LABEL: load_16i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_16i1:
@@ -1424,8 +1416,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
@@ -1444,8 +1435,7 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
@@ -1465,10 +1455,9 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: kmovw 2(%rdi), %k2
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vpmovdw %zmm1, %ymm1
; KNL-NEXT: retq
;
@@ -1489,17 +1478,16 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) {
; KNL-NEXT: kmovw 2(%rdi), %k2
; KNL-NEXT: kmovw 4(%rdi), %k3
; KNL-NEXT: kmovw 6(%rdi), %k4
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z}
-; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k3} {z}
-; KNL-NEXT: vpmovdb %zmm2, %xmm2
-; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k4} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: load_64i1:
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
index 840239b9011..1991ee4f337 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -218,8 +218,7 @@ define <16 x i32> @test_vbroadcast() {
; ALL: # BB#0: # %entry
; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0
; ALL-NEXT: vcmpunordps %zmm0, %zmm0, %k1
-; ALL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; ALL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; ALL-NEXT: knotw %k1, %k1
; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; ALL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 5a71053ebca..391d2a38853 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -880,8 +880,7 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1
; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
; KNL-NEXT: kxnorw %k1, %k0, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: retq
;
@@ -905,8 +904,7 @@ define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32>
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; KNL-NEXT: vpcmpgtd %zmm3, %zmm2, %k1
; KNL-NEXT: kxorw %k1, %k0, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/sse-fsignum.ll b/llvm/test/CodeGen/X86/sse-fsignum.ll
index 7159d4c8717..32594a27698 100644
--- a/llvm/test/CodeGen/X86/sse-fsignum.ll
+++ b/llvm/test/CodeGen/X86/sse-fsignum.ll
@@ -93,15 +93,14 @@ define void @signum32b(<8 x float>*) {
; AVX512F-NEXT: vmovaps (%rdi), %ymm0
; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} {z}
-; AVX512F-NEXT: vpmovqd %zmm3, %ymm3
-; AVX512F-NEXT: vcvtdq2ps %ymm3, %ymm3
+; AVX512F-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; AVX512F-NEXT: vpmovqd %zmm2, %ymm2
+; AVX512F-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX512F-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; AVX512F-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovaps %ymm0, (%rdi)
; AVX512F-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/vector-compare-results.ll b/llvm/test/CodeGen/X86/vector-compare-results.ll
index 42e47f3ef12..c34f333ef78 100644
--- a/llvm/test/CodeGen/X86/vector-compare-results.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-results.ll
@@ -608,8 +608,7 @@ define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
; AVX512F-LABEL: test_cmp_v8f64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -623,8 +622,7 @@ define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
; AVX512BW-LABEL: test_cmp_v8f64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: retq
%1 = fcmp ogt <8 x double> %a0, %a1
@@ -670,8 +668,7 @@ define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind {
; AVX512F-LABEL: test_cmp_v16f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -685,8 +682,7 @@ define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind {
; AVX512BW-LABEL: test_cmp_v16f32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: retq
%1 = fcmp ogt <16 x float> %a0, %a1
@@ -783,8 +779,7 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512F-LABEL: test_cmp_v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -798,8 +793,7 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512BW-LABEL: test_cmp_v8i64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: retq
%1 = icmp sgt <8 x i64> %a0, %a1
@@ -848,8 +842,7 @@ define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: test_cmp_v16i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -863,8 +856,7 @@ define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512BW-LABEL: test_cmp_v16i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: retq
%1 = icmp sgt <16 x i32> %a0, %a1
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 39fbc7611de..774d615ae89 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1244,8 +1244,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
@@ -1253,8 +1252,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: movzbl (%rdi), %eax
; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: retq
;
@@ -1435,8 +1433,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: retq
@@ -1445,8 +1442,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: movzbl (%rdi), %eax
; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
@@ -1642,8 +1638,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
@@ -1651,8 +1646,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: movzbl (%rdi), %eax
; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: retq
;
@@ -1945,8 +1939,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -1954,8 +1947,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: movzbl (%rdi), %eax
; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: retq
;
@@ -2348,8 +2340,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: retq
;
@@ -2357,8 +2348,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: movzbl (%rdi), %eax
; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -2860,8 +2850,7 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
; AVX512-LABEL: load_sext_16i1_to_16i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
@@ -3398,8 +3387,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; AVX512-LABEL: load_sext_16i1_to_16i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: retq
;
@@ -4244,12 +4232,11 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; AVX512: # BB#0: # %entry
; AVX512-NEXT: kmovw (%rdi), %k1
; AVX512-NEXT: kmovw 2(%rdi), %k2
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
-; AVX512-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z}
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; AVX512-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
index 3ad92737a2e..4312b67546d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -71,13 +71,12 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
-; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -101,14 +100,13 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; AVX512F-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; AVX512F-NEXT: vpslld $31, %zmm3, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vpslld $31, %zmm2, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -157,13 +155,12 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1
-; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -185,8 +182,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
@@ -215,8 +211,7 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
@@ -241,8 +236,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
@@ -271,8 +265,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
; AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
@@ -301,13 +294,12 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT: movb $51, %al
; AVX512F-NEXT: kmovw %eax, %k2
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
@@ -337,10 +329,10 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,1,2,3,4,5,6,7]
-; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
+; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
@@ -367,8 +359,7 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -403,9 +394,8 @@ define i64 @shuf64i1_zero(i64 %a) {
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $96, %rsp
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1