-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp            27
-rw-r--r--  llvm/lib/Target/X86/X86RegisterInfo.cpp         10
-rw-r--r--  llvm/lib/Target/X86/X86RegisterInfo.h            3
-rw-r--r--  llvm/test/CodeGen/X86/avx512-calling-conv.ll    27
-rw-r--r--  llvm/test/CodeGen/X86/avx512-ext.ll              8
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll      90
-rw-r--r--  llvm/test/CodeGen/X86/avx512-mask-op.ll          2
-rw-r--r--  llvm/test/CodeGen/X86/avx512-select.ll           4
-rw-r--r--  llvm/test/CodeGen/X86/avx512-vbroadcast.ll       4
-rw-r--r--  llvm/test/CodeGen/X86/masked_gather_scatter.ll  36
-rw-r--r--  llvm/test/CodeGen/X86/masked_memop.ll            4
-rw-r--r--  llvm/test/CodeGen/X86/pmul.ll                   61
-rw-r--r--  llvm/test/CodeGen/X86/vector-sext.ll             2
-rw-r--r--  llvm/test/CodeGen/X86/vector-zext.ll             2
14 files changed, 112 insertions, 168 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4b28c3b627d..b52098ef44c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4499,22 +4499,6 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg,
if (Subtarget.hasBWI())
if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg))
return Opc;
- if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
- if (Subtarget.hasVLX())
- return X86::VMOVAPSZ128rr;
- DestReg = get512BitSuperRegister(DestReg);
- SrcReg = get512BitSuperRegister(SrcReg);
- return X86::VMOVAPSZrr;
- }
- if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
- if (Subtarget.hasVLX())
- return X86::VMOVAPSZ256rr;
- DestReg = get512BitSuperRegister(DestReg);
- SrcReg = get512BitSuperRegister(SrcReg);
- return X86::VMOVAPSZrr;
- }
- if (X86::VR512RegClass.contains(DestReg, SrcReg))
- return X86::VMOVAPSZrr;
if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg))
return X86::KMOVWkk;
if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) {
@@ -4535,6 +4519,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// First deal with the normal symmetric copies.
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasVLX = Subtarget.hasVLX();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -4556,12 +4541,14 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
+ else if (X86::VR128XRegClass.contains(DestReg, SrcReg))
+ Opc = HasVLX ? X86::VMOVAPSZ128rr : HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
+ else if (X86::VR256XRegClass.contains(DestReg, SrcReg))
+ Opc = HasVLX ? X86::VMOVAPSZ256rr : X86::VMOVAPSYrr;
+ else if (X86::VR512RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSZrr;
else if (HasAVX512)
Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget);
- else if (X86::VR128RegClass.contains(DestReg, SrcReg))
- Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
- else if (X86::VR256RegClass.contains(DestReg, SrcReg))
- Opc = X86::VMOVAPSYrr;
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
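The functional core of the patch is the hunk above: copies within the plain XMM/YMM/ZMM register classes are now resolved directly in copyPhysReg, ahead of the AVX-512 helper, so targets without VLX (such as KNL) keep the VEX-encoded 128/256-bit moves instead of widening both operands to their 512-bit super-registers and emitting VMOVAPSZrr. A minimal standalone sketch of the resulting decision table, with enums and main() as illustrative stand-ins for the real MachineInstr plumbing rather than actual LLVM code:

    // Condensed model of the new if/else chain in X86InstrInfo::copyPhysReg.
    #include <cstdio>

    enum class RegClass { VR128X, VR256X, VR512 };

    enum class Opcode {
      MOVAPSrr,      // SSE, 128-bit
      VMOVAPSrr,     // VEX, 128-bit
      VMOVAPSYrr,    // VEX, 256-bit
      VMOVAPSZ128rr, // EVEX, 128-bit (requires VLX)
      VMOVAPSZ256rr, // EVEX, 256-bit (requires VLX)
      VMOVAPSZrr,    // EVEX, 512-bit
    };

    // Without VLX, 128/256-bit copies fall back to the VEX (or SSE)
    // encodings; only genuine 512-bit copies select VMOVAPSZrr.
    Opcode selectVecCopy(RegClass RC, bool HasAVX, bool HasVLX) {
      switch (RC) {
      case RegClass::VR128X:
        return HasVLX   ? Opcode::VMOVAPSZ128rr
               : HasAVX ? Opcode::VMOVAPSrr
                        : Opcode::MOVAPSrr;
      case RegClass::VR256X:
        return HasVLX ? Opcode::VMOVAPSZ256rr : Opcode::VMOVAPSYrr;
      case RegClass::VR512:
        return Opcode::VMOVAPSZrr;
      }
      return Opcode::MOVAPSrr; // unreachable for the classes above
    }

    int main() {
      // KNL: AVX-512F without VLX. A 256-bit copy now picks VMOVAPSYrr.
      bool Narrow = selectVecCopy(RegClass::VR256X, /*HasAVX=*/true,
                                  /*HasVLX=*/false) == Opcode::VMOVAPSYrr;
      std::printf("KNL 256-bit copy stays VEX-encoded: %s\n",
                  Narrow ? "yes" : "no");
      return 0;
    }

With the widening path gone, get512BitSuperRegister has no remaining callers, which is why the next two hunks delete its definition and declaration; the test updates that follow merely track the narrower vmovaps/vmovapd/vmovdqa encodings now appearing in the checked output.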
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 6324bd4a954..1b2fece6052 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -691,13 +691,3 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
FrameReg = getX86SubSuperRegister(FrameReg, 32);
return FrameReg;
}
-
-unsigned llvm::get512BitSuperRegister(unsigned Reg) {
- if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
- return X86::ZMM0 + (Reg - X86::XMM0);
- if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
- return X86::ZMM0 + (Reg - X86::YMM0);
- if (Reg >= X86::ZMM0 && Reg <= X86::ZMM31)
- return Reg;
- llvm_unreachable("Unexpected SIMD register");
-}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 8d0094cbf3d..468012b4394 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -137,9 +137,6 @@ public:
unsigned getSlotSize() const { return SlotSize; }
};
-//get512BitRegister - X86 utility - returns 512-bit super register
-unsigned get512BitSuperRegister(unsigned Reg);
-
} // End llvm namespace
#endif
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index c1d0f2baf4f..963a3867e68 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -335,30 +335,21 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
}
define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
-; KNL-LABEL: test8:
-; KNL: ## BB#0:
-; KNL-NEXT: testb $1, %dil
-; KNL-NEXT: jne LBB8_2
-; KNL-NEXT: ## BB#1:
-; KNL-NEXT: vmovaps %zmm1, %zmm0
-; KNL-NEXT: LBB8_2:
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test8:
-; SKX: ## BB#0:
-; SKX-NEXT: testb $1, %dil
-; SKX-NEXT: jne LBB8_2
-; SKX-NEXT: ## BB#1:
-; SKX-NEXT: vmovaps %xmm1, %xmm0
-; SKX-NEXT: LBB8_2:
-; SKX-NEXT: retq
+; ALL_X64-LABEL: test8:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: testb $1, %dil
+; ALL_X64-NEXT: jne LBB8_2
+; ALL_X64-NEXT: ## BB#1:
+; ALL_X64-NEXT: vmovaps %xmm1, %xmm0
+; ALL_X64-NEXT: LBB8_2:
+; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test8:
; KNL_X32: ## BB#0:
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: jne LBB8_2
; KNL_X32-NEXT: ## BB#1:
-; KNL_X32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_X32-NEXT: vmovaps %xmm1, %xmm0
; KNL_X32-NEXT: LBB8_2:
; KNL_X32-NEXT: retl
%res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index e45dda1ec8f..4d8f95a00f5 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -163,7 +163,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8mem_to_32x16:
@@ -192,7 +192,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8mem_to_32x16:
@@ -213,7 +213,7 @@ define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32x8_to_32x16:
@@ -258,7 +258,7 @@ define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-NEXT: vpmovsxbw %xmm0, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sext_32x8_to_32x16:
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index d9c5659e629..62948615fc0 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -198,7 +198,7 @@ define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -225,7 +225,7 @@ define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -2681,7 +2681,7 @@ define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x f
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
ret <4 x float> %res
@@ -2693,7 +2693,7 @@ define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x f
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
ret <4 x float> %res
@@ -2705,7 +2705,7 @@ define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x f
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
ret <4 x float> %res
@@ -2717,7 +2717,7 @@ define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x f
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
ret <4 x float> %res
@@ -2729,7 +2729,7 @@ define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
@@ -2763,7 +2763,7 @@ define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
ret <2 x double> %res
@@ -2775,7 +2775,7 @@ define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
ret <2 x double> %res
@@ -2787,7 +2787,7 @@ define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
ret <2 x double> %res
@@ -2799,7 +2799,7 @@ define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
ret <2 x double> %res
@@ -2811,7 +2811,7 @@ define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -2845,7 +2845,7 @@ define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
ret <4 x float> %res
@@ -2877,7 +2877,7 @@ define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x floa
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
@@ -2910,7 +2910,7 @@ define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
ret <2 x double> %res
@@ -2942,7 +2942,7 @@ define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x d
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -4191,7 +4191,7 @@ define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float>
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -4218,7 +4218,7 @@ define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x dou
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
@@ -4438,7 +4438,7 @@ define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x d
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %xmm2, %xmm3
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
@@ -4842,7 +4842,7 @@ define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x fl
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
ret <4 x float> %res
@@ -4895,7 +4895,7 @@ define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
ret <2 x double> %res
@@ -5523,10 +5523,10 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x fl
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
@@ -5547,9 +5547,9 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x f
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovaps %xmm0, %xmm4
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
@@ -5617,9 +5617,9 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %xmm0, %xmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovapd %zmm0, %zmm4
+; CHECK-NEXT: vmovapd %xmm0, %xmm4
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
@@ -5641,10 +5641,10 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %xmm0, %xmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovapd %zmm0, %zmm5
+; CHECK-NEXT: vmovapd %xmm0, %xmm5
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
@@ -5741,11 +5741,11 @@ define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x do
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfmadd132sd %xmm1, %xmm2, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
; CHECK-NEXT: vfmadd132sd {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
@@ -5769,11 +5769,11 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x floa
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfmadd132ss %xmm1, %xmm2, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
; CHECK-NEXT: vfmadd132ss {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
@@ -5797,7 +5797,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x d
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovaps %xmm1, %xmm3
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0
@@ -5816,7 +5816,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x flo
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
@@ -5830,11 +5830,11 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x d
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm2, %zmm5
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
@@ -5858,11 +5858,11 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x flo
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm2, %zmm5
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
@@ -5885,7 +5885,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%q = load float, float* %ptr_b
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -5912,7 +5912,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
; CHECK: ## BB#0:
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vfmadd213ss (%rdi), %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%q = load float, float* %ptr_b
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 6bf9931828b..a91e939bc7a 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -418,7 +418,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB20_2
; KNL-NEXT: ## BB#1:
-; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: vmovaps %xmm1, %xmm0
; KNL-NEXT: LBB20_2:
; KNL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index ee9be946c76..a3709c3181a 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -40,7 +40,7 @@ define float @select02(float %a, float %b, float %c, float %eps) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpless %xmm0, %xmm3, %k1
; CHECK-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = fcmp oge float %a, %eps
%cond = select i1 %cmp, float %c, float %b
@@ -52,7 +52,7 @@ define double @select03(double %a, double %b, double %c, double %eps) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplesd %xmm0, %xmm3, %k1
; CHECK-NEXT: vmovsd %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovapd %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
%cmp = fcmp oge double %a, %eps
%cond = select i1 %cmp, double %c, double %b
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
index d7660e0b4ea..3cb9ea2b224 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -359,7 +359,7 @@ define <64 x i8> @_invec32xi8(<32 x i8>%a) {
; AVX512F-LABEL: _invec32xi8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: _invec32xi8:
@@ -374,7 +374,7 @@ define <32 x i16> @_invec16xi16(<16 x i16>%a) {
; AVX512F-LABEL: _invec16xi16:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: _invec16xi16:
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 8efb97ba4a3..96f62cb640f 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -291,7 +291,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %ymm2, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test6:
@@ -301,7 +301,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
; KNL_32-NEXT: kxnorw %k0, %k0, %k2
; KNL_32-NEXT: vpgatherqd (,%zmm2), %ymm1 {%k2}
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm2) {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test6:
@@ -336,7 +336,7 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %k1, %k2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k2}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2
+; KNL_64-NEXT: vmovdqa %ymm1, %ymm2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_64-NEXT: retq
@@ -349,7 +349,7 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
+; KNL_32-NEXT: vmovdqa %ymm1, %ymm2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_32-NEXT: retl
@@ -851,7 +851,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovapd %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %ymm2, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test16:
@@ -868,7 +868,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovapd %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %ymm2, %ymm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test16:
@@ -905,7 +905,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovapd %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test17:
@@ -918,7 +918,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovapd %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test17:
@@ -1165,7 +1165,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovaps %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test22:
@@ -1181,7 +1181,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovaps %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test22:
@@ -1226,7 +1226,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test23:
@@ -1239,7 +1239,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test23:
@@ -1271,7 +1271,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
@@ -1283,7 +1283,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test24:
@@ -1317,7 +1317,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test25:
@@ -1330,7 +1330,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test25:
@@ -1364,7 +1364,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test26:
@@ -1377,7 +1377,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index f2f91eecd64..434d50b84f2 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -454,7 +454,7 @@ define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float>
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovaps %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11a:
@@ -501,7 +501,7 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11b:
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
index 199d6a63ff9..7c416873897 100644
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -360,47 +360,26 @@ define <2 x i64> @mul_v2i64spill(<2 x i64> %i, <2 x i64> %j) nounwind {
; SSE-NEXT: addq $40, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: mul_v2i64spill:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: subq $40, %rsp
-; AVX2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
-; AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX2-NEXT: callq foo
-; AVX2-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
-; AVX2-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
-; AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
-; AVX2-NEXT: vpsrlq $32, %xmm2, %xmm1
-; AVX2-NEXT: vmovdqa %xmm2, %xmm3
-; AVX2-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq $32, %xmm4, %xmm2
-; AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: addq $40, %rsp
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: mul_v2i64spill:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: subq $40, %rsp
-; AVX512-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
-; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX512-NEXT: callq foo
-; AVX512-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
-; AVX512-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
-; AVX512-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
-; AVX512-NEXT: vpsrlq $32, %xmm2, %xmm1
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm3
-; AVX512-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
-; AVX512-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq $32, %xmm4, %xmm2
-; AVX512-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
-; AVX512-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: addq $40, %rsp
-; AVX512-NEXT: retq
+; AVX-LABEL: mul_v2i64spill:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: callq foo
+; AVX-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
+; AVX-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
+; AVX-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
+; AVX-NEXT: vpsrlq $32, %xmm2, %xmm1
+; AVX-NEXT: vmovdqa %xmm2, %xmm3
+; AVX-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
+; AVX-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX-NEXT: vpsrlq $32, %xmm4, %xmm2
+; AVX-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
entry:
; Use a call to force spills.
call void @foo()
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index a7138bcbda2..6cdd05f180e 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -168,7 +168,7 @@ define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ss
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: sext_32i8_to_32i16:
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 68cdc8498f9..72dda578338 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -145,7 +145,7 @@ define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: zext_32i8_to_32i16: