-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp           | 27
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.cpp         | 10
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.h           |  3
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-calling-conv.ll    | 27
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-ext.ll             |  8
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll      | 90
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll         |  2
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-select.ll          |  4
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-vbroadcast.ll      |  4
-rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll  | 36
-rw-r--r-- | llvm/test/CodeGen/X86/masked_memop.ll           |  4
-rw-r--r-- | llvm/test/CodeGen/X86/pmul.ll                   | 61
-rw-r--r-- | llvm/test/CodeGen/X86/vector-sext.ll            |  2
-rw-r--r-- | llvm/test/CodeGen/X86/vector-zext.ll            |  2
14 files changed, 112 insertions, 168 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4b28c3b627d..b52098ef44c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4499,22 +4499,6 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg,
   if (Subtarget.hasBWI())
     if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg))
       return Opc;
-  if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
-    if (Subtarget.hasVLX())
-      return X86::VMOVAPSZ128rr;
-    DestReg = get512BitSuperRegister(DestReg);
-    SrcReg = get512BitSuperRegister(SrcReg);
-    return X86::VMOVAPSZrr;
-  }
-  if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
-    if (Subtarget.hasVLX())
-      return X86::VMOVAPSZ256rr;
-    DestReg = get512BitSuperRegister(DestReg);
-    SrcReg = get512BitSuperRegister(SrcReg);
-    return X86::VMOVAPSZrr;
-  }
-  if (X86::VR512RegClass.contains(DestReg, SrcReg))
-    return X86::VMOVAPSZrr;
   if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg))
     return X86::KMOVWkk;
   if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) {
@@ -4535,6 +4519,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // First deal with the normal symmetric copies.
   bool HasAVX = Subtarget.hasAVX();
   bool HasAVX512 = Subtarget.hasAVX512();
+  bool HasVLX = Subtarget.hasVLX();
   unsigned Opc = 0;
   if (X86::GR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MOV64rr;
@@ -4556,12 +4541,14 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   }
   else if (X86::VR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MMX_MOVQ64rr;
+  else if (X86::VR128XRegClass.contains(DestReg, SrcReg))
+    Opc = HasVLX ? X86::VMOVAPSZ128rr : HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
+  else if (X86::VR256XRegClass.contains(DestReg, SrcReg))
+    Opc = HasVLX ? X86::VMOVAPSZ256rr : X86::VMOVAPSYrr;
+  else if (X86::VR512RegClass.contains(DestReg, SrcReg))
+    Opc = X86::VMOVAPSZrr;
   else if (HasAVX512)
     Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget);
-  else if (X86::VR128RegClass.contains(DestReg, SrcReg))
-    Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
-  else if (X86::VR256RegClass.contains(DestReg, SrcReg))
-    Opc = X86::VMOVAPSYrr;
 
   if (!Opc)
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 6324bd4a954..1b2fece6052 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -691,13 +691,3 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
     FrameReg = getX86SubSuperRegister(FrameReg, 32);
   return FrameReg;
 }
-
-unsigned llvm::get512BitSuperRegister(unsigned Reg) {
-  if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
-    return X86::ZMM0 + (Reg - X86::XMM0);
-  if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
-    return X86::ZMM0 + (Reg - X86::YMM0);
-  if (Reg >= X86::ZMM0 && Reg <= X86::ZMM31)
-    return Reg;
-  llvm_unreachable("Unexpected SIMD register");
-}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 8d0094cbf3d..468012b4394 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -137,9 +137,6 @@ public:
   unsigned getSlotSize() const { return SlotSize; }
 };
 
-//get512BitRegister - X86 utility - returns 512-bit super register
-unsigned get512BitSuperRegister(unsigned Reg);
-
 } // End llvm namespace
 
 #endif
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index c1d0f2baf4f..963a3867e68 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -335,30 +335,21 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
 }
 
 define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
-; KNL-LABEL: test8:
-; KNL: ## BB#0:
-; KNL-NEXT: testb $1, %dil
-; KNL-NEXT: jne LBB8_2
-; KNL-NEXT: ## BB#1:
-; KNL-NEXT: vmovaps %zmm1, %zmm0
-; KNL-NEXT: LBB8_2:
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test8:
-; SKX: ## BB#0:
-; SKX-NEXT: testb $1, %dil
-; SKX-NEXT: jne LBB8_2
-; SKX-NEXT: ## BB#1:
-; SKX-NEXT: vmovaps %xmm1, %xmm0
-; SKX-NEXT: LBB8_2:
-; SKX-NEXT: retq
+; ALL_X64-LABEL: test8:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: testb $1, %dil
+; ALL_X64-NEXT: jne LBB8_2
+; ALL_X64-NEXT: ## BB#1:
+; ALL_X64-NEXT: vmovaps %xmm1, %xmm0
+; ALL_X64-NEXT: LBB8_2:
+; ALL_X64-NEXT: retq
 ;
 ; KNL_X32-LABEL: test8:
 ; KNL_X32: ## BB#0:
 ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT: jne LBB8_2
 ; KNL_X32-NEXT: ## BB#1:
-; KNL_X32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_X32-NEXT: vmovaps %xmm1, %xmm0
 ; KNL_X32-NEXT: LBB8_2:
 ; KNL_X32-NEXT: retl
   %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index e45dda1ec8f..4d8f95a00f5 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -163,7 +163,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: zext_32x8mem_to_32x16:
@@ -192,7 +192,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: sext_32x8mem_to_32x16:
@@ -213,7 +213,7 @@ define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: zext_32x8_to_32x16:
@@ -258,7 +258,7 @@ define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm2
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
-; KNL-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL-NEXT: vmovdqa %ymm2, %ymm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: sext_32x8_to_32x16:
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index d9c5659e629..62948615fc0 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -198,7 +198,7 @@ define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -225,7 +225,7 @@ define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -2681,7 +2681,7 @@ define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x f
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
   ret <4 x float> %res
@@ -2693,7 +2693,7 @@ define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x f
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
   ret <4 x float> %res
@@ -2705,7 +2705,7 @@ define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x f
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
   ret <4 x float> %res
@@ -2717,7 +2717,7 @@ define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x f
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
   ret <4 x float> %res
@@ -2729,7 +2729,7 @@ define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
   ret <4 x float> %res
@@ -2763,7 +2763,7 @@ define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
   ret <2 x double> %res
@@ -2775,7 +2775,7 @@ define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
   ret <2 x double> %res
@@ -2787,7 +2787,7 @@ define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
   ret <2 x double> %res
@@ -2799,7 +2799,7 @@ define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
   ret <2 x double> %res
@@ -2811,7 +2811,7 @@ define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
   ret <2 x double> %res
@@ -2845,7 +2845,7 @@ define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
   ret <4 x float> %res
@@ -2877,7 +2877,7 @@ define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x floa
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
   ret <4 x float> %res
@@ -2910,7 +2910,7 @@ define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
   ret <2 x double> %res
@@ -2942,7 +2942,7 @@ define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x d
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
   ret <2 x double> %res
@@ -4191,7 +4191,7 @@ define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float>
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
@@ -4218,7 +4218,7 @@ define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x dou
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
@@ -4438,7 +4438,7 @@ define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x d
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %xmm2, %xmm3
 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
@@ -4842,7 +4842,7 @@ define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x fl
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   ret <4 x float> %res
@@ -4895,7 +4895,7 @@ define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovapd %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   ret <2 x double> %res
@@ -5523,10 +5523,10 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x fl
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
 ; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
 ; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
@@ -5547,9 +5547,9 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x f
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovaps %xmm0, %xmm4
 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
@@ -5617,9 +5617,9 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %xmm0, %xmm3
 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovapd %zmm0, %zmm4
+; CHECK-NEXT: vmovapd %xmm0, %xmm4
 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
@@ -5641,10 +5641,10 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %xmm0, %xmm3
 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovapd %zmm0, %zmm5
+; CHECK-NEXT: vmovapd %xmm0, %xmm5
 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
@@ -5741,11 +5741,11 @@ define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x do
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
 ; CHECK-NEXT: vfmadd132sd %xmm1, %xmm2, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
 ; CHECK-NEXT: vfmadd132sd {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1}
 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1
 ; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
@@ -5769,11 +5769,11 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x floa
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %xmm0, %xmm3
 ; CHECK-NEXT: vfmadd132ss %xmm1, %xmm2, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
 ; CHECK-NEXT: vfmadd132ss {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1}
 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1
 ; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
@@ -5797,7 +5797,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x d
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovaps %xmm1, %xmm3
 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm3 {%k1} {z}
 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1 {%k1} {z}
 ; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0
@@ -5816,7 +5816,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x flo
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
   %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
@@ -5830,11 +5830,11 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x d
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
 ; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm2, %zmm5
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
 ; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1
 ; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
@@ -5858,11 +5858,11 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x flo
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
 ; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vmovaps %xmm1, %xmm4
 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vmovaps %zmm2, %zmm5
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
 ; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1
 ; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
@@ -5885,7 +5885,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x
 ; CHECK-NEXT: andl $1, %esi
 ; CHECK-NEXT: kmovw %esi, %k1
 ; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %q = load float, float* %ptr_b
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -5912,7 +5912,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kxorw %k0, %k0, %k1
 ; CHECK-NEXT: vfmadd213ss (%rdi), %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %q = load float, float* %ptr_b
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 6bf9931828b..a91e939bc7a 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -418,7 +418,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
 ; KNL-NEXT: cmpl %esi, %edi
 ; KNL-NEXT: jg LBB20_2
 ; KNL-NEXT: ## BB#1:
-; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: vmovaps %xmm1, %xmm0
 ; KNL-NEXT: LBB20_2:
 ; KNL-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index ee9be946c76..a3709c3181a 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -40,7 +40,7 @@ define float @select02(float %a, float %b, float %c, float %eps) {
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vcmpless %xmm0, %xmm3, %k1
 ; CHECK-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %cmp = fcmp oge float %a, %eps
   %cond = select i1 %cmp, float %c, float %b
@@ -52,7 +52,7 @@ define double @select03(double %a, double %b, double %c, double %eps) {
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vcmplesd %xmm0, %xmm3, %k1
 ; CHECK-NEXT: vmovsd %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovapd %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %cmp = fcmp oge double %a, %eps
   %cond = select i1 %cmp, double %c, double %b
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
index d7660e0b4ea..3cb9ea2b224 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -359,7 +359,7 @@ define <64 x i8> @_invec32xi8(<32 x i8>%a) {
 ; AVX512F-LABEL: _invec32xi8:
 ; AVX512F: # BB#0:
 ; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm1
 ; AVX512F-NEXT: retq
 ;
 ; AVX512BW-LABEL: _invec32xi8:
@@ -374,7 +374,7 @@ define <32 x i16> @_invec16xi16(<16 x i16>%a) {
 ; AVX512F-LABEL: _invec16xi16:
 ; AVX512F: # BB#0:
 ; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm1
 ; AVX512F-NEXT: retq
 ;
 ; AVX512BW-LABEL: _invec16xi16:
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 8efb97ba4a3..96f62cb640f 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -291,7 +291,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
 ; KNL_64-NEXT: kxnorw %k0, %k0, %k2
 ; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
 ; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %ymm2, %ymm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test6:
@@ -301,7 +301,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
 ; KNL_32-NEXT: kxnorw %k0, %k0, %k2
 ; KNL_32-NEXT: vpgatherqd (,%zmm2), %ymm1 {%k2}
 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm2) {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test6:
@@ -336,7 +336,7 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
 ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
 ; KNL_64-NEXT: kmovw %k1, %k2
 ; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k2}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2
+; KNL_64-NEXT: vmovdqa %ymm1, %ymm2
 ; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
 ; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0
 ; KNL_64-NEXT: retq
@@ -349,7 +349,7 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
 ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
 ; KNL_32-NEXT: kmovw %k1, %k2
 ; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
+; KNL_32-NEXT: vmovdqa %ymm1, %ymm2
 ; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
 ; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
 ; KNL_32-NEXT: retl
@@ -851,7 +851,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
 ; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
 ; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovapd %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %ymm2, %ymm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test16:
@@ -868,7 +868,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
 ; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
 ; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovapd %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %ymm2, %ymm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test16:
@@ -905,7 +905,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
 ; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
 ; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovapd %zmm2, %zmm0
+; KNL_64-NEXT: vmovapd %xmm2, %xmm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test17:
@@ -918,7 +918,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
 ; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
 ; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovapd %zmm2, %zmm0
+; KNL_32-NEXT: vmovapd %xmm2, %xmm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test17:
@@ -1165,7 +1165,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
 ; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
 ; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
 ; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
-; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovaps %xmm2, %xmm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test22:
@@ -1181,7 +1181,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
 ; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
 ; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
-; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: vmovaps %xmm2, %xmm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test22:
@@ -1226,7 +1226,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
 ; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test23:
@@ -1239,7 +1239,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
 ; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test23:
@@ -1271,7 +1271,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
 ; KNL_64-NEXT: movb $3, %al
 ; KNL_64-NEXT: kmovw %eax, %k1
 ; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test24:
@@ -1283,7 +1283,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
 ; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
 ; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test24:
@@ -1317,7 +1317,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
 ; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
 ; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test25:
@@ -1330,7 +1330,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
 ; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
 ; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test25:
@@ -1364,7 +1364,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
 ; KNL_64-NEXT: movb $3, %al
 ; KNL_64-NEXT: kmovw %eax, %k1
 ; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: test26:
@@ -1377,7 +1377,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
 ; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
 ; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
 ; KNL_32-NEXT: retl
 ;
 ; SKX-LABEL: test26:
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index f2f91eecd64..434d50b84f2 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -454,7 +454,7 @@ define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float>
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT: vmovups (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: vmovaps %ymm1, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11a:
@@ -501,7 +501,7 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11b:
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
index 199d6a63ff9..7c416873897 100644
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -360,47 +360,26 @@ define <2 x i64> @mul_v2i64spill(<2 x i64> %i, <2 x i64> %j) nounwind {
 ; SSE-NEXT: addq $40, %rsp
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: mul_v2i64spill:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: subq $40, %rsp
-; AVX2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
-; AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX2-NEXT: callq foo
-; AVX2-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
-; AVX2-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
-; AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
-; AVX2-NEXT: vpsrlq $32, %xmm2, %xmm1
-; AVX2-NEXT: vmovdqa %xmm2, %xmm3
-; AVX2-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq $32, %xmm4, %xmm2
-; AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: addq $40, %rsp
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: mul_v2i64spill:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: subq $40, %rsp
-; AVX512-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
-; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX512-NEXT: callq foo
-; AVX512-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
-; AVX512-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
-; AVX512-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
-; AVX512-NEXT: vpsrlq $32, %xmm2, %xmm1
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm3
-; AVX512-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
-; AVX512-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq $32, %xmm4, %xmm2
-; AVX512-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
-; AVX512-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: addq $40, %rsp
-; AVX512-NEXT: retq
+; AVX-LABEL: mul_v2i64spill:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: callq foo
+; AVX-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
+; AVX-NEXT: vmovdqa (%rsp), %xmm4 # 16-byte Reload
+; AVX-NEXT: vpmuludq %xmm2, %xmm4, %xmm0
+; AVX-NEXT: vpsrlq $32, %xmm2, %xmm1
+; AVX-NEXT: vmovdqa %xmm2, %xmm3
+; AVX-NEXT: vpmuludq %xmm1, %xmm4, %xmm1
+; AVX-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX-NEXT: vpsrlq $32, %xmm4, %xmm2
+; AVX-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
 entry:
   ; Use a call to force spills.
   call void @foo()
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index a7138bcbda2..6cdd05f180e 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -168,7 +168,7 @@ define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ss
 ; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2
 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512BW-LABEL: sext_32i8_to_32i16:
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 68cdc8498f9..72dda578338 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -145,7 +145,7 @@ define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
+; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512BW-LABEL: zext_32i8_to_32i16: