Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-intrinsics.ll')
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll  146
1 file changed, 73 insertions(+), 73 deletions(-)
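
Every hunk below updates the expected register-to-register move at the tail of a masked AVX-512 test: integer-typed results (<16 x i32>, <8 x i64>) now expect the integer-domain vmovdqa64 instead of vmovaps, and the scalar-double tests expect vmovapd, presumably so the generated copy stays in the same register domain as the masked operation. A minimal sketch of the test pattern these hunks touch, in the same style as the file (hypothetical function name; the RUN line and the intrinsic declaration are assumed from the rest of the file):

; Hypothetical reduced test mirroring test_mask_add_epi32_rrk below.
define <16 x i32> @example_mask_add_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: example_mask_add_epi32:
; CHECK:       kmovw %edi, %k1
; CHECK-NEXT:  vpaddd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:  vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:  retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}
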
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 65ed7737438..cab1aae1142 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -679,7 +679,7 @@ define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res
@@ -713,7 +713,7 @@ define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret <16 x i32> %res
@@ -724,7 +724,7 @@ define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res
@@ -861,7 +861,7 @@ define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src,
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
ret <8 x i64> %res
@@ -1342,7 +1342,7 @@ define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1374,7 +1374,7 @@ define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1406,7 +1406,7 @@ define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1438,7 +1438,7 @@ define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1470,7 +1470,7 @@ define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1502,7 +1502,7 @@ define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1534,7 +1534,7 @@ define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1,
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1566,7 +1566,7 @@ define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1599,7 +1599,7 @@ define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1,
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1631,7 +1631,7 @@ define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1663,7 +1663,7 @@ define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1,
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
@@ -1695,7 +1695,7 @@ define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
@@ -1960,7 +1960,7 @@ define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -1991,7 +1991,7 @@ define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
@@ -2026,7 +2026,7 @@ define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i3
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -2064,7 +2064,7 @@ define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -2095,7 +2095,7 @@ define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
@@ -2130,7 +2130,7 @@ define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i3
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -2168,7 +2168,7 @@ define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64>
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2199,7 +2199,7 @@ define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
@@ -2234,7 +2234,7 @@ define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64>
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2272,7 +2272,7 @@ define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64>
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2303,7 +2303,7 @@ define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
@@ -2338,7 +2338,7 @@ define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64>
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2376,7 +2376,7 @@ define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2407,7 +2407,7 @@ define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
@@ -2443,7 +2443,7 @@ define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64>
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2483,7 +2483,7 @@ define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
@@ -2514,7 +2514,7 @@ define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
@@ -2550,7 +2550,7 @@ define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64>
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
@@ -2590,7 +2590,7 @@ define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
@@ -2621,7 +2621,7 @@ define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
@@ -2656,7 +2656,7 @@ define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
@@ -3285,7 +3285,7 @@ define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
ret <2 x double> %res
@@ -3297,7 +3297,7 @@ define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
ret <2 x double> %res
@@ -3309,7 +3309,7 @@ define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
ret <2 x double> %res
@@ -3321,7 +3321,7 @@ define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
ret <2 x double> %res
@@ -3333,7 +3333,7 @@ define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -3432,7 +3432,7 @@ define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
ret <2 x double> %res
@@ -3464,7 +3464,7 @@ define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x d
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
@@ -3788,7 +3788,7 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
@@ -3806,7 +3806,7 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0,
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
@@ -3823,7 +3823,7 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0,
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
@@ -3840,7 +3840,7 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
@@ -3857,7 +3857,7 @@ define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm2
; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
@@ -3875,7 +3875,7 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm2
; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
@@ -3895,7 +3895,7 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0,
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
@@ -3913,7 +3913,7 @@ define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
@@ -3930,7 +3930,7 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
@@ -4956,7 +4956,7 @@ define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x d
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm3
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
@@ -5197,7 +5197,7 @@ define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
@@ -5214,7 +5214,7 @@ define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
@@ -5231,7 +5231,7 @@ define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
@@ -5248,7 +5248,7 @@ define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i6
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
@@ -5995,7 +5995,7 @@ define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
@@ -6017,10 +6017,10 @@ define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0,
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1
@@ -6041,10 +6041,10 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x fl
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
@@ -6065,9 +6065,9 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x f
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
@@ -6088,10 +6088,10 @@ define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1
@@ -6111,9 +6111,9 @@ define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0,
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
@@ -6135,9 +6135,9 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %zmm0, %zmm4
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
@@ -6159,10 +6159,10 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovaps %zmm0, %zmm5
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1