Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/avx512-arith.ll                 | 28
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll    |  6
-rw-r--r--  llvm/test/CodeGen/X86/avx512-select.ll                |  6
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll  |  8
-rw-r--r--  llvm/test/CodeGen/X86/nontemporal-2.ll                | 12
-rw-r--r--  llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll    | 12
6 files changed, 46 insertions, 26 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index 5e71fffff02..03cbc1f5ec1 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -962,10 +962,30 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
 }
 
 define <8 x double> @fabs_v8f64(<8 x double> %p)
-; CHECK-LABEL: fabs_v8f64:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
-; CHECK-NEXT: retq
+; AVX512F-LABEL: fabs_v8f64:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fabs_v8f64:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: fabs_v8f64:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: fabs_v8f64:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: fabs_v8f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
 {
   %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
   ret <8 x double> %t
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index 22a07c07341..b7efa3e9cd2 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -1025,7 +1025,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16
 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
 ; CHECK-LABEL: test_xor_epi64:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
   ret < 8 x i64> %res
@@ -1047,7 +1047,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i6
 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
 ; CHECK-LABEL: test_or_epi64:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
   ret < 8 x i64> %res
@@ -1069,7 +1069,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64
 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
 ; CHECK-LABEL: test_and_epi64:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
   %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
   ret < 8 x i64> %res
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index d4b9c19202e..09c09ef9875 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -8,7 +8,7 @@ define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
 ; CHECK-NEXT: cmpl $255, %edi
 ; CHECK-NEXT: je LBB0_2
 ; CHECK-NEXT: ## BB#1:
-; CHECK-NEXT: vmovaps %zmm0, %zmm1
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1
 ; CHECK-NEXT: LBB0_2:
 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
@@ -25,9 +25,9 @@ define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
 ; CHECK-NEXT: cmpl $255, %edi
 ; CHECK-NEXT: je LBB1_2
 ; CHECK-NEXT: ## BB#1:
-; CHECK-NEXT: vmovaps %zmm0, %zmm1
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1
 ; CHECK-NEXT: LBB1_2:
-; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
   %cmpres = icmp eq i32 %a, 255
   %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 9126e659774..f9126b4614e 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -2330,7 +2330,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i
 define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1]
+; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
   ret <2 x i64> %res
@@ -2360,7 +2360,7 @@ define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8
 define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rm_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07]
+; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %b = load <2 x i64>, <2 x i64>* %ptr_b
   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
@@ -2434,7 +2434,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i
 define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1]
+; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
   ret <4 x i64> %res
@@ -2464,7 +2464,7 @@ define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8
 define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rm_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07]
+; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %b = load <4 x i64>, <4 x i64>* %ptr_b
   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll
index b97d38e18f4..accc1f8bbea 100644
--- a/llvm/test/CodeGen/X86/nontemporal-2.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-2.ll
@@ -117,7 +117,7 @@ define void @test_zero_v4f32(<4 x float>* %dst) {
 ; VLX-LABEL: test_zero_v4f32:
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntps %xmm0, (%rdi)
+; VLX-NEXT: vmovntdq %xmm0, (%rdi)
 ; VLX-NEXT: retq
   store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
   ret void
@@ -139,7 +139,7 @@ define void @test_zero_v4i32(<4 x i32>* %dst) {
 ; VLX-LABEL: test_zero_v4i32:
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntps %xmm0, (%rdi)
+; VLX-NEXT: vmovntdq %xmm0, (%rdi)
 ; VLX-NEXT: retq
   store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
   store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
@@ -162,7 +162,7 @@ define void @test_zero_v2f64(<2 x double>* %dst) {
 ; VLX-LABEL: test_zero_v2f64:
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntps %xmm0, (%rdi)
+; VLX-NEXT: vmovntdq %xmm0, (%rdi)
 ; VLX-NEXT: retq
   store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
   ret void
@@ -184,7 +184,7 @@ define void @test_zero_v2i64(<2 x i64>* %dst) {
 ; VLX-LABEL: test_zero_v2i64:
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntps %xmm0, (%rdi)
+; VLX-NEXT: vmovntdq %xmm0, (%rdi)
 ; VLX-NEXT: retq
   store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
   ret void
@@ -206,7 +206,7 @@ define void @test_zero_v8i16(<8 x i16>* %dst) {
 ; VLX-LABEL: test_zero_v8i16:
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntps %xmm0, (%rdi)
+; VLX-NEXT: vmovntdq %xmm0, (%rdi)
 ; VLX-NEXT: retq
   store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
   ret void
@@ -228,7 +228,7 @@ define void @test_zero_v16i8(<16 x i8>* %dst) {
 ; VLX-LABEL: test_zero_v16i8:
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntps %xmm0, (%rdi)
+; VLX-NEXT: vmovntdq %xmm0, (%rdi)
 ; VLX-NEXT: retq
   store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
   ret void
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index b457969d31e..5dec0b8fa11 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -76,7 +76,7 @@ declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind read
 
 define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
   ;CHECK-LABEL: stack_fold_andpd
-  ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
   %2 = bitcast <2 x double> %a0 to <2 x i64>
   %3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -89,7 +89,7 @@ define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
 
 define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
   ;CHECK-LABEL: stack_fold_andpd_ymm
-  ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  ;CHECK: vpandq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
   %2 = bitcast <4 x double> %a0 to <4 x i64>
   %3 = bitcast <4 x double> %a1 to <4 x i64>
@@ -198,7 +198,7 @@ declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind read
 
 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
   ;CHECK-LABEL: stack_fold_orpd
-  ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
   %2 = bitcast <2 x double> %a0 to <2 x i64>
   %3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -211,7 +211,7 @@ define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
 
 define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
   ;CHECK-LABEL: stack_fold_orpd_ymm
-  ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  ;CHECK: vporq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
   %2 = bitcast <4 x double> %a0 to <4 x i64>
   %3 = bitcast <4 x double> %a1 to <4 x i64>
@@ -316,7 +316,7 @@ declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind read
 
 define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
   ;CHECK-LABEL: stack_fold_xorpd
-  ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
   %2 = bitcast <2 x double> %a0 to <2 x i64>
   %3 = bitcast <2 x double> %a1 to <2 x i64>
@@ -329,7 +329,7 @@ define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
 
 define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
   ;CHECK-LABEL: stack_fold_xorpd_ymm
-  ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  ;CHECK: vpxorq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
   %2 = bitcast <4 x double> %a0 to <4 x i64>
   %3 = bitcast <4 x double> %a1 to <4 x i64>

