Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-arith.ll               |  28
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-bugfix-25270.ll        |   4
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-calling-conv.ll        |  13
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-cvt.ll                 |  14
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-extract-subvector.ll   |  24
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-logic.ll               |  13
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-mov.ll                 |  16
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-vec-cmp.ll             |   4
-rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-mov.ll               |  32
-rw-r--r-- | llvm/test/CodeGen/X86/fma_patterns.ll               |   4
-rw-r--r-- | llvm/test/CodeGen/X86/fma_patterns_wide.ll          |   4
-rw-r--r-- | llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll |   4
-rw-r--r-- | llvm/test/CodeGen/X86/nontemporal-loads.ll          | 160
-rw-r--r-- | llvm/test/CodeGen/X86/vector-lzcnt-128.ll           |  78
-rw-r--r-- | llvm/test/CodeGen/X86/vector-lzcnt-256.ll           | 104
-rw-r--r-- | llvm/test/CodeGen/X86/vector-tzcnt-128.ll           | 138
-rw-r--r-- | llvm/test/CodeGen/X86/vector-tzcnt-256.ll           | 184
17 files changed, 242 insertions, 582 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index 03cbc1f5ec1..72219a8413e 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -603,10 +603,30 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
 }
 
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
-; CHECK-LABEL: andd512fold:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
-; CHECK-NEXT: retq
+; AVX512F-LABEL: andd512fold:
+; AVX512F: ## BB#0: ## %entry
+; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: andd512fold:
+; AVX512VL: ## BB#0: ## %entry
+; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: andd512fold:
+; AVX512BW: ## BB#0: ## %entry
+; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: andd512fold:
+; AVX512DQ: ## BB#0: ## %entry
+; AVX512DQ-NEXT: vandps (%rdi), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: andd512fold:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
+; SKX-NEXT: retq
 entry:
 %a = load <16 x i32>, <16 x i32>* %x, align 4
 %b = and <16 x i32> %y, %a
diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
index 1cf1c076796..7e8b5219dc4 100644
--- a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
+++ b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
@@ -9,8 +9,8 @@ define void @bar__512(<16 x i32>* %var) #0 {
 ; CHECK-NEXT: pushq %rbx
 ; CHECK-NEXT: subq $112, %rsp
 ; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0
-; CHECK-NEXT: vmovdqu64 %zmm0, (%rsp) ## 64-byte Spill
+; CHECK-NEXT: vmovups (%rbx), %zmm0
+; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
 ; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
 ; CHECK-NEXT: callq _Print__512
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index fce592a5318..c1d0f2baf4f 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -4,15 +4,10 @@
 ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
 
 define <16 x i1> @test1() {
-; KNL-LABEL: test1:
-; KNL: ## BB#0:
-; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test1:
-; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; SKX-NEXT: retq
+; ALL_X64-LABEL: test1:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; ALL_X64-NEXT: retq
 ;
 ; KNL_X32-LABEL: test1:
 ; KNL_X32: ## BB#0:
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index d2410e4a0a5..3f0df70ba0d 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -761,7 +761,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
 ;
 ; SKX-LABEL: sitofp_16i1_double:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vxorpd %zmm2, %zmm2, %zmm2
 ; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0
 ; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1
 ; SKX-NEXT: vpmovm2d %k1, %ymm0
@@ -787,7 +787,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
 ;
 ; SKX-LABEL: sitofp_8i1_double:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; SKX-NEXT: vxorpd %zmm1, %zmm1, %zmm1
 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0
 ; SKX-NEXT: vpmovm2d %k0, %ymm0
 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0
@@ -811,7 +811,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
 ;
 ; SKX-LABEL: sitofp_8i1_float:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vxorps %ymm1, %ymm1, %ymm1
 ; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0
 ; SKX-NEXT: vpmovm2d %k0, %ymm0
 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0
@@ -831,7 +831,7 @@ define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
 ;
 ; SKX-LABEL: sitofp_4i1_float:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
 ; SKX-NEXT: vpmovm2d %k0, %xmm0
 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -854,7 +854,7 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
 ;
 ; SKX-LABEL: sitofp_4i1_double:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
 ; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0
 ; SKX-NEXT: vpmovm2d %k0, %xmm0
 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0
@@ -890,7 +890,7 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
 ;
 ; SKX-LABEL: sitofp_2i1_float:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
 ; SKX-NEXT: vpmovm2d %k0, %xmm0
 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -911,7 +911,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
 ;
 ; SKX-LABEL: sitofp_2i1_double:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
 ; SKX-NEXT: vpmovm2q %k0, %xmm0
 ; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector.ll
index 31b68bd6905..de4b541c9db 100644
--- a/llvm/test/CodeGen/X86/avx512-extract-subvector.ll
+++ b/llvm/test/CodeGen/X86/avx512-extract-subvector.ll
@@ -156,7 +156,7 @@ entry:
 define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v2i64_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -168,7 +168,7 @@ entry:
 define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v4i32_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -180,7 +180,7 @@ entry:
 define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v8i16_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -192,7 +192,7 @@ entry:
 define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v16i8_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -228,7 +228,7 @@ entry:
 define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v2i64_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -240,7 +240,7 @@ entry:
 define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v4i32_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -252,7 +252,7 @@ entry:
 define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v8i16_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -264,7 +264,7 @@ entry:
 define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v16i8_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -300,7 +300,7 @@ entry:
 define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v4i64_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu64 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -312,7 +312,7 @@ entry:
 define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v8i32_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -324,7 +324,7 @@ entry:
 define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v16i16_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -336,7 +336,7 @@ entry:
 define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector512_v32i8_store_lo:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index c8d3b519425..c14455b3c38 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -125,10 +125,15 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
 }
 
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
-; ALL-LABEL: andd512fold:
-; ALL: ## BB#0: ## %entry
-; ALL-NEXT: vpandd (%rdi), %zmm0, %zmm0
-; ALL-NEXT: retq
+; KNL-LABEL: andd512fold:
+; KNL: ## BB#0: ## %entry
+; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: andd512fold:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
+; SKX-NEXT: retq
 entry:
 %a = load <16 x i32>, <16 x i32>* %x, align 4
 %b = and <16 x i32> %y, %a
diff --git a/llvm/test/CodeGen/X86/avx512-mov.ll b/llvm/test/CodeGen/X86/avx512-mov.ll
index 7c5c028f060..fcbc056a5b5 100644
--- a/llvm/test/CodeGen/X86/avx512-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512-mov.ll
@@ -151,7 +151,7 @@ define <4 x i32> @test15(i32* %x) {
 define <16 x i32> @test16(i8 * %addr) {
 ; CHECK-LABEL: test16:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <16 x i32>*
 %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
@@ -161,7 +161,7 @@ define <16 x i32> @test16(i8 * %addr) {
 define <16 x i32> @test17(i8 * %addr) {
 ; CHECK-LABEL: test17:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <16 x i32>*
 %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
@@ -171,7 +171,7 @@ define <16 x i32> @test17(i8 * %addr) {
 define void @test18(i8 * %addr, <8 x i64> %data) {
 ; CHECK-LABEL: test18:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i64>*
 store <8 x i64>%data, <8 x i64>* %vaddr, align 64
@@ -181,7 +181,7 @@ define void @test18(i8 * %addr, <8 x i64> %data) {
 define void @test19(i8 * %addr, <16 x i32> %data) {
 ; CHECK-LABEL: test19:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <16 x i32>*
 store <16 x i32>%data, <16 x i32>* %vaddr, align 1
@@ -191,7 +191,7 @@ define void @test19(i8 * %addr, <16 x i32> %data) {
 define void @test20(i8 * %addr, <16 x i32> %data) {
 ; CHECK-LABEL: test20:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <16 x i32>*
 store <16 x i32>%data, <16 x i32>* %vaddr, align 64
@@ -201,7 +201,7 @@ define void @test20(i8 * %addr, <16 x i32> %data) {
 define <8 x i64> @test21(i8 * %addr) {
 ; CHECK-LABEL: test21:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i64>*
 %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
@@ -211,7 +211,7 @@ define <8 x i64> @test21(i8 * %addr) {
 define void @test22(i8 * %addr, <8 x i64> %data) {
 ; CHECK-LABEL: test22:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i64>*
 store <8 x i64>%data, <8 x i64>* %vaddr, align 1
@@ -221,7 +221,7 @@ define void @test22(i8 * %addr, <8 x i64> %data) {
 define <8 x i64> @test23(i8 * %addr) {
 ; CHECK-LABEL: test23:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i64>*
 %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 5bda3bd173d..f0cd231238c 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -79,7 +79,7 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
 ;
 ; SKX-LABEL: test7:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1
 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
@@ -99,7 +99,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
 ;
 ; SKX-LABEL: test8:
 ; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1
 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512vl-mov.ll b/llvm/test/CodeGen/X86/avx512vl-mov.ll
index c822365f924..0617803f865 100644
--- a/llvm/test/CodeGen/X86/avx512vl-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-mov.ll
@@ -4,7 +4,7 @@
 define <8 x i32> @test_256_1(i8 * %addr) {
 ; CHECK-LABEL: test_256_1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i32>*
 %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
@@ -14,7 +14,7 @@ define <8 x i32> @test_256_1(i8 * %addr) {
 define <8 x i32> @test_256_2(i8 * %addr) {
 ; CHECK-LABEL: test_256_2:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i32>*
 %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
@@ -24,7 +24,7 @@ define <8 x i32> @test_256_2(i8 * %addr) {
 define void @test_256_3(i8 * %addr, <4 x i64> %data) {
 ; CHECK-LABEL: test_256_3:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i64>*
 store <4 x i64>%data, <4 x i64>* %vaddr, align 32
@@ -34,7 +34,7 @@ define void @test_256_3(i8 * %addr, <4 x i64> %data) {
 define void @test_256_4(i8 * %addr, <8 x i32> %data) {
 ; CHECK-LABEL: test_256_4:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i32>*
 store <8 x i32>%data, <8 x i32>* %vaddr, align 1
@@ -44,7 +44,7 @@ define void @test_256_4(i8 * %addr, <8 x i32> %data) {
 define void @test_256_5(i8 * %addr, <8 x i32> %data) {
 ; CHECK-LABEL: test_256_5:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <8 x i32>*
 store <8 x i32>%data, <8 x i32>* %vaddr, align 32
@@ -54,7 +54,7 @@ define void @test_256_5(i8 * %addr, <8 x i32> %data) {
 define <4 x i64> @test_256_6(i8 * %addr) {
 ; CHECK-LABEL: test_256_6:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i64>*
 %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
@@ -64,7 +64,7 @@ define <4 x i64> @test_256_6(i8 * %addr) {
 define void @test_256_7(i8 * %addr, <4 x i64> %data) {
 ; CHECK-LABEL: test_256_7:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i64>*
 store <4 x i64>%data, <4 x i64>* %vaddr, align 1
@@ -74,7 +74,7 @@ define void @test_256_7(i8 * %addr, <4 x i64> %data) {
 define <4 x i64> @test_256_8(i8 * %addr) {
 ; CHECK-LABEL: test_256_8:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i64>*
 %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
@@ -392,7 +392,7 @@ define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
 define <4 x i32> @test_128_1(i8 * %addr) {
 ; CHECK-LABEL: test_128_1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i32>*
 %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
@@ -402,7 +402,7 @@ define <4 x i32> @test_128_1(i8 * %addr) {
 define <4 x i32> @test_128_2(i8 * %addr) {
 ; CHECK-LABEL: test_128_2:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i32>*
 %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
@@ -412,7 +412,7 @@ define <4 x i32> @test_128_2(i8 * %addr) {
 define void @test_128_3(i8 * %addr, <2 x i64> %data) {
 ; CHECK-LABEL: test_128_3:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <2 x i64>*
 store <2 x i64>%data, <2 x i64>* %vaddr, align 16
@@ -422,7 +422,7 @@ define void @test_128_3(i8 * %addr, <2 x i64> %data) {
 define void @test_128_4(i8 * %addr, <4 x i32> %data) {
 ; CHECK-LABEL: test_128_4:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i32>*
 store <4 x i32>%data, <4 x i32>* %vaddr, align 1
@@ -432,7 +432,7 @@ define void @test_128_4(i8 * %addr, <4 x i32> %data) {
 define void @test_128_5(i8 * %addr, <4 x i32> %data) {
 ; CHECK-LABEL: test_128_5:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <4 x i32>*
 store <4 x i32>%data, <4 x i32>* %vaddr, align 16
@@ -442,7 +442,7 @@ define void @test_128_5(i8 * %addr, <4 x i32> %data) {
 define <2 x i64> @test_128_6(i8 * %addr) {
 ; CHECK-LABEL: test_128_6:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <2 x i64>*
 %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
@@ -452,7 +452,7 @@ define <2 x i64> @test_128_6(i8 * %addr) {
 define void @test_128_7(i8 * %addr, <2 x i64> %data) {
 ; CHECK-LABEL: test_128_7:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <2 x i64>*
 store <2 x i64>%data, <2 x i64>* %vaddr, align 1
@@ -462,7 +462,7 @@ define void @test_128_7(i8 * %addr, <2 x i64> %data) {
 define <2 x i64> @test_128_8(i8 * %addr) {
 ; CHECK-LABEL: test_128_8:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %vaddr = bitcast i8* %addr to <2 x i64>*
 %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index 371361ddeef..f5df00fd98d 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -1131,7 +1131,7 @@ define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
 ;
 ; AVX512-LABEL: test_v4f32_fneg_fmul:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT: retq
 %m = fmul nsz <4 x float> %x, %y
@@ -1154,7 +1154,7 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
 ;
 ; AVX512-LABEL: test_v4f64_fneg_fmul:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vxorpd %ymm2, %ymm2, %ymm2
 ; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
 ; AVX512-NEXT: retq
 %m = fmul nsz <4 x double> %x, %y
diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
index 2c49bb851c3..d41977cf937 100644
--- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -749,7 +749,7 @@ define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0
 ;
 ; AVX512-LABEL: test_v16f32_fneg_fmul:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512-NEXT: vxorps %zmm2, %zmm2, %zmm2
 ; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
 ; AVX512-NEXT: retq
 %m = fmul nsz <16 x float> %x, %y
@@ -774,7 +774,7 @@ define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
 ;
 ; AVX512-LABEL: test_v8f64_fneg_fmul:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512-NEXT: vxorpd %zmm2, %zmm2, %zmm2
 ; AVX512-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
 ; AVX512-NEXT: retq
 %m = fmul nsz <8 x double> %x, %y
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
index bc06d8f1904..6334a8be3ea 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -419,13 +419,13 @@ define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable
 define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovdqu32 (%rdi), %zmm0
+; ALL-NEXT: vmovups (%rdi), %zmm0
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
 ; X32-AVX512F: # BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0
+; X32-AVX512F-NEXT: vmovups (%eax), %zmm0
 ; X32-AVX512F-NEXT: retl
 %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
 %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
diff --git a/llvm/test/CodeGen/X86/nontemporal-loads.ll b/llvm/test/CodeGen/X86/nontemporal-loads.ll
index 3127e614330..70828b67292 100644
--- a/llvm/test/CodeGen/X86/nontemporal-loads.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-loads.ll
@@ -59,7 +59,7 @@ define <4 x i32> @test_v4i32(<4 x i32>* %src) {
 ;
 ; AVX512VL-LABEL: test_v4i32:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 (%rdi), %xmm0
+; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
 ; AVX512VL-NEXT: retq
 %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
 ret <4 x i32> %1
@@ -229,7 +229,7 @@ define <8 x i32> @test_v8i32(<8 x i32>* %src) {
 ;
 ; AVX512VL-LABEL: test_v8i32:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 (%rdi), %ymm0
+; AVX512VL-NEXT: vmovaps (%rdi), %ymm0
 ; AVX512VL-NEXT: retq
 %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
 ret <8 x i32> %1
@@ -1165,20 +1165,10 @@ define <4 x i32> @test_unaligned_v4i32(<4 x i32>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v4i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v4i32:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v4i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu32 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v4i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
 %1 = load <4 x i32>, <4 x i32>* %src, align 1, !nontemporal !1
 ret <4 x i32> %1
 }
@@ -1213,20 +1203,10 @@ define <2 x i64> @test_unaligned_v2i64(<2 x i64>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v2i64:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v2i64:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v2i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
 %1 = load <2 x i64>, <2 x i64>* %src, align 1, !nontemporal !1
 ret <2 x i64> %1
 }
@@ -1242,20 +1222,10 @@ define <8 x i16> @test_unaligned_v8i16(<8 x i16>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v8i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v8i16:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v8i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v8i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
 %1 = load <8 x i16>, <8 x i16>* %src, align 1, !nontemporal !1
 ret <8 x i16> %1
 }
@@ -1271,20 +1241,10 @@ define <16 x i8> @test_unaligned_v16i8(<16 x i8>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v16i8:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
 %1 = load <16 x i8>, <16 x i8>* %src, align 1, !nontemporal !1
 ret <16 x i8> %1
 }
@@ -1323,20 +1283,10 @@ define <8 x i32> @test_unaligned_v8i32(<8 x i32>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %ymm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v8i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v8i32:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v8i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu32 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
 %1 = load <8 x i32>, <8 x i32>* %src, align 1, !nontemporal !1
 ret <8 x i32> %1
 }
@@ -1373,20 +1323,10 @@ define <4 x i64> @test_unaligned_v4i64(<4 x i64>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %ymm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v4i64:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v4i64:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v4i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
 %1 = load <4 x i64>, <4 x i64>* %src, align 1, !nontemporal !1
 ret <4 x i64> %1
 }
@@ -1403,20 +1343,10 @@ define <16 x i16> @test_unaligned_v16i16(<16 x i16>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %ymm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v16i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v16i16:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v16i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
 %1 = load <16 x i16>, <16 x i16>* %src, align 1, !nontemporal !1
 ret <16 x i16> %1
 }
@@ -1433,20 +1363,10 @@ define <32 x i8> @test_unaligned_v32i8(<32 x i8>* %src) {
 ; AVX-NEXT: vmovups (%rdi), %ymm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: test_unaligned_v32i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v32i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v32i8:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
 %1 = load <32 x i8>, <32 x i8>* %src, align 1, !nontemporal !1
 ret <32 x i8> %1
 }
@@ -1493,7 +1413,7 @@ define <16 x i32> @test_unaligned_v16i32(<16 x i32>* %src) {
 ;
 ; AVX512-LABEL: test_unaligned_v16i32:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0
+; AVX512-NEXT: vmovups (%rdi), %zmm0
 ; AVX512-NEXT: retq
 %1 = load <16 x i32>, <16 x i32>* %src, align 1, !nontemporal !1
 ret <16 x i32> %1
@@ -1539,7 +1459,7 @@ define <8 x i64> @test_unaligned_v8i64(<8 x i64>* %src) {
 ;
 ; AVX512-LABEL: test_unaligned_v8i64:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512-NEXT: vmovups (%rdi), %zmm0
 ; AVX512-NEXT: retq
 %1 = load <8 x i64>, <8 x i64>* %src, align 1, !nontemporal !1
 ret <8 x i64> %1
@@ -1573,8 +1493,8 @@ define <32 x i16> @test_unaligned_v32i16(<32 x i16>* %src) {
 ;
 ; AVX512VL-LABEL: test_unaligned_v32i16:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1
+; AVX512VL-NEXT: vmovups (%rdi), %ymm0
+; AVX512VL-NEXT: vmovups 32(%rdi), %ymm1
 ; AVX512VL-NEXT: retq
 %1 = load <32 x i16>, <32 x i16>* %src, align 1, !nontemporal !1
 ret <32 x i16> %1
@@ -1608,8 +1528,8 @@ define <64 x i8> @test_unaligned_v64i8(<64 x i8>* %src) {
 ;
 ; AVX512VL-LABEL: test_unaligned_v64i8:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1
+; AVX512VL-NEXT: vmovups (%rdi), %ymm0
+; AVX512VL-NEXT: vmovups 32(%rdi), %ymm1
 ; AVX512VL-NEXT: retq
 %1 = load <64 x i8>, <64 x i8>* %src, align 1, !nontemporal !1
 ret <64 x i8> %1
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index 06c78557533..deffdc3cdc7 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -1632,15 +1632,10 @@ define <4 x i32> @foldv4i32() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv4i32:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv4i32:
 ; X32-SSE: # BB#0:
@@ -1661,15 +1656,10 @@ define <4 x i32> @foldv4i32u() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv4i32u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i32u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv4i32u:
 ; X32-SSE: # BB#0:
@@ -1690,15 +1680,10 @@ define <8 x i16> @foldv8i16() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv8i16:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv8i16:
 ; X32-SSE: # BB#0:
@@ -1719,15 +1704,10 @@ define <8 x i16> @foldv8i16u() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv8i16u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i16u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv8i16u:
 ; X32-SSE: # BB#0:
@@ -1748,15 +1728,10 @@ define <16 x i8> @foldv16i8() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv16i8:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv16i8:
 ; X32-SSE: # BB#0:
@@ -1777,15 +1752,10 @@ define <16 x i8> @foldv16i8u() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv16i8u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i8u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv16i8u:
 ; X32-SSE: # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-256.ll b/llvm/test/CodeGen/X86/vector-lzcnt-256.ll
index a44be124420..e73dbb6a185 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-256.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-256.ll
@@ -596,15 +596,10 @@ define <4 x i64> @foldv4i64() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv4i64:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512-NEXT: retq
 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
 ret <4 x i64> %out
 }
@@ -615,15 +610,10 @@ define <4 x i64> @foldv4i64u() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv4i64u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i64u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512-NEXT: retq
 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
 ret <4 x i64> %out
 }
@@ -634,15 +624,10 @@ define <8 x i32> @foldv8i32() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv8i32:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512-NEXT: retq
 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
 ret <8 x i32> %out
 }
@@ -653,15 +638,10 @@ define <8 x i32> @foldv8i32u() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv8i32u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i32u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512-NEXT: retq
 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
 ret <8 x i32> %out
 }
@@ -672,15 +652,10 @@ define <16 x i16> @foldv16i16() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv16i16:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512-NEXT: retq
 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
 ret <16 x i16> %out
 }
@@ -691,15 +666,10 @@ define <16 x i16> @foldv16i16u() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv16i16u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i16u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512-NEXT: retq
 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
 ret <16 x i16> %out
 }
@@ -710,15 +680,10 @@ define <32 x i8> @foldv32i8() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv32i8:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512-NEXT: retq
 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
 ret <32 x i8> %out
 }
@@ -729,15 +694,10 @@ define <32 x i8> @foldv32i8u() nounwind {
 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
 ; AVX-NEXT: retq
 ;
-; AVX512VLCD-LABEL: foldv32i8u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv32i8u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512-NEXT: retq
 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
 ret <32 x i8> %out
 }
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
index c9ad6e40d1c..34ac6e476f2 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
@@ -1435,25 +1435,10 @@ define <4 x i32> @foldv4i32() nounwind {
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: foldv4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i32:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv4i32:
 ; X32-SSE: # BB#0:
@@ -1469,25 +1454,10 @@ define <4 x i32> @foldv4i32u() nounwind {
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: foldv4i32u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i32u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i32u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv4i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv4i32u:
 ; X32-SSE: # BB#0:
@@ -1503,25 +1473,10 @@ define <8 x i16> @foldv8i16() nounwind {
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: foldv8i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i16:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv8i16:
 ; X32-SSE: # BB#0:
@@ -1537,25 +1492,10 @@ define <8 x i16> @foldv8i16u() nounwind {
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: foldv8i16u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i16u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i16u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv8i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv8i16u:
 ; X32-SSE: # BB#0:
@@ -1571,25 +1511,10 @@ define <16 x i8> @foldv16i8() nounwind {
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: foldv16i8:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i8:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv16i8:
 ; X32-SSE: # BB#0:
@@ -1605,25 +1530,10 @@ define <16 x i8> @foldv16i8u() nounwind {
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: foldv16i8u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i8u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i8u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv16i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
 ;
 ; X32-SSE-LABEL: foldv16i8u:
 ; X32-SSE: # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-256.ll b/llvm/test/CodeGen/X86/vector-tzcnt-256.ll
index 286bc50ec72..435ddc02250 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-256.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-256.ll
@@ -713,193 +713,73 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
 }
 
 define <4 x i64> @foldv4i64() nounwind {
-; AVX1-LABEL: foldv4i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i64:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv4i64:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; ALL-NEXT: retq
 %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
 ret <4 x i64> %out
 }
 
 define <4 x i64> @foldv4i64u() nounwind {
-; AVX1-LABEL: foldv4i64u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i64u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i64u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv4i64u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; ALL-NEXT: retq
 %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
 ret <4 x i64> %out
 }
 
 define <8 x i32> @foldv8i32() nounwind {
-; AVX1-LABEL: foldv8i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i32:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv8i32:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; ALL-NEXT: retq
 %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
 ret <8 x i32> %out
 }
 
 define <8 x i32> @foldv8i32u() nounwind {
-; AVX1-LABEL: foldv8i32u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i32u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i32u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv8i32u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; ALL-NEXT: retq
 %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
 ret <8 x i32> %out
 }
 
 define <16 x i16> @foldv16i16() nounwind {
-; AVX1-LABEL: foldv16i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i16:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv16i16:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; ALL-NEXT: retq
 %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
 ret <16 x i16> %out
 }
 
 define <16 x i16> @foldv16i16u() nounwind {
-; AVX1-LABEL: foldv16i16u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i16u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i16u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv16i16u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; ALL-NEXT: retq
 %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
 ret <16 x i16> %out
 }
 
 define <32 x i8> @foldv32i8() nounwind {
-; AVX1-LABEL: foldv32i8:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv32i8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv32i8:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv32i8:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; ALL-NEXT: retq
 %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
 ret <32 x i8> %out
 }
 
 define <32 x i8> @foldv32i8u() nounwind {
-; AVX1-LABEL: foldv32i8u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv32i8u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv32i8u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv32i8u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; ALL-NEXT: retq
 %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
 ret <32 x i8> %out
 }