diff options
author | Craig Topper <craig.topper@intel.com> | 2017-08-30 16:38:33 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-08-30 16:38:33 +0000 |
commit | afce0baacd6a10e891518d3f584600de366a5e0b (patch) | |
tree | 6f795088f69bd2f3fcfc8edded12ff031c274515 /llvm/test/CodeGen | |
parent | 89e8d5e9557d29148c9ff118b0e5eb9e6a3a5277 (diff) | |
download | bcm5719-llvm-afce0baacd6a10e891518d3f584600de366a5e0b.tar.gz bcm5719-llvm-afce0baacd6a10e891518d3f584600de366a5e0b.zip |
[AVX512] Don't use the 32-bit element versions of AND/OR/XOR/ANDN during isel unless we're matching a masked op or broadcast
Selecting 32-bit element logical ops without a select or broadcast requires matching a bitconvert on the inputs to the AND. But that's a weird thing to rely on. It's entirely possible that one of the inputs doesn't have a bitcast and one does.
Since there's no functional difference, just remove the extra patterns and save some isel table size.
Differential Revision: https://reviews.llvm.org/D36854
llvm-svn: 312138
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-arith.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-logic.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-bitreverse.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-lzcnt-512.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-rotate-512.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc-math.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-tzcnt-512.ll | 16 |
9 files changed, 32 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll index debfb2974fa..1bcd3c60e3b 100644 --- a/llvm/test/CodeGen/X86/avx512-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-arith.ll @@ -607,17 +607,17 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; AVX512F-LABEL: andd512fold: ; AVX512F: # BB#0: # %entry -; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; AVX512F-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: andd512fold: ; AVX512VL: # BB#0: # %entry -; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; AVX512VL-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: andd512fold: ; AVX512BW: # BB#0: # %entry -; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: andd512fold: diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index afb463d9fe4..0e4a88bd0b4 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -959,7 +959,7 @@ define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) { define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: test_xor_epi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) ret < 16 x i32> %res @@ -981,7 +981,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: test_or_epi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 
x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) ret < 16 x i32> %res @@ -1003,7 +1003,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: test_and_epi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) ret < 16 x i32> %res diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll index 6e08753dbbb..c96c63dd0a4 100644 --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -7,7 +7,7 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; ALL-LABEL: vpandd: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. @@ -21,7 +21,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readno ; ALL-LABEL: vpandnd: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpandnd %zmm0, %zmm1, %zmm0 +; ALL-NEXT: vpandnq %zmm0, %zmm1, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. @@ -37,7 +37,7 @@ define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ; ALL-LABEL: vpord: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. 
@@ -51,7 +51,7 @@ define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; ALL-LABEL: vpxord: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. @@ -132,7 +132,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; KNL-LABEL: andd512fold: ; KNL: ## BB#0: ## %entry -; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; KNL-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: andd512fold: diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll index 14bdb3853b0..1194f96b01a 100644 --- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll +++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll @@ -1335,7 +1335,7 @@ define <16 x i32> @f16xi32_i128(<16 x i32> %a) { ; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retl ; ; AVX-64-LABEL: f16xi32_i128: @@ -1369,7 +1369,7 @@ define <16 x i32> @f16xi32_i128(<16 x i32> %a) { ; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512F-64-NEXT: retq %res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a %res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, 
i32 3>, %res1 diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll index af91d35a6ee..485911280c6 100644 --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -2051,17 +2051,17 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind { ; AVX512F-NEXT: vpslld $4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512F-NEXT: vpsrld $4, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1 ; AVX512F-NEXT: vpslld $2, %zmm1, %zmm1 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512F-NEXT: vpsrld $2, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1 ; AVX512F-NEXT: vpslld $1, %zmm1, %zmm1 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_bitreverse_v16i32: diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll index 2a200d093e1..a7ef5980a23 100644 --- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll +++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll @@ -176,7 +176,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512BW-LABEL: testv16i32: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 @@ -206,7 +206,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512DQ-LABEL: testv16i32: ; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord 
%zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 @@ -263,7 +263,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512BW-LABEL: testv16i32u: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 @@ -293,7 +293,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512DQ-LABEL: testv16i32u: ; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll index 9403ea12ff2..bf02f94b161 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -696,7 +696,7 @@ define <16 x i32> @splatconstant_rotate_mask_v16i32(<16 x i32> %a) nounwind { ; AVX512-LABEL: splatconstant_rotate_mask_v16i32: ; AVX512: # BB#0: ; AVX512-NEXT: vprold $4, %zmm0, %zmm0 -; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512-NEXT: retq %shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> %lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28> diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index f22bc95aa93..bbeb9055d05 100644 --- 
a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -3116,7 +3116,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin ; ; AVX512-LABEL: trunc_and_v16i32_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -3830,7 +3830,7 @@ define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin ; ; AVX512-LABEL: trunc_xor_v16i32_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -4544,7 +4544,7 @@ define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind ; ; AVX512-LABEL: trunc_or_v16i32_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll index a604e41c031..4d3858863e3 100644 --- a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll +++ b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll @@ -139,7 +139,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512CD: # BB#0: ; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512CD-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; AVX512CD-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1 @@ -175,7 +175,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512CDBW: # BB#0: ; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm2 -; AVX512CDBW-NEXT: vpandd %zmm2, %zmm0, %zmm0 +; AVX512CDBW-NEXT: 
vpandq %zmm2, %zmm0, %zmm0 ; AVX512CDBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 ; AVX512CDBW-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -197,7 +197,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2 -; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 ; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -219,7 +219,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512VPOPCNTDQ: # BB#0: ; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -233,7 +233,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512CD: # BB#0: ; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 ; AVX512CD-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0 @@ -243,7 +243,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512CDBW: # BB#0: ; 
AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512CDBW-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0 ; AVX512CDBW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0 @@ -253,7 +253,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2 -; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 ; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -275,7 +275,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512VPOPCNTDQ: # BB#0: ; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 |