diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-02-08 22:26:39 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-02-08 22:26:39 +0000 |
| commit | 9e030c9e0010b423e877d7a7c8ef3742b16a079f (patch) | |
| tree | a089d03822df489973a6d207a0af2bc4321fe3c2 /llvm/test/CodeGen | |
| parent | 1b5b4ccb77b0bd17a09015722c73698ebd72cc95 (diff) | |
| download | bcm5719-llvm-9e030c9e0010b423e877d7a7c8ef3742b16a079f.tar.gz bcm5719-llvm-9e030c9e0010b423e877d7a7c8ef3742b16a079f.zip | |
[X86] Improve combineCastedMaskArithmetic to fold (bitcast (vXi1 (and/or/xor X, C)))->(vXi1 (and/or/xor (bitcast X), (bitcast C)))) where C is a constant build_vector.
Most vXi1 constant build vectors have to be implemented in the scalar domain anyway, so we'll probably end up with a cast there later. But by then it's too late to do the combine to get rid of it.
llvm-svn: 324662
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 11 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 20 | ||||
| -rwxr-xr-x | llvm/test/CodeGen/X86/avx512-schedule.ll | 10 |
3 files changed, 13 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index f19f062f066..f68461856c3 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3754,14 +3754,9 @@ declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone define i16 @test_kandn(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kandn: ; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: kandnw %k1, %k0, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax +; CHECK-NEXT: orl $-9, %edi +; CHECK-NEXT: andl %esi, %edi +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1) diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index f035b724f85..cce2f43164d 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -513,11 +513,8 @@ define void @test7(<8 x i1> %mask) { ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 -; KNL-NEXT: movb $85, %al -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb %al, %al +; KNL-NEXT: orb $85, %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -525,20 +522,16 @@ define void @test7(<8 x i1> %mask) { ; SKX: ## %bb.0: ## %allocas ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k0 -; SKX-NEXT: movb $85, %al -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kortestb %k1, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: orb $85, %al ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test7: ; AVX512BW: ## %bb.0: ## %allocas ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovw2m 
%zmm0, %k0 -; AVX512BW-NEXT: movb $85, %al -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: testb %al, %al +; AVX512BW-NEXT: orb $85, %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -547,9 +540,8 @@ define void @test7(<8 x i1> %mask) { ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512DQ-NEXT: movb $85, %al -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kortestb %k1, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: orb $85, %al ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq allocas: diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 99798c72d44..5cb9dbb5113 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -7020,18 +7020,16 @@ define void @vcmp_test7(<8 x i1> %mask) { ; GENERIC: # %bb.0: # %allocas ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: movb $85, %al # sched: [1:0.33] -; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kortestb %k1, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: orb $85, %al # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vcmp_test7: ; SKX: # %bb.0: # %allocas ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: movb $85, %al # sched: [1:0.25] -; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: kortestb %k1, %k0 # sched: [3:1.00] +; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: orb $85, %al # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] allocas: %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> |

