diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-12-09 23:10:59 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-12-09 23:10:59 +0000 |
| commit | f4e3044db99adc2043bb351da5418ef6af920ca2 (patch) | |
| tree | 0e5f956786efdb422e91a5044e26b4a073fca41b | |
| parent | 5ac75d5628a1758c37e5e2c4f6ee229d399f9331 (diff) | |
| download | bcm5719-llvm-f4e3044db99adc2043bb351da5418ef6af920ca2.tar.gz bcm5719-llvm-f4e3044db99adc2043bb351da5418ef6af920ca2.zip | |
[X86] Use KMOV instructions to zero upper bits of vectors when possible.
llvm-svn: 320268
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrVecCompiler.td | 41 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-mov.ll | 18 |
| -rw-r--r-- | llvm/test/CodeGen/X86/pr34605.ll | 3 |
3 files changed, 36 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td index 900ce6eb7cf..c1cb4dcb16b 100644 --- a/llvm/lib/Target/X86/X86InstrVecCompiler.td +++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td @@ -495,13 +495,19 @@ let Predicates = [HasBWI, HasVLX] in { // If the bits are not zero we have to fall back to explicitly zeroing by // using shifts. -let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512, NoDQI] in { def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16), (i8 8)), (i8 8))>; } +let Predicates = [HasDQI] in { + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v8i1 VK8:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>; +} + let Predicates = [HasVLX, HasDQI] in { def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), (v2i1 VK2:$mask), (iPTR 0))), @@ -526,26 +532,37 @@ let Predicates = [HasVLX] in { let Predicates = [HasBWI] in { def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), + (v16i1 VK16:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>; + + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v16i1 VK16:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>; + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v32i1 VK32:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>; +} + +let Predicates = [HasBWI, NoDQI] in { + def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32), (i8 24)), (i8 24))>; - def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), - (v16i1 VK16:$mask), (iPTR 0))), - (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK16:$mask, VK32), - (i8 16)), (i8 16))>; def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64), (i8 56)), 
(i8 56))>; +} + +let Predicates = [HasBWI, HasDQI] in { + def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), + (v8i1 VK8:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>; + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), - (v16i1 VK16:$mask), (iPTR 0))), - (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK16:$mask, VK64), - (i8 48)), (i8 48))>; - def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), - (v32i1 VK32:$mask), (iPTR 0))), - (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK32:$mask, VK64), - (i8 32)), (i8 32))>; + (v8i1 VK8:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>; } let Predicates = [HasBWI, HasVLX] in { diff --git a/llvm/test/CodeGen/X86/avx512bw-mov.ll b/llvm/test/CodeGen/X86/avx512bw-mov.ll index e2c27910f09..7158fb262c0 100644 --- a/llvm/test/CodeGen/X86/avx512bw-mov.ll +++ b/llvm/test/CodeGen/X86/avx512bw-mov.ll @@ -102,8 +102,7 @@ define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x ; CHECK: ## %bb.0: ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 ; CHECK-NEXT: vpmovb2m %zmm0, %k0 -; CHECK-NEXT: kshiftlq $48, %k0, %k0 -; CHECK-NEXT: kshiftrq $48, %k0, %k1 +; CHECK-NEXT: kmovw %k0, %k1 ; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; CHECK-NEXT: retq @@ -117,8 +116,7 @@ define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x ; CHECK: ## %bb.0: ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 ; CHECK-NEXT: vpmovb2m %zmm0, %k0 -; CHECK-NEXT: kshiftlq $32, %k0, %k0 -; CHECK-NEXT: kshiftrq $32, %k0, %k1 +; CHECK-NEXT: kmovd %k0, %k1 ; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; CHECK-NEXT: retq @@ -147,8 +145,7 @@ define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 ; CHECK: ## %bb.0: ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 ; CHECK-NEXT: vpmovb2m %zmm0, %k0 -; CHECK-NEXT: kshiftld $16, %k0, %k0 -; CHECK-NEXT: kshiftrd $16, 
%k0, %k1 +; CHECK-NEXT: kmovw %k0, %k1 ; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; CHECK-NEXT: retq @@ -163,8 +160,7 @@ define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> ; CHECK-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 ; CHECK-NEXT: vpmovb2m %zmm0, %k0 -; CHECK-NEXT: kshiftlq $48, %k0, %k0 -; CHECK-NEXT: kshiftrq $48, %k0, %k1 +; CHECK-NEXT: kmovw %k0, %k1 ; CHECK-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask) @@ -178,8 +174,7 @@ define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> ; CHECK-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 ; CHECK-NEXT: vpmovb2m %zmm0, %k0 -; CHECK-NEXT: kshiftlq $32, %k0, %k0 -; CHECK-NEXT: kshiftrq $32, %k0, %k1 +; CHECK-NEXT: kmovd %k0, %k1 ; CHECK-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask) @@ -208,8 +203,7 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1 ; CHECK-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 ; CHECK-NEXT: vpmovb2m %zmm0, %k0 -; CHECK-NEXT: kshiftld $16, %k0, %k0 -; CHECK-NEXT: kshiftrd $16, %k0, %k1 +; CHECK-NEXT: kmovw %k0, %k1 ; CHECK-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask) diff --git a/llvm/test/CodeGen/X86/pr34605.ll b/llvm/test/CodeGen/X86/pr34605.ll index 2209db4e544..2d51a53dc41 100644 --- a/llvm/test/CodeGen/X86/pr34605.ll +++ b/llvm/test/CodeGen/X86/pr34605.ll @@ -15,8 +15,7 @@ define void @pr34605(i8* nocapture %s, i32 %p) { ; CHECK-NEXT: kunpckdq %k0, %k1, %k0 ; CHECK-NEXT: movl $1, %ecx ; CHECK-NEXT: kmovd %ecx, %k1 -; 
CHECK-NEXT: kshiftlq $32, %k1, %k1 -; CHECK-NEXT: kshiftrq $32, %k1, %k1 +; CHECK-NEXT: kmovd %k1, %k1 ; CHECK-NEXT: kandq %k1, %k0, %k1 ; CHECK-NEXT: vmovdqu8 {{\.LCPI.*}}, %zmm0 {%k1} {z} ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 |

