| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2017-06-22 06:47:41 +0000 |
|---|---|---|
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2017-06-22 06:47:41 +0000 |
| commit | 2dac0b4d583f685691635448de73fdd0f6191582 | |
| tree | 72323032aec0b2f4067bc3cf40263e6e40d489d7 /llvm/test | |
| parent | 3c4933fcc6014e1f07d32ac35cd3c8513bc302f1 | |
AVX-512: Lowering Masked Gather intrinsic - fixed a bug
Masked gather for vector length 2 was lowered incorrectly for element type i32.
The type <2 x i32> was automatically extended to <2 x i64>, and we generated VPGATHERQQ instead of VPGATHERQD.
The type <2 x float> is extended to <4 x float>, so there is no bug for that type, but the generated sequence could be better.
In this patch I'm fixing the <2 x i32> bug and optimizing the <2 x float> sequence, for GATHERs only; the same fix should be done for Scatters as well (see the reproducer sketch below).
Differential revision: https://reviews.llvm.org/D34343
llvm-svn: 305987
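To make the bug concrete, here is a minimal reproducer in the same (typed-pointer) IR style as the test changes below. The function name is illustrative and the body is modeled on test23 from this diff; the intrinsic declaration matches the one already present in masked_gather_scatter.ll. Before this patch, SKX codegen selected the 64-bit-element vpgatherqq for the widened <2 x i32> gather; with the fix it selects vpgatherqd and sign-extends the result back with vpmovsxdq, as the updated test23/test24 checks show.

```llvm
; Illustrative reproducer (modeled on test23 below; the function name is
; hypothetical). The two i32 elements are gathered through 64-bit
; indices, so an SKX target should emit vpgatherqd (dword gather) rather
; than vpgatherqq (qword gather).
define <2 x i32> @gather_v2i32(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
  ret <2 x i32> %res
}

declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
```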
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll | 69 |
1 file changed, 61 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 91087f650ad..77254ba6760 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1226,6 +1226,57 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
   ret <2 x float>%res
 }
 
+define <2 x float> @test22a(float* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x float> %src0) {
+; KNL_64-LABEL: test22a:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
+; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
+; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
+; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
+; KNL_64-NEXT: vmovaps %xmm2, %xmm0
+; KNL_64-NEXT: vzeroupper
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test22a:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
+; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
+; KNL_32-NEXT: vmovaps %xmm2, %xmm0
+; KNL_32-NEXT: vzeroupper
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test22a:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
+; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1}
+; SKX-NEXT: vmovaps %xmm2, %xmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test22a:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1}
+; SKX_32-NEXT: vmovaps %xmm2, %xmm0
+; SKX_32-NEXT: retl
+  %gep.random = getelementptr float, float* %base, <2 x i64> %ind
+  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
+  ret <2 x float>%res
+}
+
 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
 declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
 
@@ -1262,8 +1313,9 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; SKX: # BB#0:
 ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
 ; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
-; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
-; SKX-NEXT: vmovdqa %xmm2, %xmm0
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
+; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
 ; SKX-NEXT: retq
 ;
 ; SKX_32-LABEL: test23:
@@ -1271,8 +1323,9 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
 ; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
-; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
+; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
 ; SKX_32-NEXT: retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
@@ -1307,16 +1360,16 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
 ; SKX-LABEL: test24:
 ; SKX: # BB#0:
 ; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
-; SKX-NEXT: vmovdqa %xmm1, %xmm0
+; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
+; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
 ; SKX-NEXT: retq
 ;
 ; SKX_32-LABEL: test24:
 ; SKX_32: # BB#0:
 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
-; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
+; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
+; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
 ; SKX_32-NEXT: retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
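For reference, this hunk does not include the RUN lines that drive the KNL_64/KNL_32/SKX/SKX_32 check prefixes above; the sketch below assumes SKX means AVX-512VL/DQ and KNL means plain AVX-512F. The authoritative RUN lines are at the top of masked_gather_scatter.ll.

```llvm
; Sketch of RUN lines matching the four check prefixes (feature sets are
; assumptions, not copied from the file):
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=KNL_64
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=KNL_32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=SKX
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=SKX_32
```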

