author | Craig Topper <craig.topper@intel.com> | 2019-10-07 23:02:03 +0000
committer | Craig Topper <craig.topper@intel.com> | 2019-10-07 23:02:03 +0000
commit | 7647d3ec7003ff5264d9d3aa1c2262a8f6853be5 (patch)
tree | 8c74c929a78e01c45a3375cb56662df1938d989e
parent | 2b9f0b064b48cd14298be7ce99549da0cd5d5596 (diff)
[X86] Add test cases for zero extending a gather index from less than i32 to i64.
We should be able to use a smaller zero extend.
llvm-svn: 373981
-rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll | 102
1 file changed, 102 insertions, 0 deletions
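As context for the "smaller zero extend" remark in the commit message: the indices in these tests are zero extended from i8, so every index value fits in 32 bits, and extending only to an i32 element type would be enough to form the addresses. Below is a minimal sketch of that narrower IR, illustrative only and not part of this commit; the function name @zext_i8_index_narrow is invented for this example.

define <16 x float> @zext_i8_index_narrow(float* %base, <16 x i8> %ind) {
; Values zero extended from i8 are at most 255, so the i32 indices below are
; non-negative and getelementptr's implicit extension to pointer width is safe.
  %zext_ind = zext <16 x i8> %ind to <16 x i32>
  %gep = getelementptr float, float* %base, <16 x i32> %zext_ind
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}
declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)

With 32-bit indices the backend can select a dword-index gather (vgatherdps), covering all 16 lanes with a single zmm gather instead of the two qword-index vgatherqps instructions seen in the checks below.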
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 9bea63f3055..fe870e51538 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2689,6 +2689,108 @@ define <8 x float> @sext_v8i8_index(float* %base, <8 x i8> %ind) {
 }
 declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
+; Make sure we also allow index to be zero extended from a smaller than i32 element size.
+define <16 x float> @zext_i8_index(float* %base, <16 x i8> %ind) {
+; KNL_64-LABEL: zext_i8_index:
+; KNL_64: # %bb.0:
+; KNL_64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL_64-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL_64-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k2
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: zext_i8_index:
+; KNL_32: # %bb.0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL_32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; KNL_32-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL_32-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: kxnorw %k0, %k0, %k2
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: zext_i8_index:
+; SKX: # %bb.0:
+; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; SKX-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: kxnorw %k0, %k0, %k2
+; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
+; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: zext_i8_index:
+; SKX_32: # %bb.0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; SKX_32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; SKX_32-NEXT: vextracti128 $1, %ymm0, %xmm0
+; SKX_32-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k2
+; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
+; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; SKX_32-NEXT: retl
+
+  %zext_ind = zext <16 x i8> %ind to <16 x i64>
+  %gep.random = getelementptr float, float *%base, <16 x i64> %zext_ind
+
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  ret <16 x float>%res
+}
+
+; Make sure we also allow index to be zero extended from a smaller than i32 element size.
+define <8 x float> @zext_v8i8_index(float* %base, <8 x i8> %ind) {
+; KNL_64-LABEL: zext_v8i8_index:
+; KNL_64: # %bb.0:
+; KNL_64-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: zext_v8i8_index:
+; KNL_32: # %bb.0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: zext_v8i8_index:
+; SKX: # %bb.0:
+; SKX-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: zext_v8i8_index:
+; SKX_32: # %bb.0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; SKX_32-NEXT: retl
+
+  %zext_ind = zext <8 x i8> %ind to <8 x i64>
+  %gep.random = getelementptr float, float *%base, <8 x i64> %zext_ind
+
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %gep.random, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
+  ret <8 x float>%res
+}
+
 ; Index requires promotion
 define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32> %ind, <2 x i1> %mask) {
 ; KNL_64-LABEL: test_scatter_2i32_index: