diff options
author | Craig Topper <craig.topper@intel.com> | 2019-06-23 23:51:21 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-06-23 23:51:21 +0000 |
commit | e8da65c698edf099208fa041d956490cf25e70a3 (patch) | |
tree | 8c328924481e598dc26f938438f6b1af0a8836ad /llvm/test | |
parent | c6094f0495bfb874e908b4ef3ff8a6adde92be34 (diff) | |
download | bcm5719-llvm-e8da65c698edf099208fa041d956490cf25e70a3.tar.gz bcm5719-llvm-e8da65c698edf099208fa041d956490cf25e70a3.zip |
[X86] Turn v16i16->v16i8 truncate+store into a any_extend+truncstore if we avx512f, but not avx512bw.
Ideally we'd be able to represent this truncate as a any_extend to
v16i32 and a truncate, but SelectionDAG doens't know how to not
fold those together.
We have isel patterns to use a vpmovzxwd+vpdmovdb for the truncate,
but we aren't able to simultaneously fold the load and the store
from the isel pattern. By pulling the truncate into the store we
can successfully hide it from the DAG combiner. Then we can isel
pattern match the truncstore and load+any_extend separately.
llvm-svn: 364163
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc-widen.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc.ll | 4 |
4 files changed, 8 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll index 737925eca04..656dcf9b64f 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll @@ -88,20 +88,18 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind { define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind { ; AVX512F-LABEL: trunc_v32i16_to_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512F-NEXT: vpmovdb %zmm1, 16(%rsi) -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpmovdb %zmm0, (%rsi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_v32i16_to_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512VL-NEXT: vpmovdb %zmm1, 16(%rsi) -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpmovdb %zmm0, (%rsi) ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll index 6f94e0c6086..c42f91f50c0 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -88,20 +88,18 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind { define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind { ; AVX512F-LABEL: trunc_v32i16_to_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512F-NEXT: vpmovdb %zmm1, 16(%rsi) -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpmovdb %zmm0, (%rsi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_v32i16_to_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX512VL-NEXT: vpmovdb %zmm1, 16(%rsi) -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpmovdb %zmm0, (%rsi) ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll index 6a504269b93..23a4a978a2d 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll @@ -1148,7 +1148,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) { ; ; AVX512F-LABEL: trunc16i16_16i8_ashr: ; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpsraw $8, %ymm0, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpmovdb %zmm0, (%rax) ; AVX512F-NEXT: vzeroupper @@ -1156,7 +1156,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) { ; ; AVX512VL-LABEL: trunc16i16_16i8_ashr: ; AVX512VL: # %bb.0: # %entry -; AVX512VL-NEXT: vpsraw $8, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) ; AVX512VL-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index 0027fbe2657..35b190b1132 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -1158,7 +1158,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) { ; ; AVX512F-LABEL: trunc16i16_16i8_ashr: ; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpsraw $8, %ymm0, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpmovdb %zmm0, (%rax) ; AVX512F-NEXT: vzeroupper @@ -1166,7 +1166,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) { ; ; AVX512VL-LABEL: trunc16i16_16i8_ashr: ; AVX512VL: # %bb.0: # %entry -; AVX512VL-NEXT: vpsraw $8, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) ; AVX512VL-NEXT: vzeroupper |