diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-07-12 13:29:41 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-07-12 13:29:41 +0000 |
| commit | 44b89fa900c6098c121e5546b79e40607389f799 (patch) | |
| tree | c25b8db8616e0c35168d24f51144b329417550dd /llvm/test/CodeGen | |
| parent | 9b00a8e9d7e2cc959b1d766f01b5f0539fe997de (diff) | |
| download | bcm5719-llvm-44b89fa900c6098c121e5546b79e40607389f799.tar.gz bcm5719-llvm-44b89fa900c6098c121e5546b79e40607389f799.zip | |
[X86][SSE] Utilize ZeroableElements for canWidenShuffleElements
canWidenShuffleElements can do a better job if given a mask with ZeroableElements info. Apparently, ZeroableElements was only being used to identify AllZero candidates, but we could possibly plug it into more shuffle matchers.
Original Patch by Zvi Rackover @zvi
Differential Revision: https://reviews.llvm.org/D42044
llvm-svn: 336903
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-cast.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll | 28 |
2 files changed, 7 insertions, 25 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/avx-cast.ll b/llvm/test/CodeGen/X86/avx-cast.ll
index 4240f798bf9..92d9319a187 100644
--- a/llvm/test/CodeGen/X86/avx-cast.ll
+++ b/llvm/test/CodeGen/X86/avx-cast.ll
@@ -9,9 +9,7 @@
 define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
 ; AVX-LABEL: castA:
 ; AVX: ## %bb.0:
-; AVX-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT: vmovaps %xmm0, %xmm0
 ; AVX-NEXT: retq
 %shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
 ret <8 x float> %shuffle.i
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 54094c39e46..6f5b7d89dfa 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2890,31 +2890,15 @@ define <32 x i8> @zeroable_src_to_zext(<32 x i8> %a0) {
 ; AVX1-LABEL: zeroable_src_to_zext:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: zeroable_src_to_zext:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
-; AVX2-NEXT: retq
-;
-; AVX512VLBW-LABEL: zeroable_src_to_zext:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VLVBMI-LABEL: zeroable_src_to_zext:
-; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [32,33,0,0,0,0,0,0,34,35,0,0,0,0,0,0,36,37,16,16,16,16,16,16,38,39,16,16,16,16,16,16]
-; AVX512VLVBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLVBMI-NEXT: vpermt2b %ymm0, %ymm2, %ymm1
-; AVX512VLVBMI-NEXT: vmovdqa %ymm1, %ymm0
-; AVX512VLVBMI-NEXT: retq
+; AVX2OR512VL-LABEL: zeroable_src_to_zext:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2OR512VL-NEXT: retq
 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 %2 = shufflevector <32 x i8> %1, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 20, i32 21, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
 ret <32 x i8> %2
```

