summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-07-12 13:29:41 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-07-12 13:29:41 +0000
commit: 44b89fa900c6098c121e5546b79e40607389f799 (patch)
tree: c25b8db8616e0c35168d24f51144b329417550dd /llvm/test/CodeGen
parent: 9b00a8e9d7e2cc959b1d766f01b5f0539fe997de (diff)
download: bcm5719-llvm-44b89fa900c6098c121e5546b79e40607389f799.tar.gz
download: bcm5719-llvm-44b89fa900c6098c121e5546b79e40607389f799.zip
[X86][SSE] Utilize ZeroableElements for canWidenShuffleElements
canWidenShuffleElements can do a better job if given a mask with ZeroableElements info. Apparently, ZeroableElements was only being used to identify AllZero candidates, but possibly we could plug it into more shuffle matchers.

Original Patch by Zvi Rackover @zvi

Differential Revision: https://reviews.llvm.org/D42044

llvm-svn: 336903
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/X86/avx-cast.ll 4
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll 28
2 files changed, 7 insertions, 25 deletions
diff --git a/llvm/test/CodeGen/X86/avx-cast.ll b/llvm/test/CodeGen/X86/avx-cast.ll
index 4240f798bf9..92d9319a187 100644
--- a/llvm/test/CodeGen/X86/avx-cast.ll
+++ b/llvm/test/CodeGen/X86/avx-cast.ll
@@ -9,9 +9,7 @@
define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castA:
; AVX: ## %bb.0:
-; AVX-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT: vmovaps %xmm0, %xmm0
; AVX-NEXT: retq
%shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
ret <8 x float> %shuffle.i
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 54094c39e46..6f5b7d89dfa 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2890,31 +2890,15 @@ define <32 x i8> @zeroable_src_to_zext(<32 x i8> %a0) {
; AVX1-LABEL: zeroable_src_to_zext:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: zeroable_src_to_zext:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
-; AVX2-NEXT: retq
-;
-; AVX512VLBW-LABEL: zeroable_src_to_zext:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VLVBMI-LABEL: zeroable_src_to_zext:
-; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [32,33,0,0,0,0,0,0,34,35,0,0,0,0,0,0,36,37,16,16,16,16,16,16,38,39,16,16,16,16,16,16]
-; AVX512VLVBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLVBMI-NEXT: vpermt2b %ymm0, %ymm2, %ymm1
-; AVX512VLVBMI-NEXT: vmovdqa %ymm1, %ymm0
-; AVX512VLVBMI-NEXT: retq
+; AVX2OR512VL-LABEL: zeroable_src_to_zext:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2OR512VL-NEXT: retq
%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = shufflevector <32 x i8> %1, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 20, i32 21, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
ret <32 x i8> %2
OpenPOWER on IntegriCloud