| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-06 21:21:12 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-06 21:21:12 +0000 |
| commit | bc573ca1b8c0be403667c1e332450641b84df5e4 (patch) | |
| tree | 50f87fcbd19580a6139f46e2858f938a3b653d6f /llvm/test | |
| parent | 28c889593a283892639008b887d9e35fe19b87eb (diff) | |
[X86][AVX2] Improve sign/zero extension on AVX2 targets
Split extensions of large vectors into 256-bit chunks - the equivalent of the 128-bit chunking we already do for pre-AVX2 targets
llvm-svn: 277939
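For illustration, here is a minimal IR sketch of the pattern this patch improves. The function name and exact IR body are assumptions modeled on the tests below, not copied from them; the point is that an i8-to-i32 sign extension on AVX2 previously lowered to a vpmovsxbw/vpmovsxwd pair and now selects a single vpmovsxbd:

```llvm
; Sketch only - name and body are hypothetical, in the style of
; llvm/test/CodeGen/X86/vec_int_to_fp.ll.
; Run with: llc -mtriple=x86_64-unknown-unknown -mattr=+avx2
define <8 x float> @sitofp_8i8_to_8f32_sketch(<16 x i8> %a) {
  ; Take the low 8 bytes of the 16-byte input.
  %lo = shufflevector <16 x i8> %a, <16 x i8> undef,
                      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; i8 -> i32 sign extension: previously vpmovsxbw + vpmovsxwd, now one vpmovsxbd.
  %ext = sext <8 x i8> %lo to <8 x i32>
  ; i32 -> float conversion: vcvtdq2ps, per the updated checks below.
  %cvt = sitofp <8 x i32> %ext to <8 x float>
  ret <8 x float> %cvt
}
```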
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_int_to_fp.ll | 24 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-sext.ll | 10 |
2 files changed, 10 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 5c1ecfba32f..4a3ffde01f1 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -167,8 +167,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: sitofp_16i8_to_2f64:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; AVX2-NEXT:    vzeroupper
@@ -370,8 +369,7 @@ define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: sitofp_16i8_to_4f64:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -627,8 +625,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: uitofp_16i8_to_2f64:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
 ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; AVX2-NEXT:    vzeroupper
@@ -909,8 +906,7 @@ define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: uitofp_16i8_to_4f64:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
 ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -1103,8 +1099,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: sitofp_16i8_to_4f32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; AVX2-NEXT:    vzeroupper
@@ -1315,8 +1310,7 @@ define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: sitofp_16i8_to_8f32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -1692,8 +1686,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: uitofp_16i8_to_4f32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
 ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; AVX2-NEXT:    vzeroupper
@@ -2089,8 +2082,7 @@ define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
 ;
 ; AVX2-LABEL: uitofp_16i8_to_8f32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
 ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 093e3ba76ff..ca9baee331e 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -407,15 +407,9 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
 ;
 ; AVX2-LABEL: sext_16i8_to_8i64:
 ; AVX2:       # BB#0: # %entry
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX2-NEXT:    vpslld $24, %xmm1, %xmm1
-; AVX2-NEXT:    vpsrad $24, %xmm1, %xmm1
-; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm2
+; AVX2-NEXT:    vpmovsxbq %xmm0, %ymm2
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX2-NEXT:    vpslld $24, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrad $24, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm1
+; AVX2-NEXT:    vpmovsxbq %xmm0, %ymm1
 ; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
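The vector-sext.ll improvement can be exercised in isolation with a similar sketch. Again this is hypothetical - it assumes the test body matches the function name - but it shows the shape of code that now builds each 256-bit half of the <8 x i64> result with a single vpmovsxbq, replacing the old per-half vpmovzxbd + vpslld + vpsrad + vpmovsxdq sequence visible in the removed check lines above:

```llvm
; Sketch only - modeled on sext_16i8_to_8i64 in
; llvm/test/CodeGen/X86/vector-sext.ll; the exact test body is an assumption.
; Run with: llc -mtriple=x86_64-unknown-unknown -mattr=+avx2
define <8 x i64> @sext_16i8_to_8i64_sketch(<16 x i8> %a) {
  ; The low 8 bytes of the input feed the two 256-bit result halves.
  %lo = shufflevector <16 x i8> %a, <16 x i8> undef,
                      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Bytes 0-3 -> one vpmovsxbq into ymm; bytes 4-7 -> vpshufd, then a second vpmovsxbq.
  %ext = sext <8 x i8> %lo to <8 x i64>
  ret <8 x i64> %ext
}
```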

