diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-14 18:23:58 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-14 18:23:58 +0000 |
| commit | 2ec8373633af92a0c07adde6bac43fc54b24c42b (patch) | |
| tree | 307f9cfe8c6409b27c4e614387ae06708bd1edbd /llvm/test/CodeGen | |
| parent | 1c19cc174530dabf376cefcce98a16c8e85683fa (diff) | |
| download | bcm5719-llvm-2ec8373633af92a0c07adde6bac43fc54b24c42b.tar.gz bcm5719-llvm-2ec8373633af92a0c07adde6bac43fc54b24c42b.zip | |
[X86][SSE] truncateVectorWithPACK - Use src type instead of dst to select between PACK*SDW/PACK*SWB
Try to keep PACK*SDW/PACK*SWB as wide as possible, this helps ComputeNumSignBits as it can only peek through bitcasts to wider types, pre-AVX2 codegen was already doing this as it could peek through bitcasts/subvectors more easily than AVX2 could through shuffles.
This shouldn't affect existing results as calls to truncateVectorWithPACK ensure we have enough sign bits to pack to the same value, but it should make it possible to use truncateVectorWithPACK chains to perform saturation in combineTruncateWithSat with a future patch.
llvm-svn: 325149
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-setcc-512.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-compare-results.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc-usat.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vselect-packss.ll | 8 |
5 files changed, 13 insertions, 15 deletions
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll index bafc8ece17a..f5cb903be05 100644 --- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll @@ -111,7 +111,7 @@ define i16 @v16i32(<16 x i32> %a, <16 x i32> %b) { ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 @@ -172,7 +172,7 @@ define i16 @v16f32(<16 x float> %a, <16 x float> %b) { ; AVX2: # %bb.0: ; AVX2-NEXT: vcmpltps %ymm1, %ymm3, %ymm1 ; AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-compare-results.ll b/llvm/test/CodeGen/X86/vector-compare-results.ll index 2a0fbf4e497..f33daf2900e 100644 --- a/llvm/test/CodeGen/X86/vector-compare-results.ll +++ b/llvm/test/CodeGen/X86/vector-compare-results.ll @@ -714,7 +714,7 @@ define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vcmpltps %ymm1, %ymm3, %ymm1 ; AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 @@ -896,7 +896,7 @@ define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 @@ -2351,7 +2351,7 @@ define <16 x i1> @test_cmp_v16f64(<16 x double> %a0, <16 x double> %a1) nounwind ; AVX2-NEXT: vcmpltpd %ymm0, %ymm4, %ymm0 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 @@ -2902,7 +2902,7 @@ define <16 x i1> @test_cmp_v16i64(<16 x i64> %a0, <16 x i64> %a1) nounwind { ; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll index 30c3a53c7b0..5dd96cee29c 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -1612,7 +1612,7 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(<16 x i64> %a0) { ; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1846,7 +1846,7 @@ define <16 x i8> @trunc_usat_v16i32_v16i8(<16 x i32> %a0) { ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255] ; AVX2-NEXT: vpminud %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index ffe84d2c7fc..9171805ac54 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -974,7 +974,7 @@ define void @trunc16i32_16i8_ashr(<16 x i32> %a) { ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrad $24, %ymm1, %ymm1 ; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0 -; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 @@ -1051,7 +1051,7 @@ define void @trunc16i32_16i8_lshr(<16 x i32> %a) { ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrld $24, %ymm1, %ymm1 ; AVX2-NEXT: vpsrld $24, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vselect-packss.ll b/llvm/test/CodeGen/X86/vselect-packss.ll index d66486572f8..8d517151181 100644 --- a/llvm/test/CodeGen/X86/vselect-packss.ll +++ b/llvm/test/CodeGen/X86/vselect-packss.ll @@ -136,13 +136,11 @@ define <16 x i8> @vselect_packss_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i8 ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm1 -; AVX2-NEXT: vpandn %xmm5, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -284,7 +282,7 @@ define <16 x i8> @vselect_packss_v16i64(<16 x i64> %a0, <16 x i64> %a1, <16 x i8 ; AVX2-NEXT: vpcmpeqq %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackssdw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 |

