author     Craig Topper <craig.topper@intel.com>    2018-08-25 18:01:24 +0000
committer  Craig Topper <craig.topper@intel.com>    2018-08-25 18:01:24 +0000
commit     ebec2793d1768d655a4afab19420ab8efb9d8d89
tree       4eacd21a69944e51886dbe51e2c4e39f9d105605 /llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
parent     a11a3b381806e2bd4e54ed1dfa32b57723e30714
[X86] Replace support for vXi32 SMUL_LOHI/UMUL_LOHI with MULHS/MULHU support instead.
Summary:
The only time vector SMUL_LOHI/UMUL_LOHI nodes are created is during division/remainder lowering. If one is created before op legalization, generic DAGCombine immediately turns that SMUL_LOHI/UMUL_LOHI into a MULHS/MULHU, since only the upper half is used. That node sticks around through vector op legalization and is turned back into UMUL_LOHI/SMUL_LOHI during op legalization, at which point it is custom lowered by the X86 backend. Because of this two-step lowering, the vector shuffles created by the custom lowering get legalized after their inputs rather than before, which prevents the shuffles from being combined with any build_vector of constants.
This patch changes vXi32 to use MULHS/MULHU instead. This is what the later DAG combine did anyway, but by skipping the conversion back to UMUL_LOHI/SMUL_LOHI we lower it before any constant BUILD_VECTORs, so the vector_shuffles created during lowering can constant fold with the build_vectors. This accounts for the test changes here; a short IR sketch of the affected test functions follows the commit message.
Reviewers: RKSimon, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D51254
llvm-svn: 340690
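The hunks below change only the FileCheck assertions; the IR that drives them is not shown in the diff. For orientation, here is a minimal sketch of what the two affected functions look like, reconstructed from the signatures in the hunk headers (the bodies are an assumption, not quoted from the file):

define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind {
  ; Divide every i32 lane by the constant 7.
  %res = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %res
}

define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
  ; Remainder of every i32 lane modulo 7.
  %res = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %res
}

Constant signed division like this is expanded by the backend into the multiply-high sequence whose CHECK lines change below.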
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll')
-rw-r--r--   llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll   116
1 file changed, 54 insertions, 62 deletions
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index 5df4d09e971..3a4b2f3be82 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -88,41 +88,37 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind {
 ; AVX1-LABEL: test_div7_8i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vpmuldq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
-; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm3
-; AVX1-NEXT: vpsrad $2, %xmm2, %xmm2
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT: vpmuldq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpmuldq %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2,3],xmm4[4,5],xmm2[6,7]
+; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
+; AVX1-NEXT: vpsrad $2, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpmuldq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpmuldq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm2
 ; AVX1-NEXT: vpsrad $2, %xmm0, %xmm0
-; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_div7_8i32:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpmuldq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[1,1,3,3,5,5,7,7]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
+; AVX2-NEXT: vpmuldq %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmuldq %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7]
 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm1
 ; AVX2-NEXT: vpsrad $2, %ymm0, %ymm0
@@ -363,46 +359,42 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
 define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
 ; AVX1-LABEL: test_rem7_8i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vpmuldq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
+; AVX1-NEXT: vpmuldq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpmuldq %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2,3],xmm4[4,5],xmm2[6,7]
+; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrld $31, %xmm2, %xmm4
+; AVX1-NEXT: vpsrad $2, %xmm2, %xmm2
 ; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,7,7,7]
+; AVX1-NEXT: vpmulld %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpmuldq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpmuldq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm2
 ; AVX1-NEXT: vpsrld $31, %xmm2, %xmm3
 ; AVX1-NEXT: vpsrad $2, %xmm2, %xmm2
 ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7]
-; AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
-; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm4
-; AVX1-NEXT: vpsrad $2, %xmm1, %xmm1
-; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpmulld %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_rem7_8i32:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpmuldq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[1,1,3,3,5,5,7,7]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
+; AVX2-NEXT: vpmuldq %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmuldq %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7]
 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm1
 ; AVX2-NEXT: vpsrld $31, %ymm1, %ymm2
 ; AVX2-NEXT: vpsrad $2, %ymm1, %ymm1
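For readers of the CHECK lines above: 2454267027 is 0x92492493, the signed magic multiplier for division by 7 (with a shift of 2). The sequence the tests expect, multiply-high, add the dividend, arithmetic shift right by 2, then add the sign bit, computes sdiv-by-7 per lane. A scalar LLVM IR model of that arithmetic, written purely as an illustration and not taken from the patch:

define i32 @sdiv7_scalar_model(i32 %x) {
  %x.wide = sext i32 %x to i64
  %prod   = mul i64 %x.wide, -1840700269   ; 0x92492493 interpreted as a signed i32, widened
  %hi64   = ashr i64 %prod, 32             ; keep only the high 32 bits (MULHS)
  %hi     = trunc i64 %hi64 to i32
  %sum    = add i32 %hi, %x                ; vpaddd
  %quot   = ashr i32 %sum, 2               ; vpsrad $2
  %sign   = lshr i32 %sum, 31              ; vpsrld $31: add 1 to round negative results toward zero
  %res    = add i32 %quot, %sign
  ret i32 %res                             ; equals sdiv i32 %x, 7
}

The vpmuldq/vpshufd/vpblend dance in the vector code is this same multiply-high performed two lanes at a time; the rem tests additionally multiply the quotient by 7 (vpmulld with [7,7,7,7]) and subtract it from the input.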