| author | Craig Topper <craig.topper@intel.com> | 2017-12-25 06:47:10 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-12-25 06:47:10 +0000 |
| commit | 705fef3ef3d26718f34de45e1d4c2ef0f37c9bb2 (patch) | |
| tree | 582343f5fcf0a1f01720cff28bd6e7d08c93096a /llvm/test | |
| parent | b28460a0d6c02e7938d69daf54cf9b8e17bf431b (diff) | |
[X86] Add a DAG combine to turn vXi64 muls into VPMULDQ/VPMULUDQ if the upper bits are all sign bits or zeros.
Normally we catch this during lowering, but vXi64 mul is considered legal when we have AVX512DQ.
This DAG combine allows us to avoid PMULLQ with AVX512DQ if we can prove it is unnecessary. PMULLQ is 3 uops that take 4 cycles each, while PMULDQ/PMULUDQ is only one 4-cycle uop.
llvm-svn: 321437
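To make the condition concrete, here is a minimal sketch (not taken from this commit's tests; the function name is hypothetical): both operands of the v2i64 multiply are sign-extended from i32, so every lane has at least 33 sign bits and the combine can select VPMULDQ even though AVX512DQ makes the v2i64 multiply legal via VPMULLQ.

```llvm
; Hypothetical example, not part of the commit's test files.
; Each operand is sign-extended from i32, so the upper 32 bits of every
; lane are sign bits and PMULDQ computes the same product as PMULLQ.
; Assumed invocation, e.g.:
;   llc -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq
define <2 x i64> @mul_of_sext(<2 x i32> %a, <2 x i32> %b) {
  %x = sext <2 x i32> %a to <2 x i64>
  %y = sext <2 x i32> %b to <2 x i64>
  %r = mul <2 x i64> %x, %y
  ret <2 x i64> %r
}
```

The analogous zero-extended case (upper 32 bits known zero) maps to VPMULUDQ, which is what the combine_shuffle_zext_pmuludq checks in the diff below now expect.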
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-pmuldq.ll | 68 |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll | 8 |
2 files changed, 20 insertions, 56 deletions
diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index aa318c68a88..421c948efab 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -15,32 +15,14 @@ define <2 x i64> @combine_shuffle_sext_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-NEXT:    pmuldq %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: combine_shuffle_sext_pmuldq:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX2-NEXT:    vpmovsxdq %xmm0, %xmm0
-; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX2-NEXT:    vpmovsxdq %xmm1, %xmm1
-; AVX2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: combine_shuffle_sext_pmuldq:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0
-; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512VL-NEXT:    vpmovsxdq %xmm1, %xmm1
-; AVX512VL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT:    retq
-;
-; AVX512DQVL-LABEL: combine_shuffle_sext_pmuldq:
-; AVX512DQVL:       # %bb.0:
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512DQVL-NEXT:    vpmovsxdq %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512DQVL-NEXT:    vpmovsxdq %xmm1, %xmm1
-; AVX512DQVL-NEXT:    vpmullq %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    retq
+; AVX-LABEL: combine_shuffle_sext_pmuldq:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX-NEXT:    vpmovsxdq %xmm1, %xmm1
+; AVX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
   %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
   %3 = sext <2 x i32> %1 to <2 x i64>
@@ -60,32 +42,14 @@ define <2 x i64> @combine_shuffle_zext_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-NEXT:    pmuludq %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: combine_shuffle_zext_pmuludq:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: combine_shuffle_zext_pmuludq:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT:    retq
-;
-; AVX512DQVL-LABEL: combine_shuffle_zext_pmuludq:
-; AVX512DQVL:       # %bb.0:
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512DQVL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512DQVL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512DQVL-NEXT:    vpmullq %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    retq
+; AVX-LABEL: combine_shuffle_zext_pmuludq:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
   %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
   %3 = zext <2 x i32> %1 to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index d318dde3443..d3521ca9f1e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -497,7 +497,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX_SMALL-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpmovsxdq %ymm1, %zmm1
-; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
@@ -510,7 +510,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX_LARGE-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_LARGE-NEXT:    vpmovsxdq %ymm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
-; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT:    vpmuldq (%rax){1to8}, %zmm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
 ; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
 ; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
@@ -582,7 +582,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX_SMALL-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpmovsxdq %ymm1, %zmm1
-; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
@@ -595,7 +595,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX_LARGE-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_LARGE-NEXT:    vpmovsxdq %ymm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
-; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT:    vpmuldq (%rax){1to8}, %zmm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
 ; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
 ; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0

