From ca3a63a849ee7253dbbb6d6b2d59513b79ad6405 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 9 May 2017 13:14:40 +0000 Subject: [X86][SSE42] Lower v2i64/v4i64 ASHR(X, 63) as PCMPGTQ(0, X) Similar to what we do for vXi8 ASHR(X, 7), use SSE42's PCMPGTQ to splat the sign instead of using the PSRAD+PSHUFD. Avoiding bitcasts this improves combines that utilize computeNumSignBits, permits memory folding and reduces pipe pressure. Although it does require a second register, given that this is a (cheap) zero register the impact is minimal. Differential Revision: https://reviews.llvm.org/D32973 llvm-svn: 302525 --- llvm/test/CodeGen/X86/combine-abs.ll | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'llvm/test/CodeGen/X86/combine-abs.ll') diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index 887abe99f6e..37beb438d73 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -50,12 +50,11 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) { define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { ; AVX2-LABEL: combine_v4i64_abs_abs: ; AVX2: # BB#0: -; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1 -; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] -; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1 -; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1 ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq -- cgit v1.2.3