summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/X86/combine-abs.ll
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2017-05-09 13:14:40 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2017-05-09 13:14:40 +0000
commit ca3a63a849ee7253dbbb6d6b2d59513b79ad6405 (patch)
tree 9a9d169536c5592098c3a1b2fc22e315b1d7d1c3 /llvm/test/CodeGen/X86/combine-abs.ll
parent e8da53f4e07eea1488eeaf1684f15aac0cf447e7 (diff)
download bcm5719-llvm-ca3a63a849ee7253dbbb6d6b2d59513b79ad6405.tar.gz
download bcm5719-llvm-ca3a63a849ee7253dbbb6d6b2d59513b79ad6405.zip
[X86][SSE42] Lower v2i64/v4i64 ASHR(X, 63) as PCMPGTQ(0, X)
Similar to what we do for vXi8 ASHR(X, 7), use SSE42's PCMPGTQ to splat the sign bit instead of using PSRAD+PSHUFD. By avoiding bitcasts, this improves combines that utilize computeNumSignBits, permits memory folding, and reduces pipe pressure. Although it does require a second register, this is a (cheap) zero register, so the impact is minimal. Differential Revision: https://reviews.llvm.org/D32973 llvm-svn: 302525
Diffstat (limited to 'llvm/test/CodeGen/X86/combine-abs.ll')
-rw-r--r-- llvm/test/CodeGen/X86/combine-abs.ll 11
1 files changed, 5 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll
index 887abe99f6e..37beb438d73 100644
--- a/llvm/test/CodeGen/X86/combine-abs.ll
+++ b/llvm/test/CodeGen/X86/combine-abs.ll
@@ -50,12 +50,11 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
; AVX2-LABEL: combine_v4i64_abs_abs:
; AVX2: # BB#0:
-; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
OpenPOWER on IntegriCloud