summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/trunc-subvector.ll
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2018-11-19 04:33:20 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-11-19 04:33:20 +0000
commit: 3616891046e7f13a758e53dcc6fa73a7c3232b35 (patch)
tree: 8126bffd4f9009478ef780060293bc37d774e892 /llvm/test/CodeGen/X86/trunc-subvector.ll
parent: 053f1eea96eaa8a0a4bb034274fa485655323d32 (diff)
downloadbcm5719-llvm-3616891046e7f13a758e53dcc6fa73a7c3232b35.tar.gz
bcm5719-llvm-3616891046e7f13a758e53dcc6fa73a7c3232b35.zip
[X86] Use compare with 0 to fill an element with sign bits when sign extending to v2i64 pre-sse4.1
Previously we used an arithmetic shift right by 31, but that requires a copy to preserve the input. Instead, we might as well materialize a zero and compare against it: the comparison overwrites the register that holds the zeros, so the input is left intact. This should be one byte shorter.

llvm-svn: 347181
Diffstat (limited to 'llvm/test/CodeGen/X86/trunc-subvector.ll')
-rw-r--r-- llvm/test/CodeGen/X86/trunc-subvector.ll 17
1 files changed, 9 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/X86/trunc-subvector.ll b/llvm/test/CodeGen/X86/trunc-subvector.ll
index 88830ee7288..77e67d6e554 100644
--- a/llvm/test/CodeGen/X86/trunc-subvector.ll
+++ b/llvm/test/CodeGen/X86/trunc-subvector.ll
@@ -41,7 +41,8 @@ define <2 x i32> @test3(<8 x i32> %v) {
; SSE2-LABEL: test3:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
@@ -67,8 +68,8 @@ define <2 x i32> @test3(<8 x i32> %v) {
define <2 x i32> @test4(<8 x i32> %v) {
; SSE2-LABEL: test4:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
@@ -93,12 +94,12 @@ define <2 x i32> @test4(<8 x i32> %v) {
define <2 x i32> @test5(<8 x i32> %v) {
; SSE2-LABEL: test5:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
OpenPOWER on IntegriCloud