diff options
author | Craig Topper <craig.topper@intel.com> | 2018-11-19 04:33:20 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-11-19 04:33:20 +0000 |
commit | 3616891046e7f13a758e53dcc6fa73a7c3232b35 (patch) | |
tree | 8126bffd4f9009478ef780060293bc37d774e892 /llvm/test/CodeGen/X86/trunc-subvector.ll | |
parent | 053f1eea96eaa8a0a4bb034274fa485655323d32 (diff) | |
download | bcm5719-llvm-3616891046e7f13a758e53dcc6fa73a7c3232b35.tar.gz bcm5719-llvm-3616891046e7f13a758e53dcc6fa73a7c3232b35.zip |
[X86] Use compare with 0 to fill an element with sign bits when sign extending to v2i64 pre-sse4.1
Previously we used an arithmetic shift right by 31, but that requires a copy to preserve the input. So we might as well materialize a zero and compare to it since the comparison will overwrite the register that contains the zeros. This should be one byte shorter.
llvm-svn: 347181
Diffstat (limited to 'llvm/test/CodeGen/X86/trunc-subvector.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/trunc-subvector.ll | 17 |
1 file changed, 9 insertions, 8 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/trunc-subvector.ll b/llvm/test/CodeGen/X86/trunc-subvector.ll
index 88830ee7288..77e67d6e554 100644
--- a/llvm/test/CodeGen/X86/trunc-subvector.ll
+++ b/llvm/test/CodeGen/X86/trunc-subvector.ll
@@ -41,7 +41,8 @@ define <2 x i32> @test3(<8 x i32> %v) {
 ; SSE2-LABEL: test3:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT: retq
 ;
@@ -67,8 +68,8 @@ define <2 x i32> @test3(<8 x i32> %v) {
 define <2 x i32> @test4(<8 x i32> %v) {
 ; SSE2-LABEL: test4:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT: retq
 ;
@@ -93,12 +94,12 @@ define <2 x i32> @test4(<8 x i32> %v) {
 define <2 x i32> @test5(<8 x i32> %v) {
 ; SSE2-LABEL: test5:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
 ; SSE2-NEXT: retq
```