diff options
author | Nate Begeman <natebegeman@mac.com> | 2010-07-27 22:37:06 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2010-07-27 22:37:06 +0000 |
commit | 269a6da023c70708c7f86a89575c0fd1d2c5ae71 (patch) | |
tree | 81a9539317098fae957e0f5e4b3e9eef61145683 /clang/lib/CodeGen/CodeGenModule.cpp | |
parent | c1124300fe0a45b9e4a7955b7df32e37386c5b76 (diff) | |
download | bcm5719-llvm-269a6da023c70708c7f86a89575c0fd1d2c5ae71.tar.gz bcm5719-llvm-269a6da023c70708c7f86a89575c0fd1d2c5ae71.zip |
~40% faster vector shl <4 x i32> on SSE 4.1. Larger improvements for smaller types coming in future patches.
For:
define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
%shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1]
%tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp2
}
We get:
_shl: ## @shl
pslld $23, %xmm1
paddd LCPI0_0, %xmm1
cvttps2dq %xmm1, %xmm1
pmulld %xmm1, %xmm0
ret
Instead of:
_shl: ## @shl
pshufd $3, %xmm0, %xmm2
movd %xmm2, %eax
pshufd $3, %xmm1, %xmm2
movd %xmm2, %ecx
shll %cl, %eax
movd %eax, %xmm2
pshufd $1, %xmm0, %xmm3
movd %xmm3, %eax
pshufd $1, %xmm1, %xmm3
movd %xmm3, %ecx
shll %cl, %eax
movd %eax, %xmm3
punpckldq %xmm2, %xmm3
movd %xmm0, %eax
movd %xmm1, %ecx
shll %cl, %eax
movd %eax, %xmm2
movhlps %xmm0, %xmm0
movd %xmm0, %eax
movhlps %xmm1, %xmm1
movd %xmm1, %ecx
shll %cl, %eax
movd %eax, %xmm0
punpckldq %xmm0, %xmm2
movdqa %xmm2, %xmm0
punpckldq %xmm3, %xmm0
ret
llvm-svn: 109549
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
0 files changed, 0 insertions, 0 deletions