| Commit message (Collapse) | Author | Age | Files | Lines |
| ... | |
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
int test2(int * P, int* Q, int A, int B) {
return P+A == P;
}
into:
test2:
movl 4(%esp), %eax
movl 12(%esp), %eax
shll $2, %eax
cmpl $0, %eax
sete %al
movzbl %al, %eax
ret
instead of:
test2:
movl 4(%esp), %eax
movl 12(%esp), %ecx
leal (%eax,%ecx,4), %ecx
cmpl %eax, %ecx
sete %al
movzbl %al, %eax
ret
ICC is producing worse code:
test2:
movl 4(%esp), %eax #8.5
movl 12(%esp), %edx #8.5
lea (%edx,%edx), %ecx #9.9
addl %ecx, %ecx #9.9
addl %eax, %ecx #9.9
cmpl %eax, %ecx #9.16
movl $0, %eax #9.16
sete %al #9.16
ret #9.16
as is GCC (looks like our old code):
test2:
movl 4(%esp), %edx
movl 12(%esp), %eax
leal (%edx,%eax,4), %ecx
cmpl %edx, %ecx
sete %al
movzbl %al, %eax
ret
llvm-svn: 19430
|
| |
|
|
| |
llvm-svn: 19429
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
.LBB_Z5test0PdS__3: # no_exit.1
fldl data(,%eax,8)
fldl 24(%esp)
faddp %st(1)
fstl 24(%esp)
incl %eax
movl $16000, %ecx
sarl $3, %ecx
cmpl %eax, %ecx
fstpl 16(%esp)
#FP_REG_KILL
jg .LBB_Z5test0PdS__3 # no_exit.1
into:
.LBB_Z5test0PdS__3: # no_exit.1
fldl data(,%eax,8)
fldl 24(%esp)
faddp %st(1)
fstl 24(%esp)
incl %eax
cmpl $2000, %eax
fstpl 16(%esp)
#FP_REG_KILL
jl .LBB_Z5test0PdS__3 # no_exit.1
llvm-svn: 19427
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
codegen this loop in stepanov:
no_exit.i: ; preds = %entry, %no_exit.i, %then.i, %_Z5checkd.exit
%i.0.0 = phi int [ 0, %entry ], [ %i.0.0, %no_exit.i ], [ %inc.0, %_Z5checkd.exit ], [ %inc.012, %then.i ] ; <int> [#uses=3]
%indvar = phi uint [ %indvar.next, %no_exit.i ], [ 0, %entry ], [ 0, %then.i ], [ 0, %_Z5checkd.exit ] ; <uint> [#uses=3]
%result_addr.i.0 = phi double [ %tmp.4.i.i, %no_exit.i ], [ 0.000000e+00, %entry ], [ 0.000000e+00, %then.i ], [ 0.000000e+00, %_Z5checkd.exit ] ; <double> [#uses=1]
%first_addr.0.i.2.rec = cast uint %indvar to int ; <int> [#uses=1]
%first_addr.0.i.2 = getelementptr [2000 x double]* %data, int 0, uint %indvar ; <double*> [#uses=1]
%inc.i.rec = add int %first_addr.0.i.2.rec, 1 ; <int> [#uses=1]
%inc.i = getelementptr [2000 x double]* %data, int 0, int %inc.i.rec ; <double*> [#uses=1]
%tmp.3.i.i = load double* %first_addr.0.i.2 ; <double> [#uses=1]
%tmp.4.i.i = add double %result_addr.i.0, %tmp.3.i.i ; <double> [#uses=2]
%tmp.2.i = seteq double* %inc.i, getelementptr ([2000 x double]* %data, int 0, int 2000) ; <bool> [#uses=1]
%indvar.next = add uint %indvar, 1 ; <uint> [#uses=1]
br bool %tmp.2.i, label %_Z10accumulateIPddET0_T_S2_S1_.exit, label %no_exit.i
To this:
.LBB_Z4testIPddEvT_S1_T0__1: # no_exit.i
fldl data(,%eax,8)
fldl 16(%esp)
faddp %st(1)
fstpl 16(%esp)
incl %eax
movl %eax, %ecx
shll $3, %ecx
cmpl $16000, %ecx
#FP_REG_KILL
jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
instead of this:
.LBB_Z4testIPddEvT_S1_T0__1: # no_exit.i
fldl data(,%eax,8)
fldl 16(%esp)
faddp %st(1)
fstpl 16(%esp)
incl %eax
leal data(,%eax,8), %ecx
leal data+16000, %edx
cmpl %edx, %ecx
#FP_REG_KILL
jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
llvm-svn: 19425
|
| |
|
|
| |
llvm-svn: 19423
|
| |
|
|
| |
llvm-svn: 19422
|
| |
|
|
|
|
| |
list.
llvm-svn: 19421
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
loops in stepanov to this:
.LBB_Z5test0PdS__2: # no_exit.1
fldl data(,%eax,8)
fldl 24(%esp)
faddp %st(1)
fstl 24(%esp)
incl %eax
cmpl $2000, %eax
fstpl 16(%esp)
#FP_REG_KILL
jl .LBB_Z5test0PdS__2
instead of this:
.LBB_Z5test0PdS__2: # no_exit.1
fldl data(,%eax,8)
fldl 24(%esp)
faddp %st(1)
fstl 24(%esp)
incl %eax
movl $data, %ecx
movl %ecx, %edx
addl $16000, %edx
subl %ecx, %edx
movl %edx, %ecx
sarl $2, %ecx
shrl $29, %ecx
addl %ecx, %edx
sarl $3, %edx
cmpl %edx, %eax
fstpl 16(%esp)
#FP_REG_KILL
jl .LBB_Z5test0PdS__2
The old instruction selector produced:
.LBB_Z5test0PdS__2: # no_exit.1
fldl 24(%esp)
faddl data(,%eax,8)
fstl 24(%esp)
movl %eax, %ecx
incl %ecx
incl %eax
leal data+16000, %edx
movl $data, %edi
subl %edi, %edx
movl %edx, %edi
sarl $2, %edi
shrl $29, %edi
addl %edi, %edx
sarl $3, %edx
cmpl %edx, %ecx
fstpl 16(%esp)
#FP_REG_KILL
jl .LBB_Z5test0PdS__2 # no_exit.1
Which is even worse!
llvm-svn: 19419
|
| |
|
|
|
|
| |
values), and eliminate some switch statements.
llvm-svn: 19417
|
| |
|
|
|
|
|
|
| |
std::__pad<wchar_t, std::char_traits<wchar_t> >::_S_pad(std::ios_base&, wchar_t, wchar_t*, wchar_t const*, int, int, bool)
from libstdc++
llvm-svn: 19416
|
| |
|
|
| |
llvm-svn: 19415
|
| |
|
|
| |
llvm-svn: 19409
|
| |
|
|
|
|
| |
intrinsics.
llvm-svn: 19407
|
| |
|
|
|
|
| |
now.
llvm-svn: 19404
|
| |
|
|
| |
llvm-svn: 19397
|
| |
|
|
| |
llvm-svn: 19388
|
| |
|
|
| |
llvm-svn: 19386
|
| |
|
|
| |
llvm-svn: 19385
|
| |
|
|
| |
llvm-svn: 19384
|
| |
|
|
|
|
| |
Fix a bug legalizing "ret (Val,Val)"
llvm-svn: 19375
|
| |
|
|
| |
llvm-svn: 19374
|
| |
|
|
| |
llvm-svn: 19369
|
| |
|
|
|
|
| |
all targets.
llvm-svn: 19366
|
| |
|
|
| |
llvm-svn: 19365
|
| |
|
|
| |
llvm-svn: 19361
|
| |
|
|
| |
llvm-svn: 19360
|
| |
|
|
| |
llvm-svn: 19359
|
| |
|
|
| |
llvm-svn: 19358
|
| |
|
|
|
|
| |
CodeGen/Generic/select.ll:castconst.
llvm-svn: 19357
|
| |
|
|
| |
llvm-svn: 19356
|
| |
|
|
|
|
| |
int GEP indices on 64-bit archs.
llvm-svn: 19354
|
| |
|
|
| |
llvm-svn: 19353
|
| |
|
|
| |
llvm-svn: 19352
|
| |
|
|
| |
llvm-svn: 19350
|
| |
|
|
| |
llvm-svn: 19349
|
| |
|
|
| |
llvm-svn: 19348
|
| |
|
|
| |
llvm-svn: 19346
|
| |
|
|
| |
llvm-svn: 19345
|
| |
|
|
| |
llvm-svn: 19339
|
| |
|
|
|
|
| |
of the code for lowering from LLVM code to a SelectionDAG.
llvm-svn: 19331
|
| |
|
|
| |
llvm-svn: 19330
|
| |
|
|
|
|
| |
to go, but it does work for some non-trivial cases now.
llvm-svn: 19329
|
| |
|
|
| |
llvm-svn: 19327
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
1. If we are two-addressing a commutable instruction and the LHS is not the
last use of the variable, see if the instruction is the last use of the
RHS. If so, commute the instruction, allowing us to avoid a
register-register copy in many cases for common instructions like ADD, OR,
AND, etc on X86.
2. If #1 doesn't hold, and if this is an instruction that also existing in
3-address form, promote the instruction to a 3-address instruction to
avoid the register-register copy. We can do this for several common
instructions in X86, including ADDrr, INC, DEC, etc.
This patch implements test/Regression/CodeGen/X86/commute-two-addr.ll,
overlap-add.ll, and overlap-shift.ll when I check in the X86 support for it.
llvm-svn: 19245
|
| |
|
|
| |
llvm-svn: 18955
|
| |
|
|
|
|
| |
20%, shaving 0.1s off hbd compile time on my g5. Yay.
llvm-svn: 18592
|
| |
|
|
|
|
|
| |
Make only one print method to avoid overloaded virtual warnings when \
compiled with -Woverloaded-virtual
llvm-svn: 18589
|
| |
|
|
|
|
| |
Prolang-C/bison in the JIT
llvm-svn: 18477
|
| |
|
|
|
|
| |
PR449
llvm-svn: 18306
|
| |
|
|
| |
llvm-svn: 18131
|