| Commit message (Collapse) | Author | Age | Files | Lines |
| |
|
|
| |
llvm-svn: 19499
|
| |
|
|
| |
llvm-svn: 19498
|
| |
|
|
| |
llvm-svn: 19496
|
| |
|
|
|
|
|
| |
contain FP PHI nodes but no other FP defining instructions. This fixes
183.equake
llvm-svn: 19495
|
| |
|
|
| |
llvm-svn: 19494
|
| |
|
|
|
|
|
|
|
| |
256.bzip2 from 7142 to 7103 lines of .s file.
Second, add initial support for folding loads into compares, though this code
is dynamically dead for now. :(
llvm-svn: 19493
|
| |
|
|
|
|
|
|
|
|
|
|
|
| |
several times in 256.bzip2:
mov %EAX, DWORD PTR [%ESP + 204]
- mov %EAX, DWORD PTR [%EAX]
- or %EAX, 2097152
- mov %ECX, DWORD PTR [%ESP + 204]
- mov DWORD PTR [%ECX], %EAX
+ or DWORD PTR [%EAX], 2097152
llvm-svn: 19492
|
| |
|
|
|
|
|
|
|
|
|
| |
mov %AL, BYTE PTR [%EDX + l18_length_code]
movzx %EAX, %AL
Emit:
movzx %EAX, BYTE PTR [%EDX + l18_length_code]
llvm-svn: 19489
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
Select [mem] += Val operations. For constants, we used to get:
mov %ECX, -32768
add %ECX, DWORD PTR [l4_match_start]
mov DWORD PTR [l4_match_start], %ECX
Now we get:
add DWORD PTR [l4_match_start], -32768
For other values we used to get:
mov %EBP, %EDI ;; because the add destroys the value
add %EBP, DWORD PTR [l4_input_len]
mov DWORD PTR [l4_input_len], %EBP
now we get:
add DWORD PTR [l4_input_len], %EDI
Both of these use less registers than the alternative, are faster and smaller.
llvm-svn: 19488
|
| |
|
|
| |
llvm-svn: 19487
|
| |
|
|
|
|
| |
folded into an instruction.
llvm-svn: 19486
|
| |
|
|
| |
llvm-svn: 19485
|
| |
|
|
|
|
| |
same for PHI nodes.
llvm-svn: 19484
|
| |
|
|
|
|
|
| |
* Fold loads into Add, sub, and, or, xor and mul when possible.
* Codegen shl X, 1 as add X, X
llvm-svn: 19483
|
| |
|
|
| |
llvm-svn: 19482
|
| |
|
|
| |
llvm-svn: 19480
|
| |
|
|
| |
llvm-svn: 19475
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
mov %ECX, %EAX
add %ECX, 32768
mov %SI, WORD PTR [2*%ECX + l13_prev]
Generate this:
mov %SI, WORD PTR [2*%ECX + l13_prev + 65536]
This occurs when you have a GEP instruction where an index is
"something + imm".
llvm-svn: 19472
|
| |
|
|
| |
llvm-svn: 19468
|
| |
|
|
| |
llvm-svn: 19467
|
| |
|
|
| |
llvm-svn: 19466
|
| |
|
|
|
|
| |
does not support them.
llvm-svn: 19465
|
| |
|
|
| |
llvm-svn: 19464
|
| |
|
|
| |
llvm-svn: 19463
|
| |
|
|
| |
llvm-svn: 19457
|
| |
|
|
|
|
| |
intended to be a dlopenable module and not a "plain" shared library.
llvm-svn: 19456
|
| |
|
|
| |
llvm-svn: 19455
|
| |
|
|
| |
llvm-svn: 19453
|
| |
|
|
| |
llvm-svn: 19452
|
| |
|
|
|
|
|
|
| |
int -> FP casting code. Note that we don't have to set it for FP operations
that take FP values as operands: whatever produces the FP value will set the
flag.
llvm-svn: 19451
|
| |
|
|
|
|
| |
inverted the sense of the comparison.
llvm-svn: 19450
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
evaluate the LHS or the RHS of an operation first. This causes good things
to happen. For example, instead of compiling a loop to this:
.LBBstrength_result7_1: # loopentry
movl 16(%esp), %edi
movl (%edi), %edi ;;; LOAD
movl (%ecx), %ebx
movl $2, (%eax,%ebx,4)
movl (%edx), %ebx
movl %esi, %ebp
addl $21, %ebp
addl $42, %esi
cmpl $0, %edi ;;; USE
cmovne %esi, %ebp
cmpl %ebp, %ebx
movl %ebp, %esi
jg .LBBstrength_result7_1
We now compile it to this:
.LBBstrength_result7_1: # loopentry
movl %edi, %ebx
addl $42, %ebx
addl $21, %edi
movl (%ecx), %ebp ;; LOAD
cmpl $0, %ebp ;; USE
cmovne %ebx, %edi
movl (%edx), %ebx
movl $2, (%eax,%ebx,4)
movl (%esi), %ebx
cmpl %edi, %ebx
jg .LBBstrength_result7_1
Which reduces register pressure enough (in this case) to avoid spilling in the
loop.
As another example, consider the CodeGen/X86/regpressure.ll testcase. We
used to generate this code for both cases:
regpressure1:
subl $32, %esp
movl %esi, 12(%esp)
movl %edi, 8(%esp)
movl %ebx, 4(%esp)
movl %ebp, (%esp)
movl 36(%esp), %ecx
movl (%ecx), %eax
movl 4(%ecx), %edx
movl %edx, 24(%esp)
movl 8(%ecx), %edx
movl %edx, 16(%esp)
movl 12(%ecx), %edx
movl 16(%ecx), %esi
movl 20(%ecx), %edi
movl 24(%ecx), %ebx
movl %ebx, 28(%esp)
movl 28(%ecx), %ebx
movl 32(%ecx), %ebp
movl %ebp, 20(%esp)
movl 36(%ecx), %ecx
imull 24(%esp), %eax
imull 16(%esp), %eax
imull %edx, %eax
imull %esi, %eax
imull %edi, %eax
imull 28(%esp), %eax
imull %ebx, %eax
imull 20(%esp), %eax
imull %ecx, %eax
movl (%esp), %ebp
movl 4(%esp), %ebx
movl 8(%esp), %edi
movl 12(%esp), %esi
addl $32, %esp
ret
This code is basically trying to do all of the loads first, then execute all
of the multiplies. Because we run out of registers, lots of spill code happens.
We now generate this code for both cases:
regpressure1:
movl 4(%esp), %ecx
movl (%ecx), %eax
movl 4(%ecx), %edx
imull %edx, %eax
movl 8(%ecx), %edx
imull %edx, %eax
movl 12(%ecx), %edx
imull %edx, %eax
movl 16(%ecx), %edx
imull %edx, %eax
movl 20(%ecx), %edx
imull %edx, %eax
movl 24(%ecx), %edx
imull %edx, %eax
movl 28(%ecx), %edx
imull %edx, %eax
movl 32(%ecx), %edx
imull %edx, %eax
movl 36(%ecx), %ecx
imull %ecx, %eax
ret
which is much nicer (when we fold loads into the muls it will be even better).
The old instruction selector used to produce the good code for regpressure1
but not for regpressure2, as it depended on the order of operations in the
LLVM code.
llvm-svn: 19449
|
| |
|
|
| |
llvm-svn: 19447
|
| |
|
|
| |
llvm-svn: 19445
|
| |
|
|
| |
llvm-svn: 19444
|
| |
|
|
| |
llvm-svn: 19443
|
| |
|
|
|
|
| |
graph with no labels! :)
llvm-svn: 19441
|
| |
|
|
| |
llvm-svn: 19438
|
| |
|
|
| |
llvm-svn: 19437
|
| |
|
|
| |
llvm-svn: 19436
|
| |
|
|
| |
llvm-svn: 19435
|
| |
|
|
|
|
|
| |
leaked to the system. Now they are destroyed with the JITMemoryManager is
destroyed.
llvm-svn: 19434
|
| |
|
|
| |
llvm-svn: 19432
|
| |
|
|
|
|
|
| |
1. Rename createLoaderPass to CreateProfileLoaderPass
2. Opt shouldn't use the pass registered in CodeGen.
llvm-svn: 19431
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
int test2(int * P, int* Q, int A, int B) {
return P+A == P;
}
into:
test2:
movl 4(%esp), %eax
movl 12(%esp), %eax
shll $2, %eax
cmpl $0, %eax
sete %al
movzbl %al, %eax
ret
instead of:
test2:
movl 4(%esp), %eax
movl 12(%esp), %ecx
leal (%eax,%ecx,4), %ecx
cmpl %eax, %ecx
sete %al
movzbl %al, %eax
ret
ICC is producing worse code:
test2:
movl 4(%esp), %eax #8.5
movl 12(%esp), %edx #8.5
lea (%edx,%edx), %ecx #9.9
addl %ecx, %ecx #9.9
addl %eax, %ecx #9.9
cmpl %eax, %ecx #9.16
movl $0, %eax #9.16
sete %al #9.16
ret #9.16
as is GCC (looks like our old code):
test2:
movl 4(%esp), %edx
movl 12(%esp), %eax
leal (%edx,%eax,4), %ecx
cmpl %edx, %ecx
sete %al
movzbl %al, %eax
ret
llvm-svn: 19430
|
| |
|
|
| |
llvm-svn: 19429
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
.LBB_Z5test0PdS__3: # no_exit.1
fldl data(,%eax,8)
fldl 24(%esp)
faddp %st(1)
fstl 24(%esp)
incl %eax
movl $16000, %ecx
sarl $3, %ecx
cmpl %eax, %ecx
fstpl 16(%esp)
#FP_REG_KILL
jg .LBB_Z5test0PdS__3 # no_exit.1
into:
.LBB_Z5test0PdS__3: # no_exit.1
fldl data(,%eax,8)
fldl 24(%esp)
faddp %st(1)
fstl 24(%esp)
incl %eax
cmpl $2000, %eax
fstpl 16(%esp)
#FP_REG_KILL
jl .LBB_Z5test0PdS__3 # no_exit.1
llvm-svn: 19427
|
| |
|
|
|
|
|
| |
gdb debugger doesn't get confused on which file it is reading (the one in
lib/System or the one in lib/System/{Win32,Unix})
llvm-svn: 19426
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
codegen this loop in stepanov:
no_exit.i: ; preds = %entry, %no_exit.i, %then.i, %_Z5checkd.exit
%i.0.0 = phi int [ 0, %entry ], [ %i.0.0, %no_exit.i ], [ %inc.0, %_Z5checkd.exit ], [ %inc.012, %then.i ] ; <int> [#uses=3]
%indvar = phi uint [ %indvar.next, %no_exit.i ], [ 0, %entry ], [ 0, %then.i ], [ 0, %_Z5checkd.exit ] ; <uint> [#uses=3]
%result_addr.i.0 = phi double [ %tmp.4.i.i, %no_exit.i ], [ 0.000000e+00, %entry ], [ 0.000000e+00, %then.i ], [ 0.000000e+00, %_Z5checkd.exit ] ; <double> [#uses=1]
%first_addr.0.i.2.rec = cast uint %indvar to int ; <int> [#uses=1]
%first_addr.0.i.2 = getelementptr [2000 x double]* %data, int 0, uint %indvar ; <double*> [#uses=1]
%inc.i.rec = add int %first_addr.0.i.2.rec, 1 ; <int> [#uses=1]
%inc.i = getelementptr [2000 x double]* %data, int 0, int %inc.i.rec ; <double*> [#uses=1]
%tmp.3.i.i = load double* %first_addr.0.i.2 ; <double> [#uses=1]
%tmp.4.i.i = add double %result_addr.i.0, %tmp.3.i.i ; <double> [#uses=2]
%tmp.2.i = seteq double* %inc.i, getelementptr ([2000 x double]* %data, int 0, int 2000) ; <bool> [#uses=1]
%indvar.next = add uint %indvar, 1 ; <uint> [#uses=1]
br bool %tmp.2.i, label %_Z10accumulateIPddET0_T_S2_S1_.exit, label %no_exit.i
To this:
.LBB_Z4testIPddEvT_S1_T0__1: # no_exit.i
fldl data(,%eax,8)
fldl 16(%esp)
faddp %st(1)
fstpl 16(%esp)
incl %eax
movl %eax, %ecx
shll $3, %ecx
cmpl $16000, %ecx
#FP_REG_KILL
jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
instead of this:
.LBB_Z4testIPddEvT_S1_T0__1: # no_exit.i
fldl data(,%eax,8)
fldl 16(%esp)
faddp %st(1)
fstpl 16(%esp)
incl %eax
leal data(,%eax,8), %ecx
leal data+16000, %edx
cmpl %edx, %ecx
#FP_REG_KILL
jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
llvm-svn: 19425
|
| |
|
|
| |
llvm-svn: 19424
|