the use follows the def immediately, so it doesn't make sense to spill it and
hope it will be easier to allocate for this LI.
llvm-svn: 28217
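A sketch of the case in question (hypothetical C, not from the commit): when a
value's only use immediately follows its def, spilling cannot shorten the live
range across anything else.
int def_then_use(int a, int b) {
  int t = a + b;  /* def of t */
  return t * 2;   /* only use, immediately after the def: spilling t
                     would add a store/load pair without freeing a
                     register across any other instruction */
}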
llvm-svn: 28212
llvm-svn: 28207
ppc
llvm-svn: 28205
the distance between the def and another use is much longer). For now this is
under option control: -sched-lower-defnuse.
llvm-svn: 28201
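The general idea, sketched with hypothetical C (do_work and use are placeholder
names): the later a def is scheduled relative to its use, the shorter its live
range and the lower the register pressure in between.
static void do_work(void) { /* stands for unrelated instructions */ }
static void use(int v) { (void)v; }

void early_def(const int *p) {
  int v = *p;   /* def scheduled early... */
  do_work();    /* ...so v stays live across this work */
  use(v);
}

void lowered_def(const int *p) {
  do_work();
  int w = *p;   /* def lowered right before its use: */
  use(w);       /* w's live range is minimal */
}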
llvm-svn: 28200
llvm-svn: 28197
bunch of special case code.
llvm-svn: 28194
llvm-svn: 28192
of code to be unified.
llvm-svn: 28191
be emitted to the .s file.
llvm-svn: 28189
llvm-svn: 28188
llvm-svn: 28186
llvm-svn: 28184
short test2(short X, short x) {
  int Y = (short)(X+x);
  return Y >> 1;
}
to:
_test2:
        add r2, r3, r4
        extsh r2, r2
        srawi r3, r2, 1
        blr
instead of:
_test2:
        add r2, r3, r4
        extsh r2, r2
        srwi r2, r2, 1
        extsh r3, r2
        blr
llvm-svn: 28175
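Why the trailing extsh can go (an illustrative C check, not from the commit):
if Y is sign-extended from 16 bits, its top 17 bits agree, so after an
arithmetic shift right by one the top 18 bits agree, and re-extending from 16
bits is a no-op.
#include <assert.h>
void check2(short X, short x) {
  int Y = (short)(X + x);  /* sign-extended from 16 bits */
  int s = Y >> 1;          /* >> assumed arithmetic, as on PPC;
                              s is still sign-extended from 16 bits */
  assert((short)s == s);   /* so the second extsh was a no-op */
}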
_test4:
        srawi r3, r3, 16
        blr
instead of:
_test4:
        srwi r2, r3, 16
        extsh r3, r2
        blr
for:
short test4(unsigned X) {
  return (X >> 16);
}
llvm-svn: 28174
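The identity behind this one (illustrative C, assuming two's-complement
truncation and arithmetic >> on int, as on the targets here): bit 15 of X >> 16
is bit 31 of X, so sign-extending the logically shifted value equals one
arithmetic shift.
#include <assert.h>
void check4(unsigned X) {
  int a = (short)(X >> 16);  /* logical shift, then sign-extend */
  int b = (int)X >> 16;      /* single arithmetic shift */
  assert(a == b);            /* holds for every X */
}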
short test4(unsigned X) {
  return (X >> 16);
}
to:
_test4:
        movl 4(%esp), %eax
        sarl $16, %eax
        ret
instead of:
_test4:
        movl $-65536, %eax
        andl 4(%esp), %eax
        sarl $16, %eax
        ret
llvm-svn: 28171
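Same family of identity on x86 (illustrative C): the andl clears exactly the 16
bits that sarl $16 is about to shift out, so the mask is dead.
int mask_then_shift(unsigned X) { return (int)(X & 0xFFFF0000u) >> 16; }
int shift_only(unsigned X)     { return (int)X >> 16; }
/* equal for every X, assuming two's-complement int and arithmetic >> */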
llvm-svn: 28167
to be only 31.25% dense, rather than 75% dense.
llvm-svn: 28165
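For scale (illustrative arithmetic; the commit does not show the testcase):
measuring density as filled slots over table span, 31.25% corresponds to 5 of
every 16 slots used (5/16 = 0.3125), versus 12 of 16 (12/16 = 0.75) at 75%.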
llvm-svn: 28161
llvm-svn: 28152
llvm-svn: 28151
llvm-svn: 28150
llvm-svn: 28148
sign_extend_inreg operations. Though ComputeNumSignBits is still rudimentary,
this is enough to compile this:
short test(short X, short x) {
  int Y = X+x;
  return (Y >> 1);
}
short test2(short X, short x) {
  int Y = (short)(X+x);
  return Y >> 1;
}
into:
_test:
        add r2, r3, r4
        srawi r3, r2, 1
        blr
_test2:
        add r2, r3, r4
        extsh r2, r2
        srawi r3, r2, 1
        blr
instead of:
_test:
        add r2, r3, r4
        srawi r2, r2, 1
        extsh r3, r2
        blr
_test2:
        add r2, r3, r4
        extsh r2, r2
        srawi r2, r2, 1
        extsh r3, r2
        blr
llvm-svn: 28146
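The quantity involved, sketched in C (a minimal illustration of the concept,
not LLVM's ComputeNumSignBits): count how many high-order bits are copies of
the sign bit. A sign_extend_inreg from 16 bits is a no-op on any value with at
least 17 sign bits, and an arithmetic shift right only raises the count, which
is why the extsh after srawi can be dropped above.
#include <stdint.h>
/* number of high bits of x equal to the sign bit (always >= 1) */
unsigned num_sign_bits(int32_t x) {
  uint32_t u = (uint32_t)x;
  if (x < 0) u = ~u;  /* the redundant top bits become zeros */
  unsigned n = 1;     /* the sign bit itself */
  for (int bit = 30; bit >= 0 && !((u >> bit) & 1); --bit)
    ++n;
  return n;
}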
a cast immediately before a PHI node.
This fixes Regression/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
llvm-svn: 28143
27,28c27
< movzwl %di, %edi
< movl %edi, %ebx
---
> movw %di, %bx
llvm-svn: 28137
llvm-svn: 28136
llvm-svn: 28132
llvm-svn: 28130
generated:
        movl 8(%esp), %eax
        movl %eax, %edx
        addl $4316, %edx
        cmpb $1, %cl
        ja LBB1_2 #cond_false
LBB1_1: #cond_true
        movl L_QuantizationTables720$non_lazy_ptr, %ecx
        movl %ecx, (%edx)
        movl L_QNOtoQuantTableShift720$non_lazy_ptr, %edx
        movl %edx, 4460(%eax)
        ret
...
Now we generate:
        movl 8(%esp), %eax
        cmpb $1, %cl
        ja LBB1_2 #cond_false
LBB1_1: #cond_true
        movl L_QuantizationTables720$non_lazy_ptr, %ecx
        movl %ecx, 4316(%eax)
        movl L_QNOtoQuantTableShift720$non_lazy_ptr, %ecx
        movl %ecx, 4460(%eax)
        ret
... which uses one fewer register.
llvm-svn: 28129
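In source terms (a conceptual sketch with made-up names; a compiler emits the
same code for both, the difference is in the selected instructions before the
fold): the old code materialized base + 4316 in its own register just to feed a
store; folding the constant into the store's x86 addressing mode, disp(base),
frees that register.
/* before: the sum occupies a register of its own */
void store_split(char *base, void *t) {
  char *tmp = base + 4316;
  *(void **)tmp = t;
}

/* after: the +4316 lives in the store's addressing mode */
void store_folded(char *base, void *t) {
  *(void **)(base + 4316) = t;
}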
llvm-svn: 28124
// fold (and (sext x), (sext y)) -> (sext (and x, y))
// fold (or (sext x), (sext y)) -> (sext (or x, y))
// fold (xor (sext x), (sext y)) -> (sext (xor x, y))
// fold (and (aext x), (aext y)) -> (aext (and x, y))
// fold (or (aext x), (aext y)) -> (aext (or x, y))
// fold (xor (aext x), (aext y)) -> (aext (xor x, y))
llvm-svn: 28123
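One of these folds, checked in C (illustrative only): sign extension just
replicates the sign bit, and the bitwise ops act independently per bit, so
extension commutes with them on same-width operands.
#include <assert.h>
void check_sext_and(short x, short y) {
  int lhs = (int)x & (int)y;      /* (and (sext x), (sext y)) */
  int rhs = (int)(short)(x & y);  /* (sext (and x, y)) */
  assert(lhs == rhs);             /* holds for all x and y */
}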
        mov EAX, DWORD PTR [ESP + 4]
        mov ECX, DWORD PTR [EAX]
        mov EDX, ECX
        add EDX, EDX
        or EDX, ECX
        and EDX, -2147483648
        and ECX, 2147483647
        or EDX, ECX
        mov DWORD PTR [EAX], EDX
        ret
instead of:
        sub ESP, 4
        mov DWORD PTR [ESP], ESI
        mov EAX, DWORD PTR [ESP + 8]
        mov ECX, DWORD PTR [EAX]
        mov EDX, ECX
        add EDX, EDX
        mov ESI, ECX
        and ESI, -2147483648
        and EDX, -2147483648
        or EDX, ESI
        and ECX, 2147483647
        or EDX, ECX
        mov DWORD PTR [EAX], EDX
        mov ESI, DWORD PTR [ESP]
        add ESP, 4
        ret
llvm-svn: 28122
llvm-svn: 28121
// fold (and (trunc x), (trunc y)) -> (trunc (and x, y))
// fold (or (trunc x), (trunc y)) -> (trunc (or x, y))
// fold (xor (trunc x), (trunc y)) -> (trunc (xor x, y))
llvm-svn: 28120
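The trunc variants hold for the same per-bit reason (illustrative C;
two's-complement truncation assumed):
#include <assert.h>
void check_trunc_and(int x, int y) {
  short lhs = (short)((short)x & (short)y);  /* (and (trunc x), (trunc y)) */
  short rhs = (short)(x & y);                /* (trunc (and x, y)) */
  assert(lhs == rhs);
}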
llvm-svn: 28117
of cross-block live ranges, and allows the bb-at-a-time selector to always
coalesce these away at isel time.
This reduces the load on the coalescer and register allocator. For example,
on a codec on X86, we went from:
1643 asm-printer - Number of machine instrs printed
419 liveintervals - Number of loads/stores folded into instructions
1144 liveintervals - Number of identity moves eliminated after coalescing
1022 liveintervals - Number of interval joins performed
282 liveintervals - Number of intervals after coalescing
1304 liveintervals - Number of original intervals
86 regalloc - Number of times we had to backtrack
1.90232 regalloc - Ratio of intervals processed over total intervals
40 spiller - Number of values reused
182 spiller - Number of loads added
121 spiller - Number of stores added
132 spiller - Number of register spills
6 twoaddressinstruction - Number of instructions commuted to coalesce
360 twoaddressinstruction - Number of two-address instructions
to:
1636 asm-printer - Number of machine instrs printed
403 liveintervals - Number of loads/stores folded into instructions
1155 liveintervals - Number of identity moves eliminated after coalescing
1033 liveintervals - Number of interval joins performed
279 liveintervals - Number of intervals after coalescing
1312 liveintervals - Number of original intervals
76 regalloc - Number of times we had to backtrack
1.88998 regalloc - Ratio of intervals processed over total intervals
1 spiller - Number of copies elided
41 spiller - Number of values reused
191 spiller - Number of loads added
114 spiller - Number of stores added
128 spiller - Number of register spills
4 twoaddressinstruction - Number of instructions commuted to coalesce
356 twoaddressinstruction - Number of two-address instructions
On this testcase, this change provides a modest reduction in spill code,
regalloc iterations, and total instructions emitted. It increases the number
of register coalesces.
llvm-svn: 28115
llvm-svn: 28110
scheduler can go into a "vertical mode" (i.e. traversing up the two-address
chain, etc.) when the register pressure is low.
This does seem to reduce the number of spills in the cases I've looked at, but
on x86 there is no guarantee that the performance of the code improves.
It can be turned on with the -sched-vertically option.
llvm-svn: 28108
llvm-svn: 28107
llvm-svn: 28105
llvm-svn: 28104
llvm-svn: 28102
llvm-svn: 28099
thing that can be in it. Remove a dead method.
llvm-svn: 28098
llvm-svn: 28094
llvm-svn: 28093
llvm-svn: 28092
llvm-svn: 28091