| Commit message (Collapse) | Author | Age | Files | Lines |
| |
|
|
| |
llvm-svn: 26071
|
| |
|
|
| |
llvm-svn: 26065
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
xori r6, r2, 1
rlwinm r6, r6, 0, 31, 31
cmpwi cr0, r6, 0
bne cr0, LBB1_3 ; endif
to this:
rlwinm r6, r2, 0, 31, 31
cmpwi cr0, r6, 0
beq cr0, LBB1_3 ; endif
llvm-svn: 26047
|
| |
|
|
| |
llvm-svn: 26021
|
| |
|
|
| |
llvm-svn: 26019
|
| |
|
|
| |
llvm-svn: 26018
|
| |
|
|
| |
llvm-svn: 26016
|
| |
|
|
| |
llvm-svn: 26006
|
| |
|
|
| |
llvm-svn: 26005
|
| |
|
|
| |
llvm-svn: 26001
|
| |
|
|
| |
llvm-svn: 26000
|
| |
|
|
|
|
| |
* Fix hasNUsesOfValue(), it should be const.
llvm-svn: 25990
|
| |
|
|
| |
llvm-svn: 25981
|
| |
|
|
| |
llvm-svn: 25978
|
| |
|
|
| |
llvm-svn: 25975
|
| |
|
|
| |
llvm-svn: 25960
|
| |
|
|
| |
llvm-svn: 25957
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
int %rlwnm(int %A, int %B) {
%C = call int asm "rlwnm $0, $1, $2, $3, $4", "=r,r,r,n,n"(int %A, int %B, int 4, int 17)
ret int %C
}
into:
_rlwnm:
or r2, r3, r3
or r3, r4, r4
rlwnm r2, r2, r3, 4, 17 ;; note the immediates :)
or r3, r2, r2
blr
llvm-svn: 25955
|
| |
|
|
| |
llvm-svn: 25952
|
| |
|
|
| |
llvm-svn: 25949
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
store EAX -> [ss#0]
[ss#0] += 1
...
use(EAX)
In this case, it is not valid to rewrite this as:
store EAX -> [ss#0]
EAX += 1
store EAX -> [ss#0] ;;; this would also delete the store above
...
use(EAX)
... because EAX is not a dead at that point. Keep track of which registers
we are allowed to clobber, and which ones we aren't, and don't clobber the
ones we're not supposed to. :)
This should resolve the issues on X86 last night.
llvm-svn: 25948
|
| |
|
|
|
|
|
|
|
| |
and PhysRegsAvailable maps out into a new AvailableSpills struct. No
functionality change.
This paves the way for a bugfix, coming up next.
llvm-svn: 25947
|
| |
|
|
| |
llvm-svn: 25945
|
| |
|
|
|
|
| |
anyway.
llvm-svn: 25941
|
| |
|
|
| |
llvm-svn: 25935
|
| |
|
|
| |
llvm-svn: 25934
|
| |
|
|
|
|
|
|
| |
receive
a std::multimap iterator value. For some reason, GCC doesn't have a problem with this.
llvm-svn: 25927
|
| |
|
|
|
|
| |
copy.
llvm-svn: 25926
|
| |
|
|
| |
llvm-svn: 25924
|
| |
|
|
|
|
|
|
|
|
|
|
| |
1. a target doesn't know how to fold load/stores into copies, or
2. the spiller rewrites the input to a copy to the same register as the dest
instead of to the reloaded reg.
This will be moved/improved in the near future, but allows elimination of
some ancient x86 hacks. This eliminates 92 copies from SMG2000 on X86 and
163 copies from 252.eon.
llvm-svn: 25922
|
| |
|
|
| |
llvm-svn: 25920
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
of this, and use it to our advantage (bwahahah). This allows us to eliminate another
60 instructions from smg2000 on PPC (probably significantly more on X86). A common
old-new diff looks like this:
stw r2, 3304(r1)
- lwz r2, 3192(r1)
stw r2, 3300(r1)
- lwz r2, 3192(r1)
stw r2, 3296(r1)
- lwz r2, 3192(r1)
stw r2, 3200(r1)
- lwz r2, 3192(r1)
stw r2, 3196(r1)
- lwz r2, 3192(r1)
+ or r2, r2, r2
stw r2, 3188(r1)
and
- lwz r31, 604(r1)
- lwz r13, 604(r1)
- lwz r14, 604(r1)
- lwz r15, 604(r1)
- lwz r16, 604(r1)
- lwz r30, 604(r1)
+ or r31, r30, r30
+ or r13, r30, r30
+ or r14, r30, r30
+ or r15, r30, r30
+ or r16, r30, r30
+ or r30, r30, r30
Removal of the R = R copies is coming next...
llvm-svn: 25919
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
this code:
store [stack slot #0], R10
= add R14, [stack slot #0]
The spiller didn't know that the store made the value of [stackslot#0] available
in R10 *IF* the store came from a copy instruction with the store folded into it.
This patch teaches VirtRegMap to look at these stores and recognize the values
they make available. In one case Evan provided, this code:
divsd %XMM0, %XMM1
movsd %XMM1, QWORD PTR [%ESP + 40]
1) movsd QWORD PTR [%ESP + 48], %XMM1
2) movsd %XMM1, QWORD PTR [%ESP + 48]
addsd %XMM1, %XMM0
3) movsd QWORD PTR [%ESP + 48], %XMM1
movsd QWORD PTR [%ESP + 4], %XMM0
turns into:
divsd %XMM0, %XMM1
movsd %XMM1, QWORD PTR [%ESP + 40]
addsd %XMM1, %XMM0
3) movsd QWORD PTR [%ESP + 48], %XMM1
movsd QWORD PTR [%ESP + 4], %XMM0
In this case, instruction #2 was removed because of the value made
available by #1, and inst #1 was later deleted because it is now
never used before the stack slot is redefined by #3.
This occurs here and there in a lot of code with high spilling, on PPC
most of the removed loads/stores are LSU-reject-causing loads, which is
nice.
On X86, things are much better (because it spills more), where we nuke
about 1% of the instructions from SMG2000 and several hundred from eon.
More improvements to come...
llvm-svn: 25917
|
| |
|
|
|
|
| |
more logical place. Other methods should also be moved if anyoneis interested. :)
llvm-svn: 25913
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
and instruction. This allows us to compile stuff like this:
bool %X(int %X) {
%Y = add int %X, 14
%Z = setne int %Y, 12345
ret bool %Z
}
to this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
ret
instead of this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
andl $1, %eax
ret
This occurs quite a bit with the X86 backend. For example, 25 times in
lambda, 30 times in 177.mesa, 14 times in galgel, 70 times in fma3d,
25 times in vpr, several hundred times in gcc, ~45 times in crafty,
~60 times in parser, ~140 times in eon, 110 times in perlbmk, 55 on gap,
16 times on bzip2, 14 times on twolf, and 1-2 times in many other SPEC2K
programs.
llvm-svn: 25901
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
(C1-X) == C2 --> X == C1-C2
(X+C1) == C2 --> X == C2-C1
This allows us to compile this:
bool %X(int %X) {
%Y = add int %X, 14
%Z = setne int %Y, 12345
ret bool %Z
}
into this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
andl $1, %eax
ret
not this:
_X:
movl $14, %eax
addl 4(%esp), %eax
cmpl $12345, %eax
setne %al
movzbl %al, %eax
andl $1, %eax
ret
Testcase here: Regression/CodeGen/X86/compare-add.ll
nukage of the and coming up next.
llvm-svn: 25898
|
| |
|
|
| |
llvm-svn: 25895
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
| |
%C = call int asm "xyz $0, $1, $2, $3", "=r,r,r,0"(int %A, int %B, int 4)
and get:
xyz r2, r3, r4, r2
note that the r2's are pinned together. Yaay for 2-address instructions.
2342 ----------------------------------------------------------------------
llvm-svn: 25893
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
substituted operands. For this testcase:
int %test(int %A, int %B) {
%C = call int asm "xyz $0, $1, $2", "=r,r,r"(int %A, int %B)
ret int %C
}
we now emit:
_test:
or r2, r3, r3
or r3, r4, r4
xyz r2, r2, r3 ;; look here
or r3, r2, r2
blr
... note the substituted operands. :)
llvm-svn: 25886
|
| |
|
|
| |
llvm-svn: 25879
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
compile:
int %test(int %A, int %B) {
%C = call int asm "xyz $0, $1, $2", "=r,r,r"(int %A, int %B)
ret int %C
}
into:
(0x8906130, LLVM BB @0x8902220):
%r2 = OR4 %r3, %r3
%r3 = OR4 %r4, %r4
INLINEASM <es:xyz $0, $1, $2>, %r2<def>, %r2, %r3
%r3 = OR4 %r2, %r2
BLR
which asmprints as:
_test:
or r2, r3, r3
or r3, r4, r4
xyz $0, $1, $2 ;; need to print the operands now :)
or r3, r2, r2
blr
llvm-svn: 25878
|
| |
|
|
|
|
| |
of the SELECT_CC, BR_CC, and BRTWOWAY_CC nodes.
llvm-svn: 25875
|
| |
|
|
| |
llvm-svn: 25865
|
| |
|
|
| |
llvm-svn: 25855
|
| |
|
|
|
|
| |
caused several test failures.
llvm-svn: 25852
|
| |
|
|
| |
llvm-svn: 25832
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
int %test_cpuid(int %op) {
%B = alloca int
%C = alloca int
%D = alloca int
%A = call int asm "cpuid", "=eax,==ebx,==ecx,==edx,eax"(int* %B, int* %C, int* %D, int %op)
%Bv = load int* %B
%Cv = load int* %C
%Dv = load int* %D
%x = add int %A, %Bv
%y = add int %x, %Cv
%z = add int %y, %Dv
ret int %z
}
to this:
_test_cpuid:
sub %ESP, 16
mov DWORD PTR [%ESP], %EBX
mov %EAX, DWORD PTR [%ESP + 20]
cpuid
mov DWORD PTR [%ESP + 8], %ECX
mov DWORD PTR [%ESP + 12], %EBX
mov DWORD PTR [%ESP + 4], %EDX
mov %ECX, DWORD PTR [%ESP + 12]
add %EAX, %ECX
mov %ECX, DWORD PTR [%ESP + 8]
add %EAX, %ECX
mov %ECX, DWORD PTR [%ESP + 4]
add %EAX, %ECX
mov %EBX, DWORD PTR [%ESP]
add %ESP, 16
ret
... note the proper register allocation. :)
it is unclear to me why the loads aren't folded into the adds.
llvm-svn: 25827
|
| |
|
|
| |
llvm-svn: 25822
|
| |
|
|
|
|
|
| |
a chance to custom legalize setcc, which broke a bunch of C++ Codes.
Testcase here: CodeGen/X86/2006-01-30-LongSetcc.ll
llvm-svn: 25821
|
| |
|
|
|
|
| |
of lowered target nodes.
llvm-svn: 25804
|