| Commit message (Collapse) | Author | Age | Files | Lines |
| ... | |
| |
|
|
| |
llvm-svn: 12730
|
| |
|
|
|
|
| |
Fill in addPassesToJITCompile method.
llvm-svn: 12729
|
| |
|
|
| |
llvm-svn: 12728
|
| |
|
|
|
|
|
| |
ld/st instructions - doesn't seem to work yet, but I think it's
just a typo or something somewhere.
llvm-svn: 12727
|
| |
|
|
| |
llvm-svn: 12726
|
| |
|
|
|
|
| |
Don't put NOPs in delay slots at all. We'll have a fix-up pass later.
llvm-svn: 12725
|
| |
|
|
| |
llvm-svn: 12719
|
| |
|
|
| |
llvm-svn: 12716
|
| |
|
|
|
|
| |
from the really simple X86 instruction selector tablegen backend
llvm-svn: 12715
|
| |
|
|
| |
llvm-svn: 12714
|
| |
|
|
| |
llvm-svn: 12713
|
| |
|
|
| |
llvm-svn: 12712
|
| |
|
|
| |
llvm-svn: 12711
|
| |
|
|
| |
llvm-svn: 12710
|
| |
|
|
| |
llvm-svn: 12703
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
Enable folding of long seteq/setne comparisons into branches and select instructions
Implement unfolded long relational comparisons against a constants a bit more efficiently
Folding comparisons changes code that looks like this:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
mov %ECX, %EAX
or %ECX, %EDX
sete %CL
test %CL, %CL
je .LBB2 # PC rel: F
into code that looks like this:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
mov %ECX, %EAX
or %ECX, %EDX
jne .LBB2 # PC rel: F
This speeds up 186.crafty by 6% with llc-ls.
llvm-svn: 12702
|
| |
|
|
| |
llvm-svn: 12701
|
| |
|
|
| |
llvm-svn: 12700
|
| |
|
|
| |
llvm-svn: 12699
|
| |
|
|
|
|
| |
for the application with the C backend instead of the native LLVM code generator
llvm-svn: 12698
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
comparing a long against zero got us this:
sub %ESP, 8
mov DWORD PTR [%ESP + 4], %ESI
mov DWORD PTR [%ESP], %EDI
mov %EAX, DWORD PTR [%ESP + 12]
mov %EDX, DWORD PTR [%ESP + 16]
mov %ECX, 0
mov %ESI, 0
mov %EDI, %EAX
xor %EDI, %ECX
mov %ECX, %EDX
xor %ECX, %ESI
or %EDI, %ECX
sete %CL
test %CL, %CL
je .LBB2 # PC rel: F
Now it gets us this:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
mov %ECX, %EAX
or %ECX, %EDX
sete %CL
test %CL, %CL
je .LBB2 # PC rel: F
llvm-svn: 12696
|
| |
|
|
| |
llvm-svn: 12695
|
| |
|
|
| |
llvm-svn: 12694
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
immediate. For
example, multiplying X*(1 + (1LL << 32)) now produces:
test:
mov %ECX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
mov %EAX, %ECX
add %EDX, %ECX
ret
[[[Note to Alkis: why isn't linear scan generating this code?? This might be a
problem with your intervals being too conservative:
test:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
add %EDX, %EAX
ret
end note]]]
Whereas GCC produces this:
T:
sub %esp, 12
mov %edx, DWORD PTR [%esp+16]
mov DWORD PTR [%esp+8], %edi
mov %ecx, DWORD PTR [%esp+20]
xor %edi, %edi
mov DWORD PTR [%esp], %ebx
mov %ebx, %edi
mov %eax, %edx
mov DWORD PTR [%esp+4], %esi
add %ebx, %edx
mov %edi, DWORD PTR [%esp+8]
lea %edx, [%ecx+%ebx]
mov %esi, DWORD PTR [%esp+4]
mov %ebx, DWORD PTR [%esp]
add %esp, 12
ret
I'm not sure example what GCC is smoking here, but it looks like it has just
confused itself with a bunch of stack slots or something. The intel compiler
is better, but still not good:
T:
movl 4(%esp), %edx #2.11
movl 8(%esp), %eax #2.11
lea (%eax,%edx), %ecx #3.12
movl $1, %eax #3.12
mull %edx #3.12
addl %ecx, %edx #3.12
ret #3.12
llvm-svn: 12693
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
long %test(long %X) {
%Y = mul long %X, 123
ret long %Y
}
we used to generate:
test:
sub %ESP, 12
mov DWORD PTR [%ESP + 8], %ESI
mov DWORD PTR [%ESP + 4], %EDI
mov DWORD PTR [%ESP], %EBX
mov %ECX, DWORD PTR [%ESP + 16]
mov %ESI, DWORD PTR [%ESP + 20]
mov %EDI, 123
mov %EBX, 0
mov %EAX, %ECX
mul %EDI
imul %ESI, %EDI
add %ESI, %EDX
imul %ECX, %EBX
add %ESI, %ECX
mov %EDX, %ESI
mov %EBX, DWORD PTR [%ESP]
mov %EDI, DWORD PTR [%ESP + 4]
mov %ESI, DWORD PTR [%ESP + 8]
add %ESP, 12
ret
Now we emit:
test:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, 123
mul %EDX
imul %ECX, %ECX, 123
add %ECX, %EDX
mov %EDX, %ECX
ret
Which, incidently, is substantially nicer than what GCC manages:
T:
sub %esp, 8
mov %eax, 123
mov DWORD PTR [%esp], %ebx
mov %ebx, DWORD PTR [%esp+16]
mov DWORD PTR [%esp+4], %esi
mov %esi, DWORD PTR [%esp+12]
imul %ecx, %ebx, 123
mov %ebx, DWORD PTR [%esp]
mul %esi
mov %esi, DWORD PTR [%esp+4]
add %esp, 8
lea %edx, [%ecx+%edx]
ret
llvm-svn: 12692
|
| |
|
|
|
|
| |
* Eliminated extraneous space in the HTML
llvm-svn: 12691
|
| |
|
|
| |
llvm-svn: 12690
|
| |
|
|
| |
llvm-svn: 12689
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
On this testcase:
long %test(long %X) {
%Y = shr long %X, ubyte 32
ret long %Y
}
instead of:
t:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EAX, DWORD PTR [%ESP + 8]
sar %EAX, 0
mov %EDX, 0
ret
we now emit:
test:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EAX, DWORD PTR [%ESP + 8]
mov %EDX, 0
ret
llvm-svn: 12688
|
| |
|
|
| |
llvm-svn: 12687
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
For example, on this instruction:
call void %test(long 1234)
Instead of this:
mov %EAX, 1234
mov %ECX, 0
mov DWORD PTR [%ESP], %EAX
mov DWORD PTR [%ESP + 4], %ECX
call test
We now emit this:
mov DWORD PTR [%ESP], 1234
mov DWORD PTR [%ESP + 4], 0
call test
llvm-svn: 12686
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
of the words of the constant is zeros. For example:
Y = and long X, 1234
now generates:
Yl = and Xl, 1234
Yh = 0
instead of:
Yl = and Xl, 1234
Yh = and Xh, 0
llvm-svn: 12685
|
| |
|
|
| |
llvm-svn: 12684
|
| |
|
|
|
|
| |
This allows us to handle code like 'add long %X, 123456789012' more efficiently.
llvm-svn: 12683
|
| |
|
|
| |
llvm-svn: 12682
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
long %test(long %X) {
%Y = sub long 0, %X
ret long %Y
}
We used to generate:
test:
sub %ESP, 4
mov DWORD PTR [%ESP], %ESI
mov %ECX, DWORD PTR [%ESP + 8]
mov %ESI, DWORD PTR [%ESP + 12]
mov %EAX, 0
mov %EDX, 0
sub %EAX, %ECX
sbb %EDX, %ESI
mov %ESI, DWORD PTR [%ESP]
add %ESP, 4
ret
Now we generate:
test:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
neg %EAX
adc %EDX, 0
neg %EDX
ret
llvm-svn: 12681
|
| |
|
|
|
|
| |
to eliminate
llvm-svn: 12680
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
* In promote32, if we can just promote a constant value, do so instead of
promoting a constant dynamically.
* In visitReturn inst, actually USE the promote32 argument that takes a
Value*
The end result of this is that we now generate this:
test:
mov %EAX, 0
ret
instead of...
test:
mov %AX, 0
movzx %EAX, %AX
ret
for:
ushort %test() {
ret ushort 0
}
llvm-svn: 12679
|
| |
|
|
|
|
|
| |
code. This "instantly" gives us loop-extractor power to assist with the
debugment of our nasty codegen issues. :)
llvm-svn: 12678
|
| |
|
|
| |
llvm-svn: 12677
|
| |
|
|
| |
llvm-svn: 12676
|
| |
|
|
| |
llvm-svn: 12675
|
| |
|
|
| |
llvm-svn: 12674
|
| |
|
|
| |
llvm-svn: 12673
|
| |
|
|
| |
llvm-svn: 12671
|
| |
|
|
| |
llvm-svn: 12670
|
| |
|
|
| |
llvm-svn: 12669
|
| |
|
|
| |
llvm-svn: 12668
|
| |
|
|
| |
llvm-svn: 12667
|
| |
|
|
|
|
| |
testcase! :)
llvm-svn: 12659
|