Commit message | Author | Age | Files | Lines
llvm-svn: 30610
llvm-svn: 30609
llvm-svn: 30606
because they have an aggressive linker that does dead code stripping.
llvm-svn: 30604
llvm-svn: 30601
llvm-svn: 30600
llvm-svn: 30597
directives
llvm-svn: 30593
on X86-64.
llvm-svn: 30590
llvm-svn: 30573
llvm-svn: 30570
llvm-svn: 30568
llvm-svn: 30561
llvm-svn: 30560
int %test(ulong *%tmp) {
%tmp = load ulong* %tmp ; <ulong> [#uses=1]
%tmp.mask = shr ulong %tmp, ubyte 50 ; <ulong> [#uses=1]
%tmp.mask = cast ulong %tmp.mask to ubyte
%tmp2 = and ubyte %tmp.mask, 3 ; <ubyte> [#uses=1]
%tmp2 = cast ubyte %tmp2 to int ; <int> [#uses=1]
ret int %tmp2
}
to:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
andl $3, %eax
ret
instead of:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
# TRUNCATE movb %al, %al
andb $3, %al
movzbl %al, %eax
ret
llvm-svn: 30558
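For reference, a rough C equivalent of the IR above (a hypothetical source function written only to match the generated code; it is not part of the original commit):

int test(unsigned long long *tmp) {
  /* Shift bits 50-51 down and mask them off.  With the combine above this
     lowers to a 32-bit load of the high word, shrl $18 and andl $3, with no
     intermediate 8-bit truncate. */
  return (int)((*tmp >> 50) & 3);
}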
the src/dst are not the same size. This catches things like "truncate
32-bit X to 8 bits, then zext to 16", which happens a bit on X86.
llvm-svn: 30557
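A minimal C sketch of the pattern mentioned above (hypothetical example, not taken from the commit): the 32-bit value is truncated to 8 bits and immediately zero-extended to 16, so the combiner can treat the pair as a single mask of the original value.

unsigned short trunc_then_zext(unsigned int x) {
  unsigned char t = (unsigned char)x;   /* truncate i32 -> i8  */
  return (unsigned short)t;             /* zext     i8  -> i16 */
}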
int test3(int a, int b) { return (a < 0) ? a : 0; }
to:
_test3:
srawi r2, r3, 31
and r3, r2, r3
blr
instead of:
_test3:
cmpwi cr0, r3, 1
li r2, 0
blt cr0, LBB2_2 ;entry
LBB2_1: ;entry
mr r3, r2
LBB2_2: ;entry
blr
This implements: PowerPC/select_lt0.ll:seli32_a_a
llvm-svn: 30517
llvm-svn: 30514
1. teach SimplifySetCC that '(srl (ctlz x), 5) == 0' is really x != 0.
2. Teach visitSELECT_CC to use SimplifySetCC instead of calling it and
ignoring the result. This allows us to compile:
bool %test(ulong %x) {
%tmp = setlt ulong %x, 4294967296
ret bool %tmp
}
to:
_test:
cntlzw r2, r3
cmplwi cr0, r3, 1
srwi r2, r2, 5
li r3, 0
beq cr0, LBB1_2 ;
LBB1_1: ;
mr r3, r2
LBB1_2: ;
blr
instead of:
_test:
addi r2, r3, -1
cntlzw r2, r2
cntlzw r3, r3
srwi r2, r2, 5
cmplwi cr0, r2, 0
srwi r2, r3, 5
li r3, 0
bne cr0, LBB1_2 ;
LBB1_1: ;
mr r3, r2
LBB1_2: ;
blr
This isn't wonderful, but it's an improvement.
llvm-svn: 30513
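As a hedged C rendering of the IR above (not part of the original commit), the comparison is against 2^32, i.e. a test that the upper 32-bit word of x is zero, which is what the ctlz/srl idiom computes for that word:

int test(unsigned long long x) {
  /* True exactly when the upper 32 bits of x are zero. */
  return x < 4294967296ULL;   /* 4294967296 == 2^32 */
}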
shift amount is one or zero. For example, for:
long long foo1(long long X, int C) {
return X << (C|32);
}
long long foo2(long long X, int C) {
return X << (C&~32);
}
we get:
_foo1:
movb $31, %cl
movl 4(%esp), %edx
andb 12(%esp), %cl
shll %cl, %edx
xorl %eax, %eax
ret
_foo2:
movb $223, %cl
movl 4(%esp), %eax
movl 8(%esp), %edx
andb 12(%esp), %cl
shldl %cl, %eax, %edx
shll %cl, %eax
ret
instead of:
_foo1:
subl $4, %esp
movl %ebx, (%esp)
movb $32, %bl
movl 8(%esp), %eax
movl 12(%esp), %edx
movb %bl, %cl
orb 16(%esp), %cl
shldl %cl, %eax, %edx
shll %cl, %eax
xorl %ecx, %ecx
testb %bl, %bl
cmovne %eax, %edx
cmovne %ecx, %eax
movl (%esp), %ebx
addl $4, %esp
ret
_foo2:
subl $4, %esp
movl %ebx, (%esp)
movb $223, %cl
movl 8(%esp), %eax
movl 12(%esp), %edx
andb 16(%esp), %cl
shldl %cl, %eax, %edx
shll %cl, %eax
xorl %ecx, %ecx
xorb %bl, %bl
testb %bl, %bl
cmovne %eax, %edx
cmovne %ecx, %eax
movl (%esp), %ebx
addl $4, %esp
ret
llvm-svn: 30506
allow it to go into an infinite loop, filling up the disk!
llvm-svn: 30494
llvm-svn: 30478
llvm-svn: 30477
llvm-svn: 30474
llvm-svn: 30470
llvm-svn: 30462
llvm-svn: 30461
llvm-svn: 30460
llvm-svn: 30407
llvm-svn: 30403
llvm-svn: 30402
is faster and is needed for future improvements.
llvm-svn: 30383
This implements CodeGen/X86/and-or-fold.ll
llvm-svn: 30379
matching things like ((x >> c1) & c2) | ((x << c3) & c4) to (rot x, c5) & c6
llvm-svn: 30376
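A hypothetical C instance of that pattern (illustrative constants only): two opposite shifts of the same value whose amounts sum to the bit width, each masked, which can be rewritten as one rotate followed by one mask.

unsigned masked_rotate(unsigned x) {
  /* Equivalent to rotr(x, 8) & 0xFF0000FF: a single rotate and mask instead
     of two shifts, two ANDs and an OR. */
  return ((x >> 8) & 0x000000FFu) | ((x << 24) & 0xFF000000u);
}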
llvm-svn: 30327
llvm-svn: 30326
values.
llvm-svn: 30316
in a specific BB, don't undo this!). This allows us to compile
CodeGen/X86/loop-hoist.ll into:
_foo:
xorl %eax, %eax
*** movl L_Arr$non_lazy_ptr, %ecx
movl 4(%esp), %edx
LBB1_1: #cond_true
movl %eax, (%ecx,%eax,4)
incl %eax
cmpl %edx, %eax
jne LBB1_1 #cond_true
LBB1_2: #return
ret
instead of:
_foo:
xorl %eax, %eax
movl 4(%esp), %ecx
LBB1_1: #cond_true
*** movl L_Arr$non_lazy_ptr, %edx
movl %eax, (%edx,%eax,4)
incl %eax
cmpl %ecx, %eax
jne LBB1_1 #cond_true
LBB1_2: #return
ret
This was noticed in 464.h264ref. This doesn't usually affect PPC,
but strikes X86 all the time.
llvm-svn: 30290
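A plausible C source for CodeGen/X86/loop-hoist.ll, reconstructed from the assembly above (the actual test may differ): the address of Arr is loop-invariant, so the load of L_Arr$non_lazy_ptr belongs outside the loop, as in the first listing.

extern int Arr[];

void foo(int N) {
  int i;
  /* The load of Arr's address (L_Arr$non_lazy_ptr on Darwin) is
     loop-invariant and should stay hoisted above the loop. */
  for (i = 0; i < N; ++i)
    Arr[i] = i;
}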
addl %ecx, %ecx
adcl %eax, %eax
instead of:
movl %ecx, %edx
addl %edx, %edx
shrl $31, %ecx
addl %eax, %eax
orl %ecx, %eax
and to:
addc r5, r5, r5
adde r4, r4, r4
instead of:
slwi r2,r9,1
srwi r0,r11,31
slwi r3,r11,1
or r2,r0,r2
on PPC.
llvm-svn: 30284
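A hypothetical source for the kind of code discussed above (not from the commit): doubling a 64-bit value on a 32-bit target. With carry-aware lowering the shift by one becomes an add / add-with-carry pair (addl/adcl on x86, addc/adde on PPC) rather than a shift/shift/or sequence.

unsigned long long double_u64(unsigned long long x) {
  /* x << 1 is x + x: the low word is doubled with an add and the carry is
     folded into the doubled high word with an add-with-carry. */
  return x << 1;
}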
representing expressions that can only be resolved at link time, etc.
llvm-svn: 30278
llvm-svn: 30240
due to switch cases going to the same place, it makes #pred != #phi entries,
breaking live interval analysis.
This fixes 458.sjeng on x86 with llc.
llvm-svn: 30236
llvm-svn: 30225
llvm-svn: 30217
the file now; however, the relocated address is currently wrong. Fixing
that will require some deep pondering.
llvm-svn: 30207
safe for later allocation. This fixes McCat/18-imp with llc-beta.
llvm-svn: 30204
llvm-svn: 30198
of unallocatable registers, just because an alias is allocatable. We were
picking registers like SIL just because ESI was being used.
llvm-svn: 30197
llvm-svn: 30162
llvm-svn: 30151