| Commit message (Collapse) | Author | Age | Files | Lines |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
and handle it like constant stride vars. This fixes some bad codegen in
variable stride cases. For example, it compiles this:
void foo(int k, int i) {
for (k=i+i; k <= 8192; k+=i)
flags2[k] = 0;
}
to:
LBB1_1: #bb.preheader
movl %eax, %ecx
addl %ecx, %ecx
movl L_flags2$non_lazy_ptr, %edx
LBB1_2: #bb
movb $0, (%edx,%ecx)
addl %eax, %ecx
cmpl $8192, %ecx
jle LBB1_2 #bb
LBB1_5: #return
ret
or (if the array is local and we are in dynamic-nonpic or static mode):
LBB3_2: #bb
movb $0, _flags2(%ecx)
addl %eax, %ecx
cmpl $8192, %ecx
jle LBB3_2 #bb
and:
lis r2, ha16(L_flags2$non_lazy_ptr)
lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
slwi r3, r4, 1
LBB1_2: ;bb
li r5, 0
add r6, r4, r3
stbx r5, r2, r3
cmpwi cr0, r6, 8192
bgt cr0, LBB1_5 ;return
instead of:
leal (%eax,%eax,2), %ecx
movl %eax, %edx
addl %edx, %edx
addl L_flags2$non_lazy_ptr, %edx
xorl %esi, %esi
LBB1_2: #bb
movb $0, (%edx,%esi)
movl %eax, %edi
addl %esi, %edi
addl %ecx, %esi
cmpl $8192, %esi
jg LBB1_5 #return
and:
lis r2, ha16(L_flags2$non_lazy_ptr)
lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
mulli r3, r4, 3
slwi r5, r4, 1
li r6, 0
add r2, r2, r5
LBB1_2: ;bb
li r5, 0
add r7, r3, r6
stbx r5, r2, r6
add r6, r4, r6
cmpwi cr0, r7, 8192
ble cr0, LBB1_2 ;bb
This speeds up Benchmarks/Shootout/sieve from 8.533s to 6.464s and
implements LoopStrengthReduce/var_stride_used_by_compare.ll
llvm-svn: 31809
|
|
|
|
| |
llvm-svn: 31751
|
|
|
|
|
|
|
|
| |
(X >> Z) op (Y >> Z) -> (X op Y) >> Z
for all shifts and all ops={and/or/xor}.
llvm-svn: 31729
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
typedef struct { unsigned prefix : 4; unsigned code : 4; unsigned unsigned_p : 4; } tree_common;
int foo(tree_common *a, tree_common *b) { return a->code == b->code; }
into:
_foo:
movl 4(%esp), %eax
movl 8(%esp), %ecx
movl (%eax), %eax
xorl (%ecx), %eax
# TRUNCATE movb %al, %al
shrb $4, %al
testb %al, %al
sete %al
movzbl %al, %eax
ret
instead of:
_foo:
movl 8(%esp), %eax
movb (%eax), %al
shrb $4, %al
movl 4(%esp), %ecx
movb (%ecx), %cl
shrb $4, %cl
cmpb %al, %cl
sete %al
movzbl %al, %eax
ret
saving one cycle by eliminating a shift.
llvm-svn: 31727
|
|
|
|
|
|
| |
by the shr -> [al]shr patch. This was reduced from 176.gcc.
llvm-svn: 31653
|
|
|
|
| |
llvm-svn: 31610
|
|
|
|
| |
llvm-svn: 31608
|
|
|
|
|
|
| |
add.ll:test33, add.ll:test34, shift-sra.ll:test2
llvm-svn: 31586
|
|
|
|
|
|
| |
case that it is bad to do.
llvm-svn: 31563
|
|
|
|
|
|
| |
delete in the first place. This also makes it simpler.
llvm-svn: 31562
|
|
|
|
| |
llvm-svn: 31561
|
|
|
|
|
|
| |
loops.
llvm-svn: 31560
|
|
|
|
|
|
|
|
| |
This patch converts the old SHR instruction into two instructions,
AShr (Arithmetic) and LShr (Logical). The Shr instructions now are not
dependent on the sign of their operands.
llvm-svn: 31542
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
int func(vFloat v0, vFloat v1) {
int ii;
vSInt32 vsiidx[2];
vsiidx[0] = _mm_cvttps_epi32(v0);
vsiidx[1] = _mm_cvttps_epi32(v1);
ii = ((int *) vsiidx)[4];
return ii;
}
This fixes Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
llvm-svn: 31524
|
|
|
|
| |
llvm-svn: 31464
|
|
|
|
| |
llvm-svn: 31460
|
|
|
|
| |
llvm-svn: 31431
|
|
|
|
| |
llvm-svn: 31398
|
|
|
|
|
|
|
|
|
|
| |
Turn on -Wunused and -Wno-unused-parameter. Clean up most of the resulting
fallout by removing unused variables. Remaining warnings have to do with
unused functions (I didn't want to delete code without review) and unused
variables in generated code. Maintainers should clean up the remaining
issues when they see them. All changes pass DejaGnu tests and Olden.
llvm-svn: 31380
|
|
|
|
|
|
| |
Replace the REM instruction with UREM, SREM and FREM.
llvm-svn: 31369
|
|
|
|
| |
llvm-svn: 31362
|
|
|
|
|
|
| |
This fixes http://llvm.org/bugs/show_bug.cgi?id=979
llvm-svn: 31358
|
|
|
|
| |
llvm-svn: 31352
|
|
|
|
| |
llvm-svn: 31346
|
|
|
|
|
|
|
| |
result. This can significantly shrink code and exposes identities more
aggressively.
llvm-svn: 31344
|
|
|
|
| |
llvm-svn: 31342
|
|
|
|
|
|
| |
This triggers thousands of times on multisource.
llvm-svn: 31341
|
|
|
|
|
|
| |
Transforms/LCSSA/2006-10-31-UnreachableBlock-2.ll
llvm-svn: 31317
|
|
|
|
| |
llvm-svn: 31315
|
|
|
|
| |
llvm-svn: 31284
|
|
|
|
| |
llvm-svn: 31258
|
|
|
|
| |
llvm-svn: 31257
|
|
|
|
| |
llvm-svn: 31256
|
|
|
|
| |
llvm-svn: 31255
|
|
|
|
| |
llvm-svn: 31248
|
|
|
|
|
|
|
|
| |
InsertNewInstBefore(new CastInst(Val, ValTy, Val->GetName()), I)
into:
InsertCastBefore(Val, ValTy, I)
llvm-svn: 31204
|
|
|
|
|
|
|
|
| |
Make necessary changes to support DIV -> [SUF]Div. This changes llvm to
have three division instructions: signed, unsigned, floating point. The
bytecode and assembler are backwards compatible, however.
llvm-svn: 31195
|
|
|
|
|
|
| |
produce an EQ property.
llvm-svn: 31193
|
|
|
|
|
|
| |
Fix and comment the "or", "and" and "xor" transformations.
llvm-svn: 31189
|
|
|
|
| |
llvm-svn: 31184
|
|
|
|
| |
llvm-svn: 31151
|
|
|
|
|
|
| |
passes llvm-gcc bootstrap.
llvm-svn: 31146
|
|
|
|
|
|
| |
Prolangs-C/agrep and SCCP/2006-10-23-IPSCCP-Crash.ll
llvm-svn: 31132
|
|
|
|
|
|
|
| |
property is added by running through the list of uses of the value and
adding resolved properties to the property set.
llvm-svn: 31126
|
|
|
|
| |
llvm-svn: 31123
|
|
|
|
| |
llvm-svn: 31121
|
|
|
|
|
|
| |
optimization opportunity pointed out by Chris Lattner.
llvm-svn: 31118
|
|
|
|
|
|
| |
opportunity pointed out by Andrew Lewycky.
llvm-svn: 31115
|
|
|
|
|
|
| |
transformation. This speeds up a C++ app 2.25x.
llvm-svn: 31113
|
|
|
|
|
|
|
|
|
|
| |
1. Better document what is going on here.
2. Only hack on one branch per iteration, making the results less conservative.
3. Handle the problematic case by marking edges executable instead of by
playing with value lattice states. This is far less pessimistic, and fixes
SCCP/ipsccp-gvar.ll.
llvm-svn: 31106
|