| Commit message (Collapse) | Author | Age | Files | Lines |
| |
|
|
| |
llvm-svn: 32426
|
| |
|
|
|
|
| |
This implements Transforms/ScalarRepl/union-packed.ll
llvm-svn: 32422
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
* Add support for promoting unions with fp values in them. This produces
our new int<->fp bitcast instructions, implementing
Transforms/ScalarRepl/union-fp-int.ll
As an example, this allows us to compile this:
union intfloat { int i; float f; };
float invsqrt(const float arg_x) {
union intfloat x = { .f = arg_x };
const float xhalf = arg_x * 0.5f;
x.i = 0x5f3759df - (x.i >> 1);
return x.f * (1.5f - xhalf * x.f * x.f);
}
into:
_invsqrt:
movss 4(%esp), %xmm0
movd %xmm0, %eax
sarl %eax
movl $1597463007, %ecx
subl %eax, %ecx
movd %ecx, %xmm1
mulss LCPI1_0, %xmm0
mulss %xmm1, %xmm0
movss LCPI1_1, %xmm2
mulss %xmm1, %xmm0
subss %xmm0, %xmm2
movl 8(%esp), %eax
mulss %xmm2, %xmm1
movss %xmm1, (%eax)
ret
instead of:
_invsqrt:
subl $4, %esp
movss 8(%esp), %xmm0
movss %xmm0, (%esp)
movl (%esp), %eax
movl $1597463007, %ecx
sarl %eax
subl %eax, %ecx
movl %ecx, (%esp)
mulss LCPI1_0, %xmm0
movss (%esp), %xmm1
mulss %xmm1, %xmm0
mulss %xmm1, %xmm0
movss LCPI1_1, %xmm2
subss %xmm0, %xmm2
mulss %xmm2, %xmm1
movl 12(%esp), %eax
movss %xmm1, (%eax)
addl $4, %esp
ret
llvm-svn: 32418
|
| |
|
|
|
|
| |
MarkAliveBlocks.
llvm-svn: 32375
|
| |
|
|
| |
llvm-svn: 32333
|
| |
|
|
| |
llvm-svn: 32320
|
| |
|
|
|
|
| |
now cerr, cout, and NullStream resp.
llvm-svn: 32298
|
| |
|
|
| |
llvm-svn: 32288
|
| |
|
|
| |
llvm-svn: 32280
|
| |
|
|
|
|
| |
is 'unsigned'.
llvm-svn: 32279
|
| |
|
|
|
|
| |
passing false would make the asmprinter fail anyway.
llvm-svn: 32264
|
| |
|
|
| |
llvm-svn: 32252
|
| |
|
|
|
|
| |
7.48s. This regression is due to unforeseen consequences of the cast patch.
llvm-svn: 32209
|
| |
|
|
|
|
| |
for now.
llvm-svn: 32208
|
| |
|
|
| |
llvm-svn: 32166
|
| |
|
|
| |
llvm-svn: 32113
|
| |
|
|
|
|
| |
Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
llvm-svn: 32112
|
| |
|
|
| |
llvm-svn: 32051
|
| |
|
|
| |
llvm-svn: 32044
|
| |
|
|
| |
llvm-svn: 32043
|
| |
|
|
| |
llvm-svn: 32042
|
| |
|
|
|
|
|
|
|
|
| |
unsigned short swp(unsigned short a) {
return ((a & 0xff00) >> 8 | (a & 0x00ff) << 8);
}
as an idiom for bswap.
llvm-svn: 32011
|
| |
|
|
|
|
|
| |
This implements InstCombine/cast.ll:test34. It fires hundreds of times on
176.gcc.
llvm-svn: 32009
|
| |
|
|
|
|
| |
folding seteq (bswap(x)), c -> seteq(x,bswap(c))
llvm-svn: 32006
|
| |
|
|
| |
llvm-svn: 31996
|
| |
|
|
| |
llvm-svn: 31977
|
| |
|
|
|
|
| |
producing code for "trunc to bool". This passes all tests on Linux.
llvm-svn: 31963
|
| |
|
|
| |
llvm-svn: 31941
|
| |
|
|
|
|
|
|
|
|
| |
The long awaited CAST patch. This introduces 12 new instructions into LLVM
to replace the cast instruction. Corresponding changes throughout LLVM are
provided. This passes llvm-test, llvm/test, and SPEC CPUINT2000 with the
exception of 175.vpr which fails only on a slight floating point output
difference.
llvm-svn: 31931
|
| |
|
|
| |
llvm-svn: 31925
|
| |
|
|
| |
llvm-svn: 31924
|
| |
|
|
| |
llvm-svn: 31923
|
| |
|
|
| |
llvm-svn: 31922
|
| |
|
|
|
|
|
|
| |
Remove predicate simplifier from default gcc3 pipeline. New design is too
slow to enable by default.
Add new testcases for problems encountered in development.
llvm-svn: 31895
|
| |
|
|
| |
llvm-svn: 31889
|
| |
|
|
|
|
|
| |
only do these transformations if there are a small number of phi's.
This speeds up Ptrdist/ks from 2.35s to 2.19s on my mac pro.
llvm-svn: 31853
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
and handle it like constant stride vars. This fixes some bad codegen in
variable stride cases. For example, it compiles this:
void foo(int k, int i) {
for (k=i+i; k <= 8192; k+=i)
flags2[k] = 0;
}
to:
LBB1_1: #bb.preheader
movl %eax, %ecx
addl %ecx, %ecx
movl L_flags2$non_lazy_ptr, %edx
LBB1_2: #bb
movb $0, (%edx,%ecx)
addl %eax, %ecx
cmpl $8192, %ecx
jle LBB1_2 #bb
LBB1_5: #return
ret
or (if the array is local and we are in dynamic-nonpic or static mode):
LBB3_2: #bb
movb $0, _flags2(%ecx)
addl %eax, %ecx
cmpl $8192, %ecx
jle LBB3_2 #bb
and:
lis r2, ha16(L_flags2$non_lazy_ptr)
lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
slwi r3, r4, 1
LBB1_2: ;bb
li r5, 0
add r6, r4, r3
stbx r5, r2, r3
cmpwi cr0, r6, 8192
bgt cr0, LBB1_5 ;return
instead of:
leal (%eax,%eax,2), %ecx
movl %eax, %edx
addl %edx, %edx
addl L_flags2$non_lazy_ptr, %edx
xorl %esi, %esi
LBB1_2: #bb
movb $0, (%edx,%esi)
movl %eax, %edi
addl %esi, %edi
addl %ecx, %esi
cmpl $8192, %esi
jg LBB1_5 #return
and:
lis r2, ha16(L_flags2$non_lazy_ptr)
lwz r2, lo16(L_flags2$non_lazy_ptr)(r2)
mulli r3, r4, 3
slwi r5, r4, 1
li r6, 0
add r2, r2, r5
LBB1_2: ;bb
li r5, 0
add r7, r3, r6
stbx r5, r2, r6
add r6, r4, r6
cmpwi cr0, r7, 8192
ble cr0, LBB1_2 ;bb
This speeds up Benchmarks/Shootout/sieve from 8.533s to 6.464s and
implements LoopStrengthReduce/var_stride_used_by_compare.ll
llvm-svn: 31809
|
| |
|
|
| |
llvm-svn: 31751
|
| |
|
|
|
|
|
|
| |
(X >> Z) op (Y >> Z) -> (X op Y) >> Z
for all shifts and all ops={and/or/xor}.
llvm-svn: 31729
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
typedef struct { unsigned prefix : 4; unsigned code : 4; unsigned unsigned_p : 4; } tree_common;
int foo(tree_common *a, tree_common *b) { return a->code == b->code; }
into:
_foo:
movl 4(%esp), %eax
movl 8(%esp), %ecx
movl (%eax), %eax
xorl (%ecx), %eax
# TRUNCATE movb %al, %al
shrb $4, %al
testb %al, %al
sete %al
movzbl %al, %eax
ret
instead of:
_foo:
movl 8(%esp), %eax
movb (%eax), %al
shrb $4, %al
movl 4(%esp), %ecx
movb (%ecx), %cl
shrb $4, %cl
cmpb %al, %cl
sete %al
movzbl %al, %eax
ret
saving one cycle by eliminating a shift.
llvm-svn: 31727
|
| |
|
|
|
|
| |
by the shr -> [al]shr patch. This was reduced from 176.gcc.
llvm-svn: 31653
|
| |
|
|
| |
llvm-svn: 31610
|
| |
|
|
| |
llvm-svn: 31608
|
| |
|
|
|
|
| |
add.ll:test33, add.ll:test34, shift-sra.ll:test2
llvm-svn: 31586
|
| |
|
|
|
|
| |
case that is bad to do.
llvm-svn: 31563
|
| |
|
|
|
|
| |
delete in the first place. This also makes it simpler.
llvm-svn: 31562
|
| |
|
|
| |
llvm-svn: 31561
|
| |
|
|
|
|
| |
loops.
llvm-svn: 31560
|
| |
|
|
|
|
|
|
| |
This patch converts the old SHR instruction into two instructions,
AShr (Arithmetic) and LShr (Logical). The Shr instructions now are not
dependent on the sign of their operands.
llvm-svn: 31542
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
int func(vFloat v0, vFloat v1) {
int ii;
vSInt32 vsiidx[2];
vsiidx[0] = _mm_cvttps_epi32(v0);
vsiidx[1] = _mm_cvttps_epi32(v1);
ii = ((int *) vsiidx)[4];
return ii;
}
This fixes Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
llvm-svn: 31524
|