| Commit message | Author | Age | Files | Lines |
llvm-svn: 21871
llvm-svn: 21855
llvm-svn: 21838
llvm-svn: 21824
arithmetic lowering.
llvm-svn: 21818
being stored/loaded through!
llvm-svn: 21806
llvm-svn: 21805
llvm-svn: 21803
population (ctpop). Generic lowering is implemented; however, only promotion
is implemented for SelectionDAG at the moment.
More coming soon.
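For reference, here is a sketch in C of the classic parallel-sum popcount
that generic lowering of this kind typically reduces to on targets without
a native instruction (an illustration, not the exact expansion the
legalizer emits):

/* Hacker's Delight-style population count; illustrative only.
   Assumes a 32-bit unsigned int. */
unsigned popcount32(unsigned x) {
  x = x - ((x >> 1) & 0x55555555);                /* 2-bit sums */
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333); /* 4-bit sums */
  x = (x + (x >> 4)) & 0x0F0F0F0F;                /* 8-bit sums */
  return (x * 0x01010101) >> 24;                  /* total in top byte */
}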
llvm-svn: 21676
llvm-svn: 21605
enables one to use alias analysis in the backends.
(TRUNC)Stores and (EXT|ZEXT|SEXT)Loads have an extra SDOperand, a
SrcValueSDNode, which contains the Value*. Note that if the operation is
introduced by the backend, it will still have the operand, but the Value*
will be null.
llvm-svn: 21599
llvm-svn: 21552
int foo1(int x, int y) {
  int t1 = x >= 0;
  int t2 = y >= 0;
  return t1 & t2;
}
int foo2(int x, int y) {
  int t1 = x == -1;
  int t2 = y == -1;
  return t1 & t2;
}
produces:
_foo1:
or r2, r4, r3
srwi r2, r2, 31
xori r3, r2, 1
blr
_foo2:
and r2, r4, r3
addic r2, r2, 1
li r2, 0
addze r3, r2
blr
instead of:
_foo1:
srwi r2, r4, 31
xori r2, r2, 1
srwi r3, r3, 31
xori r3, r3, 1
and r3, r2, r3
blr
_foo2:
addic r2, r4, 1
li r2, 0
addze r2, r2
addic r3, r3, 1
li r3, 0
addze r3, r3
and r3, r2, r3
blr
llvm-svn: 21547
_foo:
or r2, r4, r3
srwi r3, r2, 31
blr
instead of:
_foo:
srwi r2, r4, 31
srwi r3, r3, 31
or r3, r2, r3
blr
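The source that plausibly produces this (an assumption; the commit shows
only the assembly) is an or of two sign tests:

int foo(int x, int y) {
  return (x < 0) | (y < 0);  /* sign(x) | sign(y) == sign(x | y) */
}

The fold is sound because x | y has its sign bit set exactly when either
x or y does, so one or plus one shift replaces two shifts and an or.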
llvm-svn: 21544
llvm-svn: 21439
llvm-svn: 21420
bool %test(int %X) {
  %Y = and int %X, 8
  %Z = setne int %Y, 0
  ret bool %Z
}
we now generate this:
rlwinm r2, r3, 0, 28, 28
srwi r3, r2, 3
instead of this:
rlwinm r2, r3, 0, 28, 28
srwi r2, r2, 3
rlwinm r3, r2, 0, 31, 31
I'll leave it to Nate to get it down to one instruction. :)
llvm-svn: 21391
This turns this PPC code:
rlwinm r2, r3, 0, 28, 28
cmpwi cr7, r2, 8
mfcr r2
rlwinm r3, r2, 31, 31, 31
into this:
rlwinm r2, r3, 0, 28, 28
srwi r2, r2, 3
rlwinm r3, r2, 0, 31, 31
Next up, nuking the extra and.
llvm-svn: 21390
llvm-svn: 21319
llvm-svn: 21318
llvm-svn: 21317
// (X != 0) | (Y != 0) -> (X|Y != 0)
// (X == 0) & (Y == 0) -> (X|Y == 0)
Compiling this:
int %bar(int %a, int %b) {
entry:
  %tmp.1 = setne int %a, 0
  %tmp.2 = setne int %b, 0
  %tmp.3 = or bool %tmp.1, %tmp.2
  %retval = cast bool %tmp.3 to int
  ret int %retval
}
to this:
_bar:
or r2, r3, r4
addic r3, r2, -1
subfe r3, r3, r2
blr
instead of:
_bar:
addic r2, r3, -1
subfe r2, r2, r3
addic r3, r4, -1
subfe r3, r3, r4
or r3, r2, r3
blr
llvm-svn: 21316
eliminating an and for Nate's testcase:
int %bar(int %a, int %b) {
entry:
  %tmp.1 = setne int %a, 0
  %tmp.2 = setne int %b, 0
  %tmp.3 = or bool %tmp.1, %tmp.2
  %retval = cast bool %tmp.3 to int
  ret int %retval
}
generating:
_bar:
addic r2, r3, -1
subfe r2, r2, r3
addic r3, r4, -1
subfe r3, r3, r4
or r3, r2, r3
blr
instead of:
_bar:
addic r2, r3, -1
subfe r2, r2, r3
addic r3, r4, -1
subfe r3, r3, r4
or r2, r2, r3
rlwinm r3, r2, 0, 31, 31
blr
llvm-svn: 21315
in the PPC Pattern ISel
llvm-svn: 21297
Move the transform for select (a < 0) ? b : 0 into the DAG from the PPC ISel.
Enable the DAG to fold and (setcc, 1) -> setcc for targets where setcc
always produces zero or one.
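A hypothetical source-level example of the first transform; on targets
with an arithmetic right shift, (a < 0) ? b : 0 can be computed
branchlessly, since a >> 31 is all ones exactly when a is negative:

int sel(int a, int b) {
  return (a < 0) ? b : 0;  /* foldable to (a >> 31) & b for 32-bit int */
}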
llvm-svn: 21291
llvm-svn: 21289
llvm-svn: 21288
with != 0 comparisons vanishing.
llvm-svn: 21287
llvm-svn: 21273
llvm-svn: 21272
instead. Overall, this increases the amount of folding we can do.
llvm-svn: 21265
llvm-svn: 21262
Make LLVM undef values generate ISD::UNDEF nodes.
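(For illustration, one common way an undef value reaches the selection
DAG: a read of an uninitialized local, assuming it has been promoted to
a register.)

int leaves_undef(void) {
  int x;      /* never initialized */
  return x;   /* becomes an LLVM undef value after promotion */
}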
llvm-svn: 21261
compile this:
int foo(unsigned long a, unsigned long long g) {
  return a >= g;
}
To:
foo:
movl 8(%esp), %eax
cmpl %eax, 4(%esp)
setae %al
cmpl $0, 12(%esp)
sete %cl
andb %al, %cl
movzbl %cl, %eax
ret
instead of:
foo:
movl 8(%esp), %eax
cmpl %eax, 4(%esp)
setae %al
movzbw %al, %cx
movl 12(%esp), %edx
cmpl $0, %edx
sete %al
movzbw %al, %ax
cmpl $0, %edx
cmove %cx, %ax
movzbl %al, %eax
ret
llvm-svn: 21244
unsigned long long g;
unsigned long foo(unsigned long a) {
  return (a >= g) ? 1 : 0;
}
It changes the PPC code from:
_foo:
.LBB_foo_0: ; entry
mflr r11
stw r11, 8(r1)
bl "L00000$pb"
"L00000$pb":
mflr r2
addis r2, r2, ha16(L_g$non_lazy_ptr-"L00000$pb")
lwz r2, lo16(L_g$non_lazy_ptr-"L00000$pb")(r2)
lwz r4, 0(r2)
lwz r2, 4(r2)
cmplw cr0, r3, r2
li r2, 1
li r3, 0
bge .LBB_foo_2 ; entry
.LBB_foo_1: ; entry
or r2, r3, r3
.LBB_foo_2: ; entry
cmplwi cr0, r4, 1
li r3, 1
li r5, 0
blt .LBB_foo_4 ; entry
.LBB_foo_3: ; entry
or r3, r5, r5
.LBB_foo_4: ; entry
cmpwi cr0, r4, 0
beq .LBB_foo_6 ; entry
.LBB_foo_5: ; entry
or r2, r3, r3
.LBB_foo_6: ; entry
rlwinm r3, r2, 0, 31, 31
lwz r11, 8(r1)
mtlr r11
blr
to:
_foo:
.LBB_foo_0: ; entry
mflr r11
stw r11, 8(r1)
bl "L00000$pb"
"L00000$pb":
mflr r2
addis r2, r2, ha16(L_g$non_lazy_ptr-"L00000$pb")
lwz r2, lo16(L_g$non_lazy_ptr-"L00000$pb")(r2)
lwz r4, 0(r2)
lwz r2, 4(r2)
cmplw cr0, r3, r2
li r2, 1
li r3, 0
bge .LBB_foo_2 ; entry
.LBB_foo_1: ; entry
or r2, r3, r3
.LBB_foo_2: ; entry
cntlzw r3, r4
srwi r3, r3, 5
cmpwi cr0, r4, 0
beq .LBB_foo_4 ; entry
.LBB_foo_3: ; entry
or r2, r3, r3
.LBB_foo_4: ; entry
rlwinm r3, r2, 0, 31, 31
lwz r11, 8(r1)
mtlr r11
blr
llvm-svn: 21241
the result does change as a result of the extend.
This improves codegen for Alpha on this testcase:
int %a(ushort* %i) {
  %tmp.1 = load ushort* %i
  %tmp.2 = cast ushort %tmp.1 to int
  %tmp.4 = and int %tmp.2, 1
  ret int %tmp.4
}
Generating:
a:
ldgp $29, 0($27)
ldwu $0,0($16)
and $0,1,$0
ret $31,($26),1
instead of:
a:
ldgp $29, 0($27)
ldwu $0,0($16)
and $0,1,$0
addl $0,0,$0
ret $31,($26),1
btw, alpha really should switch to livein/outs for args :)
llvm-svn: 21213
llvm-svn: 21204
llvm-svn: 21203
the new zero extend, not the original operand. This fixes cast bool -> long
on PPC.
Add an unrelated FIXME.
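A minimal source case that exercises this path (illustrative; in the old
LLVM type system the operation is spelled 'cast bool %b to long'):

long bool_to_long(int x) {
  return x != 0;  /* bool comparison result zero-extended to long */
}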
llvm-svn: 21196
int a(short i) {
  return i & 1;
}
as
_a:
andi. r3, r3, 1
blr
instead of:
_a:
rlwinm r2, r3, 0, 16, 31
andi. r3, r2, 1
blr
on PPC. It should also help the other RISC targets.
llvm-svn: 21189
is deconstructed then reconstructed here. This catches 19 fabs's in 177.mesa,
9 in 168.wupwise, 5 in 171.swim, 3 in 172.mgrid, and 14 in 173.applu out of
SPECfp2000.
This allows the X86 code generator to make MUCH better code than before for
each of these, and saves one instruction on PPC.
This depends on the previous CFE patch to expose these correctly.
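One plausible source idiom (an assumption; the commit does not show the
source) whose compare-and-negate expansion can be matched back into a
single fabs node, ignoring the signed-zero subtlety:

double my_fabs(double x) {
  return (x < 0.0) ? -x : x;  /* differs from fabs only for -0.0 */
}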
llvm-svn: 21171
llvm-svn: 21165
llvm-svn: 21160
this into sign/zero extension instructions later.
On PPC, for example, this testcase:
%G = external global sbyte
implementation
void %test(int %X, int %Y) {
  %C = setlt int %X, %Y
  %D = cast bool %C to sbyte
  store sbyte %D, sbyte* %G
  ret void
}
Now codegens to:
cmpw cr0, r3, r4
li r3, 1
li r4, 0
blt .LBB_test_2 ;
.LBB_test_1: ;
or r3, r4, r4
.LBB_test_2: ;
addis r2, r2, ha16(L_G$non_lazy_ptr-"L00000$pb")
lwz r2, lo16(L_G$non_lazy_ptr-"L00000$pb")(r2)
stb r3, 0(r2)
instead of:
cmpw cr0, r3, r4
li r3, 1
li r4, 0
blt .LBB_test_2 ;
.LBB_test_1: ;
or r3, r4, r4
.LBB_test_2: ;
*** rlwinm r3, r3, 0, 31, 31
addis r2, r2, ha16(L_G$non_lazy_ptr-"L00000$pb")
lwz r2, lo16(L_G$non_lazy_ptr-"L00000$pb")(r2)
stb r3, 0(r2)
llvm-svn: 21148
llvm-svn: 21144
(likewise for <= >=u >=u).
Second, it implements a special case hack to turn 'X gtu SINTMAX' -> 'X lt 0'
On PowerPC, for example, this changes this:
lis r2, 32767
ori r2, r2, 65535
cmplw cr0, r3, r2
bgt .LBB_test_2
into:
cmpwi cr0, r3, 0
blt .LBB_test_2
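A hypothetical source for the sequence above; X >u SINTMAX holds exactly
when the sign bit of X is set, i.e. when X is negative when reinterpreted
as signed:

int in_top_half(unsigned x) {
  return x > 2147483647U;  /* x >u 0x7fffffff  ==  (int)x < 0 */
}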
llvm-svn: 21142
elements out of the autoCSE maps.
llvm-svn: 21128
multiply.
llvm-svn: 21102
llvm-svn: 21008
llvm-svn: 21004