| Commit message (Collapse) | Author | Age | Files | Lines |
| ... | |
| |
|
|
|
|
|
| |
out into SimplifySelectCC. This allows both ISD::SELECT and ISD::SELECT_CC
to use the same set of simplifying folds.
llvm-svn: 22779
|
| |
|
|
|
|
| |
back once a DAG->DAG ISel exists.
llvm-svn: 22778
|
| |
|
|
| |
llvm-svn: 22777
|
| |
|
|
|
|
|
| |
a problem in LoopStrengthReduction, where it would split critical edges
then confused itself with outdated loop information.
llvm-svn: 22776
|
| |
|
|
|
|
| |
need to be updated. This code is a relic from when it did.
llvm-svn: 22775
|
| |
|
|
|
|
| |
e.g. (X & 7) >> 3 -> 0
llvm-svn: 22774
|
| |
|
|
|
|
| |
zero.
llvm-svn: 22773
|
| |
|
|
| |
llvm-svn: 22772
|
| |
|
|
|
|
| |
(~x)|y and x|(~y) yielding ORC.
llvm-svn: 22771
|
| |
|
|
| |
llvm-svn: 22770
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
middle of the loop. This turns a critical loop in gzip into this:
.LBB_test_1: ; loopentry
or r27, r28, r28
add r28, r3, r27
lhz r28, 3(r28)
add r26, r4, r27
lhz r26, 3(r26)
cmpw cr0, r28, r26
bne .LBB_test_8 ; loopentry.loopexit_crit_edge
.LBB_test_2: ; shortcirc_next.0
add r28, r3, r27
lhz r28, 5(r28)
add r26, r4, r27
lhz r26, 5(r26)
cmpw cr0, r28, r26
bne .LBB_test_7 ; shortcirc_next.0.loopexit_crit_edge
.LBB_test_3: ; shortcirc_next.1
add r28, r3, r27
lhz r28, 7(r28)
add r26, r4, r27
lhz r26, 7(r26)
cmpw cr0, r28, r26
bne .LBB_test_6 ; shortcirc_next.1.loopexit_crit_edge
.LBB_test_4: ; shortcirc_next.2
add r28, r3, r27
lhz r26, 9(r28)
add r28, r4, r27
lhz r25, 9(r28)
addi r28, r27, 8
cmpw cr7, r26, r25
mfcr r26, 1
rlwinm r26, r26, 31, 31, 31
add r25, r8, r27
cmpw cr7, r25, r7
mfcr r25, 1
rlwinm r25, r25, 29, 31, 31
and. r26, r26, r25
bne .LBB_test_1 ; loopentry
instead of this:
.LBB_test_1: ; loopentry
or r27, r28, r28
add r28, r3, r27
lhz r28, 3(r28)
add r26, r4, r27
lhz r26, 3(r26)
cmpw cr0, r28, r26
beq .LBB_test_3 ; shortcirc_next.0
.LBB_test_2: ; loopentry.loopexit_crit_edge
add r2, r30, r27
add r8, r29, r27
b .LBB_test_9 ; loopexit
.LBB_test_3: ; shortcirc_next.0
add r28, r3, r27
lhz r28, 5(r28)
add r26, r4, r27
lhz r26, 5(r26)
cmpw cr0, r28, r26
beq .LBB_test_5 ; shortcirc_next.1
.LBB_test_4: ; shortcirc_next.0.loopexit_crit_edge
add r2, r11, r27
add r8, r12, r27
b .LBB_test_9 ; loopexit
.LBB_test_5: ; shortcirc_next.1
add r28, r3, r27
lhz r28, 7(r28)
add r26, r4, r27
lhz r26, 7(r26)
cmpw cr0, r28, r26
beq .LBB_test_7 ; shortcirc_next.2
.LBB_test_6: ; shortcirc_next.1.loopexit_crit_edge
add r2, r9, r27
add r8, r10, r27
b .LBB_test_9 ; loopexit
.LBB_test_7: ; shortcirc_next.2
add r28, r3, r27
lhz r26, 9(r28)
add r28, r4, r27
lhz r25, 9(r28)
addi r28, r27, 8
cmpw cr7, r26, r25
mfcr r26, 1
rlwinm r26, r26, 31, 31, 31
add r25, r8, r27
cmpw cr7, r25, r7
mfcr r25, 1
rlwinm r25, r25, 29, 31, 31
and. r26, r26, r25
bne .LBB_test_1 ; loopentry
Next up, improve the code for the loop.
llvm-svn: 22769
|
| |
|
|
| |
llvm-svn: 22768
|
| |
|
|
| |
llvm-svn: 22767
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
edge so that the code is not always executed for both operands. This
prevents LSR from inserting code into loops whose exit blocks contain
PHI uses of IV expressions (which are outside of loops). On gzip, for
example, we turn this ugly code:
.LBB_test_1: ; loopentry
add r27, r3, r28
lhz r27, 3(r27)
add r26, r4, r28
lhz r26, 3(r26)
add r25, r30, r28 ;; Only live if exiting the loop
add r24, r29, r28 ;; Only live if exiting the loop
cmpw cr0, r27, r26
bne .LBB_test_5 ; loopexit
into this:
.LBB_test_1: ; loopentry
or r27, r28, r28
add r28, r3, r27
lhz r28, 3(r28)
add r26, r4, r27
lhz r26, 3(r26)
cmpw cr0, r28, r26
beq .LBB_test_3 ; shortcirc_next.0
.LBB_test_2: ; loopentry.loopexit_crit_edge
add r2, r30, r27
add r8, r29, r27
b .LBB_test_9 ; loopexit
.LBB_test_2: ; shortcirc_next.0
...
blt .LBB_test_1
into this:
.LBB_test_1: ; loopentry
or r27, r28, r28
add r28, r3, r27
lhz r28, 3(r28)
add r26, r4, r27
lhz r26, 3(r26)
cmpw cr0, r28, r26
beq .LBB_test_3 ; shortcirc_next.0
.LBB_test_2: ; loopentry.loopexit_crit_edge
add r2, r30, r27
add r8, r29, r27
b .LBB_t_3: ; shortcirc_next.0
.LBB_test_3: ; shortcirc_next.0
...
blt .LBB_test_1
Next step: get the block out of the loop so that the loop is all
fall-throughs again.
llvm-svn: 22766
|
| |
|
|
|
|
|
| |
Instead, just update the BB in-place. This is both faster, and it prevents
split-critical-edges from shuffling the PHI argument list unneccesarily.
llvm-svn: 22765
|
| |
|
|
| |
llvm-svn: 22764
|
| |
|
|
| |
llvm-svn: 22763
|
| |
|
|
| |
llvm-svn: 22762
|
| |
|
|
|
|
|
|
| |
specified opcode and an integer constant right operand.
2. Modified ISD::SHL, ISD::SRL, ISD::SRA to use rlwinm when applied after a mask.
llvm-svn: 22761
|
| |
|
|
|
|
| |
Patch by Jim Laskey.
llvm-svn: 22760
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
| |
(avoids an extra level of indirection in MakeReg).
defined MakeIntReg using RegMap->createVirtualRegister(PPC32::GPRCRegisterClass)
defined MakeFPReg using RegMap->createVirtualRegister(PPC32::FPRCRegisterClass)
s/MakeReg(MVT::i32)/MakeIntReg/
s/MakeReg(MVT::f64)/MakeFPReg/
Patch by Jim Laskey!
llvm-svn: 22759
|
| |
|
|
|
|
| |
integer MPEG encoding loop by a factor of two.
llvm-svn: 22758
|
| |
|
|
|
|
|
|
|
|
|
|
| |
1. move assertions for node creation to getNode()
2. legalize the values returned in ExpandOp immediately
3. Move select_cc optimizations from SELECT's getNode() to SELECT_CC's,
allowing them to be cleaned up significantly.
This paves the way to pick up additional optimizations on SELECT_CC, such
as sum-of-absolute-differences.
llvm-svn: 22757
|
| |
|
|
|
|
|
| |
reflects what the hardware is capable of. This significantly simplifies
the CC handling logic throughout the ISel.
llvm-svn: 22756
|
| |
|
|
|
|
| |
implement SELECT.
llvm-svn: 22755
|
| |
|
|
|
|
|
|
|
| |
1. Clean up how SelectIntImmediateExpr handles use counts.
2. "Subtract from" was not clearing hi 16 bits.
Patch by Jim Laskey
llvm-svn: 22754
|
| |
|
|
| |
llvm-svn: 22753
|
| |
|
|
| |
llvm-svn: 22752
|
| |
|
|
| |
llvm-svn: 22751
|
| |
|
|
|
|
| |
Patch by Jim Laskey!
llvm-svn: 22750
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
New routine: ISel::SelectIntImmediateExpr
2. Now checking use counts of large constants. If use count is > 2 then drop
thru so that the constant gets loaded into a register.
Source:
int %test1(int %a) {
entry:
%tmp.1 = add int %a, 123456789 ; <int> [#uses=1]
%tmp.2 = or int %tmp.1, 123456789 ; <int> [#uses=1]
%tmp.3 = xor int %tmp.2, 123456789 ; <int> [#uses=1]
%tmp.4 = sub int %tmp.3, -123456789 ; <int> [#uses=1]
ret int %tmp.4
}
Did Emit:
.machine ppc970
.text
.align 2
.globl _test1
_test1:
.LBB_test1_0: ; entry
addi r2, r3, -13035
addis r2, r2, 1884
ori r2, r2, 52501
oris r2, r2, 1883
xori r2, r2, 52501
xoris r2, r2, 1883
addi r2, r2, 52501
addis r3, r2, 1883
blr
Now Emits:
.machine ppc970
.text
.align 2
.globl _test1
_test1:
.LBB_test1_0: ; entry
lis r2, 1883
ori r2, r2, 52501
add r3, r3, r2
or r3, r3, r2
xor r3, r3, r2
add r3, r3, r2
blr
Patch by Jim Laskey!
llvm-svn: 22749
|
| |
|
|
|
|
|
| |
be an infinite loop when using g++-4.0.1*, this kills the ia64 nightly
tester. A proper fix shall be forthcoming!!! thanks for not killing me. :)
llvm-svn: 22748
|
| |
|
|
| |
llvm-svn: 22747
|
| |
|
|
|
|
|
| |
into just Y. This often occurs when it seperates loops that have collapsed loop
headers. This implements LoopSimplify/phi-node-simplify.ll
llvm-svn: 22746
|
| |
|
|
| |
llvm-svn: 22745
|
| |
|
|
|
|
| |
constant stride. This implements Transforms/IndVarsSimplify/variable-stride-ivs.ll
llvm-svn: 22744
|
| |
|
|
| |
llvm-svn: 22743
|
| |
|
|
| |
llvm-svn: 22742
|
| |
|
|
| |
llvm-svn: 22741
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
stride.
For code like this:
void foo(float *a, float *b, int n, int stride_a, int stride_b) {
int i;
for (i=0; i<n; i++)
a[i*stride_a] = b[i*stride_b];
}
we now emit:
.LBB_foo2_2: ; no_exit
lfs f0, 0(r4)
stfs f0, 0(r3)
addi r7, r7, 1
add r4, r2, r4
add r3, r6, r3
cmpw cr0, r7, r5
blt .LBB_foo2_2 ; no_exit
instead of:
.LBB_foo_2: ; no_exit
mullw r8, r2, r7 ;; multiply!
slwi r8, r8, 2
lfsx f0, r4, r8
mullw r8, r2, r6 ;; multiply!
slwi r8, r8, 2
stfsx f0, r3, r8
addi r2, r2, 1
cmpw cr0, r2, r5
blt .LBB_foo_2 ; no_exit
loops with variable strides occur pretty often. For example, in SPECFP2K
there are 317 variable strides in 177.mesa, 3 in 179.art, 14 in 188.ammp,
56 in 168.wupwise, 36 in 172.mgrid.
Now we can allow indvars to turn functions written like this:
void foo2(float *a, float *b, int n, int stride_a, int stride_b) {
int i, ai = 0, bi = 0;
for (i=0; i<n; i++)
{
a[ai] = b[bi];
ai += stride_a;
bi += stride_b;
}
}
into code like the above for better analysis. With this patch, they generate
identical code.
llvm-svn: 22740
|
| |
|
|
|
|
| |
by being more careful about updating PHI nodes
llvm-svn: 22739
|
| |
|
|
| |
llvm-svn: 22738
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
Once we compute the evolution for a GEP, tell SE about it. This allows users
of the GEP to know it, if the users are not direct. This allows us to compile
this testcase:
void fbSolidFillmmx(int w, unsigned char *d) {
while (w >= 64) {
*(unsigned long long *) (d + 0) = 0;
*(unsigned long long *) (d + 8) = 0;
*(unsigned long long *) (d + 16) = 0;
*(unsigned long long *) (d + 24) = 0;
*(unsigned long long *) (d + 32) = 0;
*(unsigned long long *) (d + 40) = 0;
*(unsigned long long *) (d + 48) = 0;
*(unsigned long long *) (d + 56) = 0;
w -= 64;
d += 64;
}
}
into:
.LBB_fbSolidFillmmx_2: ; no_exit
li r2, 0
stw r2, 0(r4)
stw r2, 4(r4)
stw r2, 8(r4)
stw r2, 12(r4)
stw r2, 16(r4)
stw r2, 20(r4)
stw r2, 24(r4)
stw r2, 28(r4)
stw r2, 32(r4)
stw r2, 36(r4)
stw r2, 40(r4)
stw r2, 44(r4)
stw r2, 48(r4)
stw r2, 52(r4)
stw r2, 56(r4)
stw r2, 60(r4)
addi r4, r4, 64
addi r3, r3, -64
cmpwi cr0, r3, 63
bgt .LBB_fbSolidFillmmx_2 ; no_exit
instead of:
.LBB_fbSolidFillmmx_2: ; no_exit
li r11, 0
stw r11, 0(r4)
stw r11, 4(r4)
stwx r11, r10, r4
add r12, r10, r4
stw r11, 4(r12)
stwx r11, r9, r4
add r12, r9, r4
stw r11, 4(r12)
stwx r11, r8, r4
add r12, r8, r4
stw r11, 4(r12)
stwx r11, r7, r4
add r12, r7, r4
stw r11, 4(r12)
stwx r11, r6, r4
add r12, r6, r4
stw r11, 4(r12)
stwx r11, r5, r4
add r12, r5, r4
stw r11, 4(r12)
stwx r11, r2, r4
add r12, r2, r4
stw r11, 4(r12)
addi r4, r4, 64
addi r3, r3, -64
cmpwi cr0, r3, 63
bgt .LBB_fbSolidFillmmx_2 ; no_exit
llvm-svn: 22737
|
| |
|
|
| |
llvm-svn: 22736
|
| |
|
|
| |
llvm-svn: 22735
|
| |
|
|
| |
llvm-svn: 22734
|
| |
|
|
| |
llvm-svn: 22733
|
| |
|
|
| |
llvm-svn: 22732
|
| |
|
|
| |
llvm-svn: 22731
|
| |
|
|
| |
llvm-svn: 22730
|