| Commit message (Collapse) | Author | Age | Files | Lines | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus2 (unsigned int x) { b.j += x; }
To:
_plus2:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        slwi r3, r3, 6
        add r3, r4, r3
        rlwimi r3, r4, 0, 26, 14
        stw r3, 0(r2)
        blr
instead of:
_plus2:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        rlwinm r5, r4, 26, 21, 31
        add r3, r5, r3
        rlwimi r4, r3, 6, 15, 25
        stw r4, 0(r2)
        blr
by eliminating an 'and'.
I'm pretty sure this is as small as we can go :)
llvm-svn: 23386
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus2 (unsigned int x) {
  b.j += x;
}
to:
plus2:
        mov %EAX, DWORD PTR [b]
        mov %ECX, %EAX
        and %ECX, 131008
        mov %EDX, DWORD PTR [%ESP + 4]
        shl %EDX, 6
        add %EDX, %ECX
        and %EDX, 131008
        and %EAX, -131009
        or %EDX, %EAX
        mov DWORD PTR [b], %EDX
        ret
instead of:
plus2:
        mov %EAX, DWORD PTR [b]
        mov %ECX, %EAX
        shr %ECX, 6
        and %ECX, 2047
        add %ECX, DWORD PTR [%ESP + 4]
        shl %ECX, 6
        and %ECX, 131008
        and %EAX, -131009
        or %ECX, %EAX
        mov DWORD PTR [b], %ECX
        ret
llvm-svn: 23385
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus3 (unsigned int x) { b.k += x; }
To:
plus3:
        mov %EAX, DWORD PTR [%ESP + 4]
        shl %EAX, 17
        add DWORD PTR [b], %EAX
        ret
instead of:
plus3:
        mov %EAX, DWORD PTR [%ESP + 4]
        shl %EAX, 17
        mov %ECX, DWORD PTR [b]
        add %EAX, %ECX
        and %EAX, -131072
        and %ECX, 131071
        or %ECX, %EAX
        mov DWORD PTR [b], %ECX
        ret
llvm-svn: 23384
 | 
| | 
| 
| 
|  | 
llvm-svn: 23383
 | 
| | 
| 
| 
|  | 
llvm-svn: 23382
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus3 (unsigned int x) {
  b.k += x;
}
to:
_plus3:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r3, 0(r2)
        rlwinm r4, r3, 0, 0, 14
        add r4, r4, r3
        rlwimi r4, r3, 0, 15, 31
        stw r4, 0(r2)
        blr
instead of:
_plus3:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        srwi r5, r4, 17
        add r3, r5, r3
        slwi r3, r3, 17
        rlwimi r3, r4, 0, 15, 31
        stw r3, 0(r2)
        blr
llvm-svn: 23381
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus1 (unsigned int x) {
  b.i += x;
}
as:
_plus1:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        add r3, r4, r3
        rlwimi r3, r4, 0, 0, 25
        stw r3, 0(r2)
        blr
instead of:
_plus1:
        lis r2, ha16(L_b$non_lazy_ptr)
        lwz r2, lo16(L_b$non_lazy_ptr)(r2)
        lwz r4, 0(r2)
        rlwinm r5, r4, 0, 26, 31
        add r3, r5, r3
        rlwimi r3, r4, 0, 0, 25
        stw r3, 0(r2)
        blr
llvm-svn: 23379
 | 
| | 
| 
| 
|  | 
llvm-svn: 23377
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
struct {
   unsigned int bit0:1;
   unsigned int ubyte:31;
} sdata;
void foo() {
  sdata.ubyte++;
}
into this:
foo:
        add DWORD PTR [sdata], 2
        ret
instead of this:
foo:
        mov %EAX, DWORD PTR [sdata]
        mov %ECX, %EAX
        add %ECX, 2
        and %ECX, -2
        and %EAX, 1
        or %EAX, %ECX
        mov DWORD PTR [sdata], %EAX
        ret
llvm-svn: 23376
 | 
| | 
| 
| 
|  | 
llvm-svn: 23374
 | 
| | 
| 
| 
|  | 
llvm-svn: 23371
 | 
| | 
| 
| 
|  | 
llvm-svn: 23366
 | 
| | 
| 
| 
|  | 
llvm-svn: 23357
 | 
| | 
| 
| 
|  | 
llvm-svn: 23356
 | 
| | 
| 
| 
| 
| 
| 
|  | 
specified.  The various *imm operands defined by PPC are really all i32,
even though the actual immediate is restricted to a smaller value in it.
llvm-svn: 23352
 | 
| | 
| 
| 
|  | 
llvm-svn: 23350
 | 
| | 
| 
| 
|  | 
llvm-svn: 23348
 | 
| | 
| 
| 
|  | 
llvm-svn: 23347
 | 
| | 
| 
| 
|  | 
llvm-svn: 23342
 | 
| | 
| 
| 
| 
| 
|  | 
can use/define class methods
llvm-svn: 23339
 | 
| | 
| 
| 
| 
| 
| 
| 
|  | 
with incoming arguments instead of the pregs themselves.  This fixes
the scheduler from causing problems by moving a copyfromreg for an argument
to after a select_cc node (now it can, and bad things won't happen).
llvm-svn: 23334
 | 
| | 
| 
| 
|  | 
llvm-svn: 23333
 | 
| | 
| 
| 
|  | 
llvm-svn: 23332
 | 
| | 
| 
| 
| 
| 
|  | 
into particular vregs, emit copies into the entry MBB.
llvm-svn: 23331
 | 
| | 
| 
| 
|  | 
llvm-svn: 23330
 | 
| | 
| 
| 
|  | 
llvm-svn: 23329
 | 
| | 
| 
| 
| 
| 
| 
|  | 
This is useful for 178.galgel where resolution of dope vectors (by the
optimizer) causes the scales to become apparent.
llvm-svn: 23328
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
|  | 
PHI node that is not the original PHI.
This fixes up a dot-product loop in galgel, speeding it up from 18.47s to
16.13s.
llvm-svn: 23327
 | 
| | 
| 
| 
| 
| 
|  | 
indentation, no functionality change
llvm-svn: 23325
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
|  | 
if () { store A -> P; } else { store B -> P; }
into a PHI node with one store, in the most trival case.  This implements
load.ll:test10.
llvm-svn: 23324
 | 
| | 
| 
| 
| 
| 
|  | 
each other.  This implements InstCombine/load.ll:test9
llvm-svn: 23322
 | 
| | 
| 
| 
| 
| 
| 
| 
|  | 
load are exactly consequtive.  This is picked up by other passes, but this
triggers thousands of times in fortran programs that use static locals
(and is thus a compile-time speedup).
llvm-svn: 23320
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
code for IV uses outside of loops that are not dominated by the latch block.
We should only convert these uses to use the post-inc value if they ARE
dominated by the latch block.
Also use a new LoopInfo method to simplify some code.
This fixes Transforms/LoopStrengthReduce/2005-09-12-UsesOutOutsideOfLoop.ll
llvm-svn: 23318
 | 
| | 
| 
| 
|  | 
llvm-svn: 23315
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
li r2, 0
LBB_test_1:     ; no_exit.2
        li r5, 0
        stw r5, 0(r3)
        addi r2, r2, 1
        addi r3, r3, 4
        cmpwi cr0, r2, 701
        blt cr0, LBB_test_1     ; no_exit.2
LBB_test_2:     ; loopexit.2.loopexit
        addi r2, r2, 1
        stw r2, 0(r4)
        blr
[zion ~/llvm]$ cat > ~/xx
Uses of IV's outside of the loop should use hte post-incremented version
of the IV, not the preincremented version.  This helps many loops (e.g. in sixtrack)
which used to generate code like this (this is the code from the
dont-hoist-simple-loop-constants.ll testcase):
_test:
        li r2, 0                 **** IV starts at 0
LBB_test_1:     ; no_exit.2
        or r5, r2, r2            **** Copy for loop exit
        li r2, 0
        stw r2, 0(r3)
        addi r3, r3, 4
        addi r2, r5, 1
        addi r6, r5, 2           **** IV+2
        cmpwi cr0, r6, 701
        blt cr0, LBB_test_1     ; no_exit.2
LBB_test_2:     ; loopexit.2.loopexit
        addi r2, r5, 2       ****  IV+2
        stw r2, 0(r4)
        blr
And now generated code like this:
_test:
        li r2, 1               *** IV starts at 1
LBB_test_1:     ; no_exit.2
        li r5, 0
        stw r5, 0(r3)
        addi r2, r2, 1
        addi r3, r3, 4
        cmpwi cr0, r2, 701     *** IV.postinc + 0
        blt cr0, LBB_test_1
LBB_test_2:     ; loopexit.2.loopexit
        stw r2, 0(r4)          *** IV.postinc + 0
        blr
llvm-svn: 23313
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
We used to emit this code for it:
_test:
        li r2, 1     ;; Value tying up a register for the whole loop
        li r5, 0
LBB_test_1:     ; no_exit.2
        or r6, r5, r5
        li r5, 0
        stw r5, 0(r3)
        addi r5, r6, 1
        addi r3, r3, 4
        add r7, r2, r5  ;; should be addi r7, r5, 1
        cmpwi cr0, r7, 701
        blt cr0, LBB_test_1     ; no_exit.2
LBB_test_2:     ; loopexit.2.loopexit
        addi r2, r6, 2
        stw r2, 0(r4)
        blr
now we emit this:
_test:
        li r2, 0
LBB_test_1:     ; no_exit.2
        or r5, r2, r2
        li r2, 0
        stw r2, 0(r3)
        addi r3, r3, 4
        addi r2, r5, 1
        addi r6, r5, 2   ;; whoa, fold those adds!
        cmpwi cr0, r6, 701
        blt cr0, LBB_test_1     ; no_exit.2
LBB_test_2:     ; loopexit.2.loopexit
        addi r2, r5, 2
        stw r2, 0(r4)
        blr
more improvement coming.
llvm-svn: 23306
 | 
| | 
| 
| 
|  | 
llvm-svn: 23304
 | 
| | 
| 
| 
| 
| 
|  | 
when storing to an 8-bit memory location), as most don't.
llvm-svn: 23303
 | 
| | 
| 
| 
|  | 
llvm-svn: 23302
 | 
| | 
| 
| 
| 
| 
| 
|  | 
select (x < y), 1, 0 -> (x < y) incorrectly: the setcc returns i1 but the
select returned i32.  Add the zero extend as needed.
llvm-svn: 23301
 | 
| | 
| 
| 
|  | 
llvm-svn: 23300
 | 
| | 
| 
| 
| 
| 
|  | 
for FP as well.  This triggers a couple dozen times on 177.mesa (for example).
llvm-svn: 23299
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
with copies, leading to code like this:
       lwz r4, 380(r1)
       or r10, r4, r4    ;; Last use of r4
By teaching the PPC backend how to fold spills into copies, we now get this
code:
       lwz r10, 380(r1)
wow. :)
This reduces a testcase nate sent me from 1505 instructions to 1484.
Note that this could handle FP values but doesn't currently, for reasons
mentioned in the patch
llvm-svn: 23298
 | 
| | 
| 
| 
|  | 
llvm-svn: 23297
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
|  | 
are, simplify logic, and cause things to not be nested as deeply.  This also
uses MRI->areAliases instead of an explicit loop.
No functionality change, just code cleanup.
llvm-svn: 23296
 | 
| | 
| 
| 
| 
| 
|  | 
as setcc and select next.
llvm-svn: 23295
 | 
| | 
| 
| 
|  | 
llvm-svn: 23294
 | 
| | 
| 
| 
| 
| 
| 
| 
|  | 
only add a reload live range once for the instruction.  This is one step
towards fixing a regalloc pessimization that Nate notice, but is later undone
by the spiller (so no code is changed).
llvm-svn: 23293
 | 
| | 
| 
| 
| 
| 
| 
| 
|  | 
is zero.  This lets the register allocator elide some copies in some cases.
This implements CodeGen/PowerPC/rlwimi-commute.ll
llvm-svn: 23292
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
1. Add support for defining Pattern's, which can match expressions when there
   is no instruction that directly implements something.  Instructions usually
   implicitly define patterns.
2. Add support for defining SDNodeXForm's, which are node transformations.
   This seperates the concept of a node xform out from the existing predicate
   support.
Using this new stuff, we add a few instruction patterns, one for testing, and
two for OR/XOR by an arbitrary immediate.
llvm-svn: 23286
 |