llvm-svn: 17167
test suite
llvm-svn: 17147
1. optional shift left
2. and x, immX
3. and y, immY
4. or z, x, y
==> rlwimi z, x, y, shift, mask begin, mask end
where immX == ~immY and immX is a run of set bits. This transformation
fires 32 times on voronoi, once on espresso, and probably several
dozen times on external benchmarks such as gcc.
To put this in terms of actual code generated for
struct B { unsigned a : 3; unsigned b : 2; };
void storeA (struct B *b, int v) { b->a = v;}
void storeB (struct B *b, int v) { b->b = v;}
Old:
_storeA:
rlwinm r2, r4, 0, 29, 31
lwz r4, 0(r3)
rlwinm r4, r4, 0, 0, 28
or r2, r4, r2
stw r2, 0(r3)
blr
_storeB:
rlwinm r2, r4, 3, 0, 28
rlwinm r2, r2, 0, 27, 28
lwz r4, 0(r3)
rlwinm r4, r4, 0, 29, 26
or r2, r2, r4
stw r2, 0(r3)
blr
New:
_storeA:
lwz r2, 0(r3)
rlwimi r2, r4, 0, 29, 31
stw r2, 0(r3)
blr
_storeB:
lwz r2, 0(r3)
rlwimi r2, r4, 3, 27, 28
stw r2, 0(r3)
blr
llvm-svn: 17078
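As a rough illustration of the applicability test the fold above needs (a standalone sketch, not the backend's actual selector code; the helper names are made up), the two AND immediates must be exact complements and the inserted immediate must be one contiguous run of set bits. The position of that run then supplies the rlwimi mask begin/mask end operands (29..31 for storeA, 27..28 for storeB); the wrap-around variant of the run check shows up again under the rlwinm change further down.

#include <cassert>
#include <cstdint>

// Sketch: is m a single contiguous run of set bits?
static bool isRunOfOnes(uint32_t m) {
    if (m == 0) return false;
    while (!(m & 1u)) m >>= 1;     // strip trailing zeros
    return (m & (m + 1)) == 0;     // what remains must be 2^k - 1
}

// The fold condition: complementary masks, with the inserted mask contiguous.
static bool foldsToRlwimi(uint32_t immX, uint32_t immY) {
    return immX == ~immY && isRunOfOnes(immX);
}

int main() {
    assert(foldsToRlwimi(0x00000007u, 0xFFFFFFF8u));   // storeA: bits 29..31
    assert(foldsToRlwimi(0x00000018u, 0xFFFFFFE7u));   // storeB: bits 27..28 after the shift by 3
    assert(!foldsToRlwimi(0x00000005u, 0xFFFFFFFAu));  // not a contiguous run, no fold
    return 0;
}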
flag rotate left word immediate then mask insert (rlwimi) as a two-address
instruction, and update the ISel usage of the instruction accordingly.
This will allow us to properly schedule rlwimi, and use it to efficiently
codegen bitfield operations.
llvm-svn: 17068
llvm-svn: 17050
This transformation fires a few dozen times across the testsuite.
For example, int test2(int X) { return X ^ 0x0FF00FF0; }
Old:
_test2:
lis r2, 4080
ori r2, r2, 4080
xor r3, r3, r2
blr
New:
_test2:
xoris r3, r3, 4080
xori r3, r3, 4080
blr
llvm-svn: 17004
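For context, a minimal sketch of the constant split the new sequence relies on (illustrative names, not the commit's code): xori applies a zero-extended 16-bit immediate and xoris applies one shifted left by 16, so any 32-bit XOR constant can be covered by XOR-ing with its two halves.

#include <cassert>
#include <cstdint>

// Split a 32-bit XOR constant into the halves used by xoris (high) and xori (low).
// 0x0FF00FF0 -> hi = 4080, lo = 4080, matching the generated code above.
struct XorImms { uint16_t hi; uint16_t lo; };

static XorImms splitXorConstant(uint32_t c) {
    XorImms imms;
    imms.hi = static_cast<uint16_t>(c >> 16);       // operand for xoris
    imms.lo = static_cast<uint16_t>(c & 0xFFFF);    // operand for xori
    return imms;
}

int main() {
    XorImms imms = splitXorConstant(0x0FF00FF0u);
    assert(imms.hi == 4080 && imms.lo == 4080);
    // The pair is only needed when both halves are nonzero; otherwise a single
    // xori or xoris already covers the constant.
    return 0;
}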
of one or more 1 bits (which may wrap from the least significant bit to the most
significant bit) as the rlwinm rather than andi., andis., or some longer
instruction sequence.
int andn4(int z) { return z & -4; }
int clearhi(int z) { return z & 0x0000FFFF; }
int clearlo(int z) { return z & 0xFFFF0000; }
int clearmid(int z) { return z & 0x00FFFF00; }
int clearwrap(int z) { return z & 0xFF0000FF; }
_andn4:
rlwinm r3, r3, 0, 0, 29
blr
_clearhi:
rlwinm r3, r3, 0, 16, 31
blr
_clearlo:
rlwinm r3, r3, 0, 0, 15
blr
_clearmid:
rlwinm r3, r3, 0, 8, 23
blr
_clearwrap:
rlwinm r3, r3, 0, 24, 7
blr
llvm-svn: 16832
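The interesting case above is clearwrap, where the run of 1 bits wraps around the word. A small sketch of how such a mask can be turned into rlwinm operands, under the observation that a wrapped run of ones is precisely a mask whose complement is an ordinary non-wrapped run; the helpers are illustrative rather than the backend's own.

#include <cassert>
#include <cstdint>

// Leading/trailing zero counts for a nonzero 32-bit value (portable sketch).
static unsigned lead0(uint32_t v)  { unsigned n = 0; while (!(v & 0x80000000u)) { v <<= 1; ++n; } return n; }
static unsigned trail0(uint32_t v) { unsigned n = 0; while (!(v & 1u))          { v >>= 1; ++n; } return n; }

static bool isContiguous(uint32_t m) {
    if (m == 0) return false;
    uint32_t run = (0xFFFFFFFFu >> lead0(m)) & (0xFFFFFFFFu << trail0(m));
    return m == run;
}

// Map an AND mask that is a (possibly wrapped) run of ones to rlwinm's MB/ME.
// PowerPC numbers bit 0 as the most significant bit.
static bool maskToMBME(uint32_t m, unsigned &MB, unsigned &ME) {
    if (isContiguous(m)) {                       // ordinary run, e.g. clearmid
        MB = lead0(m);
        ME = 31 - trail0(m);
        return true;
    }
    if (m != 0 && m != 0xFFFFFFFFu && isContiguous(~m)) {   // wrapped run: the zeros are contiguous
        MB = 31 - trail0(~m) + 1;                // first set bit after the zero run
        ME = lead0(~m) - 1;                      // last set bit before the zero run
        return true;
    }
    return false;
}

int main() {
    unsigned MB, ME;
    assert(maskToMBME(0x00FFFF00u, MB, ME) && MB == 8  && ME == 23);  // clearmid
    assert(maskToMBME(0xFF0000FFu, MB, ME) && MB == 24 && ME == 7);   // clearwrap
    assert(maskToMBME(0xFFFFFFFCu, MB, ME) && MB == 0  && ME == 29);  // andn4 (z & -4)
    return 0;
}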
1. Fix an illegal argument to getClassB when deciding whether or not to
sign extend a byte load.
2. Initial addition of isLoad and isStore flags to the instruction .td file
for eventual use in a scheduler.
3. Rewrite of how constants are handled in emitSimpleBinaryOperation so
that we can emit the PowerPC shifted immediate instructions far more
often. This allows us to emit the following code:
int foo(int x) { return x | 0x00F0000; }
_foo:
.LBB_foo_0: ; entry
; IMPLICIT_DEF
oris r3, r3, 15
blr
llvm-svn: 16826
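A compact sketch of the decision item 3 describes, with made-up names: OR against a 32-bit constant needs ori when only the low half is set, oris alone when only the high half is set (as in the foo example), and both otherwise.

#include <cassert>
#include <cstdint>
#include <string>

// Pick the OR-immediate sequence for a 32-bit constant (sketch, not the real emitter).
static std::string orSequenceFor(uint32_t c) {
    uint32_t lo = c & 0xFFFF, hi = c >> 16;
    if (hi == 0) return "ori";       // low half only
    if (lo == 0) return "oris";      // high half only: a single shifted immediate
    return "oris+ori";               // both halves set
}

int main() {
    assert(orSequenceFor(0x000F0000u) == "oris");      // the foo() example above
    assert(orSequenceFor(0x00000015u) == "ori");
    assert(orSequenceFor(0x0FF00FF0u) == "oris+ori");
    return 0;
}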
llvm-svn: 16770
llvm-svn: 16765
instruction.
Now, rather than emitting the following loop out of bisect:
.LBB_main_19: ; no_exit.0.i
rlwinm r3, r2, 3, 0, 28
lfdx f1, r3, r27
addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
fsub f2, f2, f1
addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
fcmpu cr0, f1, f4
bge .LBB_main_64 ; no_exit.0.i
.LBB_main_63: ; no_exit.0.i
b .LBB_main_65 ; no_exit.0.i
.LBB_main_64: ; no_exit.0.i
fmr f2, f1
.LBB_main_65: ; no_exit.0.i
addi r3, r2, 1
rlwinm r3, r3, 3, 0, 28
lfdx f1, r3, r27
addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
fsub f4, f4, f1
addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
lfd f5, lo16(.CPI_main_1-"L00000$pb")(r3)
fcmpu cr0, f1, f5
bge .LBB_main_67 ; no_exit.0.i
.LBB_main_66: ; no_exit.0.i
b .LBB_main_68 ; no_exit.0.i
.LBB_main_67: ; no_exit.0.i
fmr f4, f1
.LBB_main_68: ; no_exit.0.i
fadd f1, f2, f4
addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
fmul f1, f1, f2
rlwinm r3, r2, 3, 0, 28
lfdx f2, r3, r28
fadd f4, f2, f1
fcmpu cr0, f4, f0
bgt .LBB_main_70 ; no_exit.0.i
.LBB_main_69: ; no_exit.0.i
b .LBB_main_71 ; no_exit.0.i
.LBB_main_70: ; no_exit.0.i
fmr f0, f4
.LBB_main_71: ; no_exit.0.i
fsub f1, f2, f1
addi r2, r2, -1
fcmpu cr0, f1, f3
blt .LBB_main_73 ; no_exit.0.i
.LBB_main_72: ; no_exit.0.i
b .LBB_main_74 ; no_exit.0.i
.LBB_main_73: ; no_exit.0.i
fmr f3, f1
.LBB_main_74: ; no_exit.0.i
cmpwi cr0, r2, -1
fmr f16, f0
fmr f17, f3
bgt .LBB_main_19 ; no_exit.0.i
We emit this instead:
.LBB_main_19: ; no_exit.0.i
rlwinm r3, r2, 3, 0, 28
lfdx f1, r3, r27
addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
lfd f2, lo16(.CPI_main_1-"L00000$pb")(r3)
fsub f2, f2, f1
fsel f1, f1, f1, f2
addi r3, r2, 1
rlwinm r3, r3, 3, 0, 28
lfdx f2, r3, r27
addis r3, r30, ha16(.CPI_main_1-"L00000$pb")
lfd f4, lo16(.CPI_main_1-"L00000$pb")(r3)
fsub f4, f4, f2
fsel f2, f2, f2, f4
fadd f1, f1, f2
addis r3, r30, ha16(.CPI_main_2-"L00000$pb")
lfd f2, lo16(.CPI_main_2-"L00000$pb")(r3)
fmul f1, f1, f2
rlwinm r3, r2, 3, 0, 28
lfdx f2, r3, r28
fadd f4, f2, f1
fsub f5, f0, f4
fsel f0, f5, f0, f4
fsub f1, f2, f1
addi r2, r2, -1
fsub f2, f1, f3
fsel f3, f2, f3, f1
cmpwi cr0, r2, -1
fmr f16, f0
fmr f17, f3
bgt .LBB_main_19 ; no_exit.0.i
llvm-svn: 16764
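For readers unfamiliar with fsel: it computes (a >= 0.0 ? c : b) entirely in the FPU, which is why each compare-and-branch diamond above collapses into an fsub feeding an fsel. A minimal C++ illustration of the source-level shape involved (an illustration, not code from bisect); note that fsel-style selection is not IEEE-exact for NaNs or signed zero.

// Illustrative only: the kind of floating-point select that maps onto fsel.
// fsel fD, fA, fC, fB computes (fA >= 0.0) ? fC : fB, so a compare against an
// arbitrary value is first rewritten as a subtraction against zero.
double clamp_min(double x, double lo) {
    // On PowerPC this can become:  fsub f0, x, lo ;  fsel result, f0, x, lo
    return (x - lo >= 0.0) ? x : lo;   // not IEEE-exact for NaN / signed zero
}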
instruction. Don't create overlapping register lifetimes
llvm-svn: 16580
llvm-svn: 16578
integers that we can use as immediate values in instructions.
Example from yacr2:
- lis r10, -1
- ori r10, r10, 65535
- add r28, r28, r10
+ addi r28, r28, -1
addi r7, r7, 1
addi r9, r9, 1
b .LBB_main_9 ; loopentry.1.i214
llvm-svn: 16566
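The yacr2 hunk boils down to noticing that -1 fits in addi's signed 16-bit immediate field, so no lis/ori materialization is needed. A trivial sketch of that range check (hypothetical helper name):

#include <cassert>
#include <cstdint>

// addi takes a sign-extended 16-bit immediate: [-32768, 32767].
static bool fitsInSImm16(int32_t v) {
    return v >= -32768 && v <= 32767;
}

int main() {
    assert(fitsInSImm16(-1));        // addi r28, r28, -1 in the diff above
    assert(!fitsInSImm16(65535));    // out of range; needs a longer sequence
    return 0;
}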
llvm-svn: 16519
the ISel to use indexed and non-zero immediate offsets for GEPs that have
more than one use. This is common for instruction sequences such as a load
followed by a modify and store to the same address.
llvm-svn: 16493
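A tiny example of the load-modify-store shape described above, where a single address computation (the GEP for a[i]) feeds both the load and the store; illustration only, not code from the change.

// One GEP (&a[i]) with two uses: the load and the store.  With the change
// above, the ISel can keep the indexed or offset address form for both,
// e.g. a register+register address feeding both lwzx and stwx.
void bump(int *a, int i, int delta) {
    a[i] = a[i] + delta;
}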
llvm-svn: 16478
C++ front-end in gcc does not mangle classes in anonymous namespaces correctly.
llvm-svn: 16470
32- and 64-bit AsmWriters unified
Darwin- and AIX-specific features of the AsmWriter split out
llvm-svn: 16163
llvm-svn: 16142
Move include/Config and include/Support into include/llvm/Config,
include/llvm/ADT and include/llvm/Support. From here on out, all LLVM
public header files must be under include/llvm/.
llvm-svn: 16137
cast fp->bool
cast ulong->fp
algebraic right shift long by non-constant value
These changes were tested across most of the test suite. Fixes Regression/casts.
llvm-svn: 16081
llvm-svn: 15991
hurt a lot in others. Instead, improve branching version of SetCC and
Select instructions. The old code will be in CVS should we ever need to
dig it up again.
llvm-svn: 15979
Add necessary instructions for this transformation to the .td file.
llvm-svn: 15952
llvm-svn: 15926
llvm-svn: 15914
Change int->float cast code to put conversion constants in constant pool.
Shorten code sequence for constant pool fp loads.
Remove LOADLoDirect/LOADLoIndirect pseudo instructions and tweak the asmwriter.
llvm-svn: 15913
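The commit does not spell out its exact sequence, but the classic PowerPC-era int-to-double trick gives a feel for why a conversion constant ends up in the constant pool: the integer is spliced into the mantissa of a large double and the bias is subtracted back out. A self-contained sketch of that technique, not necessarily what this change emits:

#include <cassert>
#include <cstdint>
#include <cstring>

// Classic bit-splicing int32 -> double conversion.  The subtracted bias
// (2^52 + 2^31) is the kind of value that gets loaded from the constant pool.
static double int32ToDouble(int32_t n) {
    uint64_t bits = (0x43300000ULL << 32)              // high word: exponent of 2^52
                  | (uint32_t)(n ^ 0x80000000U);       // low word: n biased by 2^31
    double d;
    std::memcpy(&d, &bits, sizeof d);                  // reinterpret the bit pattern
    return d - 4503601774854144.0;                     // subtract 2^52 + 2^31
}

int main() {
    assert(int32ToDouble(0) == 0.0);
    assert(int32ToDouble(-7) == -7.0);
    assert(int32ToDouble(123456789) == 123456789.0);
    return 0;
}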
that have a frame pointer. This change fixes Burg. In addition, make
the necessary changes to floating point code gen and constant loading after
Chris Lattner's fixes to the asm writer. These changes fix MallocBench/gs
llvm-svn: 15873
llvm-svn: 15850
sized allocas.
llvm-svn: 15806
double store; optimize cmplwi generation.
llvm-svn: 15759
linkage function addresses
llvm-svn: 15742
Darwin. Also, change asm printer to output proper stubs for external
functions whose address is passed as an argument to aid in bugpointing.
llvm-svn: 15721
llvm-svn: 15720
llvm-svn: 15719
printer.
llvm-svn: 15662
llvm-svn: 15661
llvm-svn: 15648
llvm-svn: 15634
llvm-svn: 15631
Replace STDX (store 64 bit int indexed) with STFDX (store double indexed)
Fix latent bug in indexed load generation
Generate indexed loads and stores in many more cases
llvm-svn: 15626
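For context, indexed forms cover the case where the effective address is base register plus index register rather than base plus a small constant. A small illustration of accesses that map naturally onto lfdx/stfdx (an illustration, not from the commit):

// base + i*8 is a register+register address once i*8 is in a register
// (the bisect loop above scales with rlwinm), so these accesses are natural
// lfdx / stfdx candidates.
double read_elem(const double *base, long i) {
    return base[i];            // scale i by 8, then lfdx
}

void write_elem(double *base, long i, double v) {
    base[i] = v;               // stfdx with the same scaled index
}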
Use a PowerPC specific prolog epilog inserter to control where spilled
callee save regs are placed on the stack.
Get rid of implicit return address stack slot, save return address reg
(LR) in appropriate slot
Improve code generated for functions that don't have calls or access
globals
Note from Chris: PowerPCPEI will eventually be eliminated, once the
functionality is merged into CodeGen/PrologEpilogInserter.cpp
llvm-svn: 15536
llvm-svn: 15306
llvm-svn: 15299
Patch by Nate Begeman.
llvm-svn: 15282
llvm-svn: 15270
* Implemented GEP folding
* Dynamically output global address stuff once per function
* Fix casting fp<->short/byte
Patch contributed by Nate Begeman.
llvm-svn: 15237
llvm-svn: 15146
llvm-svn: 15143