problem where it inlines the map insertion call too aggressively. Before this
change it was producing a frame size of 24k for Select_store(); now it's down
to 10k (by calling this method rather than the map insertion operator).
llvm-svn: 26094
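
As a minimal sketch of that fix (hypothetical names, not the LLVM code itself), the point is to move the map insertion behind a call boundary so its temporaries stop being inlined into every caller's frame:

#include <map>

// Non-inlined helper: the insertion's temporaries now live in this
// small frame instead of being inlined into each selection routine.
__attribute__((noinline))
static void InsertMapEntry(std::map<int, int> &Map, int Key, int Val) {
  Map[Key] = Val;
}

void SelectStoreLike(std::map<int, int> &Map) {
  // Each call costs one call instruction rather than an inlined copy
  // of the whole map-insertion sequence.
  InsertMapEntry(Map, 1, 10);
  InsertMapEntry(Map, 2, 20);
}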
Move all of the getTargetNode() methods out of SelectionDAG.h into
SelectionDAG.cpp. This prevents them from being inlined.
Change the getTargetNode() methods to return SDNode * instead of SDOperand,
to prevent copying. It should also help compilation speed.
llvm-svn: 26083
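
The same pattern outside LLVM, as an assumed illustration (the Graph/Node names are made up): declare in the header, define in the .cpp so the body can't be inlined, and return a pointer so no value object is copied at each call site:

// Graph.h (sketch)
struct Node { int Opcode; };
struct Graph {
  Node *getNode(int Opcode);  // declaration only; body lives in Graph.cpp
};

// Graph.cpp (sketch)
#include <deque>
static std::deque<Node> Pool;   // stable addresses for returned pointers
Node *Graph::getNode(int Opcode) {
  Pool.push_back(Node{Opcode});
  return &Pool.back();          // hand back a pointer, not a by-value copy
}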
llvm-svn: 26071
xori r6, r2, 1
rlwinm r6, r6, 0, 31, 31
cmpwi cr0, r6, 0
bne cr0, LBB1_3 ; endif
to this:
rlwinm r6, r2, 0, 31, 31
cmpwi cr0, r6, 0
beq cr0, LBB1_3 ; endif
llvm-svn: 26047
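
The fold behind this, written as plain C++ predicates (a sketch, not the PPC selector): xoring with 1 before testing the low bit is the same as testing the low bit and inverting the branch, so the xori disappears:

// before: xori + mask + branch-if-not-zero
bool branchTakenBefore(unsigned x) { return ((x ^ 1) & 1) != 0; }
// after: mask + branch-if-zero; no xor needed
bool branchTakenAfter(unsigned x)  { return (x & 1) == 0; }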
llvm-svn: 26006
llvm-svn: 26005
llvm-svn: 26001
llvm-svn: 26000
* Fix hasNUsesOfValue(); it should be const.
llvm-svn: 25990
llvm-svn: 25975
llvm-svn: 25960
int %rlwnm(int %A, int %B) {
%C = call int asm "rlwnm $0, $1, $2, $3, $4", "=r,r,r,n,n"(int %A, int %B, int 4, int 17)
ret int %C
}
into:
_rlwnm:
or r2, r3, r3
or r3, r4, r4
rlwnm r2, r2, r3, 4, 17 ;; note the immediates :)
or r3, r2, r2
blr
llvm-svn: 25955
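
The GCC-flavored counterpart (an x86 sketch rather than PPC, purely as an assumed illustration): an 'i' immediate constraint bakes the constant into the instruction text, just like the 4 and 17 above:

// The shift count is a compile-time constant printed as an immediate,
// so the generated instruction is "shll $4, ...".
unsigned shiftLeft4(unsigned x) {
  __asm__("shll %1, %0" : "+r"(x) : "i"(4));
  return x;
}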
llvm-svn: 25952
llvm-svn: 25945
anyway.
llvm-svn: 25941
llvm-svn: 25935
llvm-svn: 25934
llvm-svn: 25920
and instruction. This allows us to compile stuff like this:
bool %X(int %X) {
%Y = add int %X, 14
%Z = setne int %Y, 12345
ret bool %Z
}
to this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
ret
instead of this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
andl $1, %eax
ret
This occurs quite a bit with the X86 backend. For example, 25 times in
lambda, 30 times in 177.mesa, 14 times in galgel, 70 times in fma3d,
25 times in vpr, several hundred times in gcc, ~45 times in crafty,
~60 times in parser, ~140 times in eon, 110 times in perlbmk, 55 times in gap,
16 times in bzip2, 14 times in twolf, and 1-2 times in many other SPEC2K
programs.
llvm-svn: 25901
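
A rough sketch of the underlying reasoning, heavily simplified from what the real masked-value analysis does (the function names here are made up): setne produces a value that is 0 or 1, so every bit the 'andl $1' would clear is already known to be zero:

#include <cstdint>

// Which bits of a value are guaranteed zero? For a compare result
// (always 0 or 1), everything above bit 0 is.
uint32_t knownZeroBits(bool IsBoolLike) {
  return IsBoolLike ? 0xFFFFFFFEu : 0;
}

// "V & Mask" is a no-op when the mask only clears known-zero bits.
bool andIsRedundant(uint32_t Mask, uint32_t KnownZero) {
  return (~Mask & ~KnownZero) == 0;
}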
(C1-X) == C2 --> X == C1-C2
(X+C1) == C2 --> X == C2-C1
This allows us to compile this:
bool %X(int %X) {
%Y = add int %X, 14
%Z = setne int %Y, 12345
ret bool %Z
}
into this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
andl $1, %eax
ret
not this:
_X:
movl $14, %eax
addl 4(%esp), %eax
cmpl $12345, %eax
setne %al
movzbl %al, %eax
andl $1, %eax
ret
Testcase here: Regression/CodeGen/X86/compare-add.ll
Nukage of the 'and' coming up next.
llvm-svn: 25898
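
The folds themselves, checked as plain unsigned arithmetic where wrap-around makes them exact (a standalone sketch, not the DAG combiner code):

#include <cassert>
#include <cstdint>

// (X + C1) == C2  -->  X == C2 - C1
bool addForm(uint32_t X)   { return X + 14u == 12345u; }
bool foldedAdd(uint32_t X) { return X == 12345u - 14u; }  // X == 12331

// (C1 - X) == C2  -->  X == C1 - C2
bool subForm(uint32_t X)   { return 100u - X == 40u; }
bool foldedSub(uint32_t X) { return X == 100u - 40u; }    // X == 60

int main() {
  for (uint32_t x : {0u, 60u, 12331u, 0xFFFFFFF0u}) {
    assert(addForm(x) == foldedAdd(x));
    assert(subForm(x) == foldedSub(x));
  }
}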
llvm-svn: 25895
%C = call int asm "xyz $0, $1, $2, $3", "=r,r,r,0"(int %A, int %B, int 4)
and get:
xyz r2, r3, r4, r2
Note that the r2's are pinned together. Yaay for 2-address instructions.
llvm-svn: 25893
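
For comparison, GCC-style extended asm in C++ (x86 here, and a real add in place of the placeholder xyz) spells the same tie with a "0" matching constraint, pinning the output and the first input to one register:

// The "0" constraint says: this input lives in the same register as
// operand 0 (the output) -- a classic 2-address tie.
int tiedAdd(int a, int b) {
  int r;
  __asm__("addl %2, %0" : "=r"(r) : "0"(a), "r"(b));
  return r;  // r = a + b, with r and a sharing a register
}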
llvm-svn: 25879
compile:
int %test(int %A, int %B) {
%C = call int asm "xyz $0, $1, $2", "=r,r,r"(int %A, int %B)
ret int %C
}
into:
(0x8906130, LLVM BB @0x8902220):
%r2 = OR4 %r3, %r3
%r3 = OR4 %r4, %r4
INLINEASM <es:xyz $0, $1, $2>, %r2<def>, %r2, %r3
%r3 = OR4 %r2, %r2
BLR
which asmprints as:
_test:
or r2, r3, r3
or r3, r4, r4
xyz $0, $1, $2 ;; need to print the operands now :)
or r3, r2, r2
blr
llvm-svn: 25878
of the SELECT_CC, BR_CC, and BRTWOWAY_CC nodes.
llvm-svn: 25875
llvm-svn: 25865
llvm-svn: 25855
caused several test failures.
llvm-svn: 25852
llvm-svn: 25832
int %test_cpuid(int %op) {
%B = alloca int
%C = alloca int
%D = alloca int
%A = call int asm "cpuid", "=eax,==ebx,==ecx,==edx,eax"(int* %B, int* %C, int* %D, int %op)
%Bv = load int* %B
%Cv = load int* %C
%Dv = load int* %D
%x = add int %A, %Bv
%y = add int %x, %Cv
%z = add int %y, %Dv
ret int %z
}
to this:
_test_cpuid:
sub %ESP, 16
mov DWORD PTR [%ESP], %EBX
mov %EAX, DWORD PTR [%ESP + 20]
cpuid
mov DWORD PTR [%ESP + 8], %ECX
mov DWORD PTR [%ESP + 12], %EBX
mov DWORD PTR [%ESP + 4], %EDX
mov %ECX, DWORD PTR [%ESP + 12]
add %EAX, %ECX
mov %ECX, DWORD PTR [%ESP + 8]
add %EAX, %ECX
mov %ECX, DWORD PTR [%ESP + 4]
add %EAX, %ECX
mov %EBX, DWORD PTR [%ESP]
add %ESP, 16
ret
... note the proper register allocation. :)
It is unclear to me why the loads aren't folded into the adds.
llvm-svn: 25827
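
The same inline asm written with today's GCC/Clang constraint spelling (an assumed modernization, not the 2006 syntax above): four direct register outputs, so no stack slots are involved at all:

#include <cstdint>

// "=a"/"=b"/"=c"/"=d" pin the four cpuid results to EAX..EDX directly,
// the role the ==ebx/==ecx/==edx indirect outputs play above.
uint32_t testCpuid(uint32_t op) {
  uint32_t a, b, c, d;
  __asm__ volatile("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(op));
  return a + b + c + d;
}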
a chance to custom legalize setcc, which broke a bunch of C++ code.
Testcase here: CodeGen/X86/2006-01-30-LongSetcc.ll
llvm-svn: 25821
of lowered target nodes.
llvm-svn: 25804
interface, making isMaskedValueZeroForTargetNode simpler and usable from other parts of the compiler.
llvm-svn: 25803
to permit recursion.
llvm-svn: 25799
llvm-svn: 25794
llvm-svn: 25785
instead.
llvm-svn: 25780
llvm-svn: 25774
custom expansion of ConstantFP nodes.
llvm-svn: 25772
llvm-svn: 25767
faster. This cuts about 120 lines of code out of the legalizer (mostly code
checking to see if operands have changed).
It also fixes an ugly performance issue, where the legalizer cloned the entire
graph after any change. Now the "UpdateNodeOperands" method gives it a chance
to reuse nodes if the operands of a node change but not its opcode or value types.
This speeds up instruction selection time on kimwitu++ by about 8.2% with a
release build.
llvm-svn: 25746
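
A toy version of the reuse idea (names assumed; the real UpdateNodeOperands also has to re-CSE the node): hand back the same node whenever the legalized operands turn out to be identical, so unchanged subgraphs stay shared instead of being cloned:

#include <vector>

struct Node {
  int Opcode;
  std::vector<Node *> Ops;
};

// If nothing changed, the caller gets the original node back and no
// copy of the graph is made.
Node *updateNodeOperands(Node *N, const std::vector<Node *> &NewOps) {
  if (N->Ops == NewOps)
    return N;
  N->Ops = NewOps;  // mutate in place; opcode and value types unchanged
  return N;
}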
llvm-svn: 25744
llvm-svn: 25742
llvm-svn: 25740
llvm-svn: 25739
llvm-svn: 25738
llvm-svn: 25737
1. Pull out the expand cases for BSWAP and CT* into a separate function,
reducing the size of LegalizeOp.
2. Fix a bug where expand(bswap i64) was wrong when i64 is legal.
3. Changed LegalizeOp/PromoteOp so that the legalizer never needs to be
iterative. It now operates in a single pass over the nodes.
4. Simplify a LOT of code, with a net reduction of ~280 lines.
llvm-svn: 25736
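
For point 2, the generic shift-and-mask form of a 32-bit byte swap, the kind of expansion a legalizer emits when no native bswap exists (the i64 case in the fix is this pattern doubled and stitched together):

#include <cstdint>

// Move each byte to its mirrored position.
uint32_t expandBSwap32(uint32_t x) {
  return ((x & 0x000000FFu) << 24) |
         ((x & 0x0000FF00u) <<  8) |
         ((x & 0x00FF0000u) >>  8) |
         ((x & 0xFF000000u) >> 24);
}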
relegalizes the stuff it returns.
Add the ability to custom expand ADD/SUB, so that targets don't need to deal
with ADD_PARTS/SUB_PARTS if they don't want to.
Fix some obscure potential bugs and simplify code.
llvm-svn: 25732
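
What ADD_PARTS stands for, as a standalone sketch: a 64-bit add built from 32-bit halves with an explicit carry, which is the work a target takes on itself if it opts out of the expanded nodes:

#include <cstdint>

void add64Parts(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi,
                uint32_t &sumLo, uint32_t &sumHi) {
  sumLo = aLo + bLo;
  uint32_t carry = sumLo < aLo ? 1u : 0u;  // unsigned wrap => carry out
  sumHi = aHi + bHi + carry;
}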
ExpandLibCall do it itself.
llvm-svn: 25731