| Commit message (Collapse) | Author | Age | Files | Lines |
| |
|
|
| |
llvm-svn: 66725
|
| |
|
|
|
|
| |
optimization too late and left the live intervals to be out of sync with instructions. This fixes 8b10b.
llvm-svn: 66715
|
| |
|
|
|
|
| |
clear some bits.
llvm-svn: 66684
|
| |
|
|
|
|
| |
pshuflw/hw.
llvm-svn: 66645
|
| |
|
|
|
|
|
|
|
| |
alignment of the generated constant pool entry to the
desired alignment of a type. If we don't do this, we end up
trying to do movsd from 4-byte alignment memory. This fixes
450.soplex and 456.hmmer.
llvm-svn: 66641
|
| |
|
|
|
|
|
| |
1. Use the same value# to represent unknown values being merged into sub-registers.
2. When coalescer commute an instruction and the destination is a physical register, update its sub-registers by merging in the extended ranges.
llvm-svn: 66610
|
| |
|
|
| |
llvm-svn: 66581
|
| |
|
|
|
|
| |
/ Darwin.
llvm-svn: 66574
|
| |
|
|
| |
llvm-svn: 66534
|
| |
|
|
|
|
| |
format strings with the standard ${:foo} syntax.
llvm-svn: 66527
|
| |
|
|
| |
llvm-svn: 66434
|
| |
|
|
|
|
| |
optimizer can create values of funky scalar types.
llvm-svn: 66429
|
| |
|
|
|
|
| |
the same instruction as kill. This fixes PR3706.
llvm-svn: 66428
|
| |
|
|
|
|
| |
version accordingly.
llvm-svn: 66365
|
| |
|
|
| |
llvm-svn: 66363
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
For 2009-03-07-FPConstSelect.ll we now produce:
_f:
xorl %eax, %eax
testl %edi, %edi
movl $4, %ecx
cmovne %rax, %rcx
leaq LCPI1_0(%rip), %rax
movss (%rcx,%rax), %xmm0
ret
previously we produced:
_f:
subl $4, %esp
cmpl $0, 8(%esp)
movss LCPI1_0, %xmm0
je LBB1_2 ## entry
LBB1_1: ## entry
movss LCPI1_1, %xmm0
LBB1_2: ## entry
movss %xmm0, (%esp)
flds (%esp)
addl $4, %esp
ret
on PPC the code also improves to:
_f:
cntlzw r2, r3
srwi r2, r2, 5
li r3, lo16(LCPI1_0)
slwi r2, r2, 2
addis r3, r3, ha16(LCPI1_0)
lfsx f1, r3, r2
blr
from:
_f:
li r2, lo16(LCPI1_1)
cmplwi cr0, r3, 0
addis r2, r2, ha16(LCPI1_1)
beq cr0, LBB1_2 ; entry
LBB1_1: ; entry
li r2, lo16(LCPI1_0)
addis r2, r2, ha16(LCPI1_0)
LBB1_2: ; entry
lfs f1, 0(r2)
blr
This also improves the existing pic-cpool case from:
foo:
subl $12, %esp
call .Lllvm$1.$piclabel
.Lllvm$1.$piclabel:
popl %eax
addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
cmpl $0, 16(%esp)
movsd .LCPI1_0@GOTOFF(%eax), %xmm0
je .LBB1_2 # entry
.LBB1_1: # entry
movsd .LCPI1_1@GOTOFF(%eax), %xmm0
.LBB1_2: # entry
movsd %xmm0, (%esp)
fldl (%esp)
addl $12, %esp
ret
to:
foo:
call .Lllvm$1.$piclabel
.Lllvm$1.$piclabel:
popl %eax
addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
xorl %ecx, %ecx
cmpl $0, 4(%esp)
movl $8, %edx
cmovne %ecx, %edx
fldl .LCPI1_0@GOTOFF(%eax,%edx)
ret
This triggers a few dozen times in spec FP 2000.
llvm-svn: 66358
|
| |
|
|
|
|
|
|
|
|
|
| |
the same say the "test" instruction does in overflow cases,
so eliminating the test is only safe when those bits aren't
needed, as is the case for COND_E and COND_NE, or if it
can be proven that no overflow will occur. For now, just
restrict the optimization to COND_E and COND_NE and don't
do any overflow analysis.
llvm-svn: 66318
|
| |
|
|
|
|
|
|
|
|
|
| |
with multiple chain operands. This can occur when the scheduler
has added chain operands to a node that already has a chain
operand, in order to handle physical register dependencies.
This fixes an llvm-gcc bootstrap failure on x86-64 introduced
in r66058.
llvm-svn: 66240
|
| |
|
|
|
|
|
| |
negative one, as subtracts of immediates are canonicalized
to adds.
llvm-svn: 66180
|
| |
|
|
|
|
| |
there should be no spilling of anything.
llvm-svn: 66179
|
| |
|
|
| |
llvm-svn: 66140
|
| |
|
|
| |
llvm-svn: 66058
|
| |
|
|
|
|
| |
Update a testcase to check this.
llvm-svn: 66029
|
| |
|
|
| |
llvm-svn: 66024
|
| |
|
|
| |
llvm-svn: 66021
|
| |
|
|
| |
llvm-svn: 66008
|
| |
|
|
| |
llvm-svn: 66006
|
| |
|
|
|
|
| |
result from add, sub, inc, and dec instructions in simple cases.
llvm-svn: 66004
|
| |
|
|
|
|
| |
what llvm-gcc generates so codegen knows flags register is being clobbered by inline asm. 2. BURR scheduler should also check if inline asm nodes can clobber "live" physical registers. Previously it was only checking target nodes with implicit defs.
llvm-svn: 65996
|
| |
|
|
|
|
|
|
|
|
|
|
| |
so it changed it into a 31 via the TLO.ShrinkDemandedConstant() call. Then it
would go through the DAG combiner again. This time it had a value of 31, which
was turned into a -1 by TLI.SimplifyDemandedBits(). This would ping pong
forever.
Teach the TLO.ShrinkDemandedConstant() call not to lower a value if the demanded
value is an XOR of all ones.
llvm-svn: 65985
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
extracts + build_vector into a shuffle would fail, because the
type of the new build_vector would not be legal. Try harder to
create a legal build_vector type. Note: this will be totally
irrelevant once vector_shuffle no longer takes a build_vector for
shuffle mask.
New:
_foo:
xorps %xmm0, %xmm0
xorps %xmm1, %xmm1
subps %xmm1, %xmm1
mulps %xmm0, %xmm1
addps %xmm0, %xmm1
movaps %xmm1, 0
Old:
_foo:
xorps %xmm0, %xmm0
movss %xmm0, %xmm1
xorps %xmm2, %xmm2
unpcklps %xmm1, %xmm2
pshufd $80, %xmm1, %xmm1
unpcklps %xmm1, %xmm2
pslldq $16, %xmm2
pshufd $57, %xmm2, %xmm1
subps %xmm0, %xmm1
mulps %xmm0, %xmm1
addps %xmm0, %xmm1
movaps %xmm1, 0
llvm-svn: 65791
|
| |
|
|
|
|
|
|
|
|
|
| |
Look for situations like this:
%reg1024<def> = MOV r1
%reg1025<def> = MOV r0
%reg1026<def> = ADD %reg1024, %reg1025
r0 = MOV %reg1026
Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
llvm-svn: 65752
|
| |
|
|
| |
llvm-svn: 65679
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
pic | declaration | linkage | visibility |
!pic | declaration | external | default | tls1.ll tls2.ll | local exec
pic | declaration | external | default | tls1-pic.ll tls2-pic.ll | general dynamic
!pic | !declaration | external | default | tls3.ll tls4.ll | initial exec
pic | !declaration | external | default | tls3-pic.ll tls4-pic.ll | general dynamic
!pic | declaration | external | hidden | tls7.ll tls8.ll | local exec
pic | declaration | external | hidden | X | local dynamic
!pic | !declaration | external | hidden | tls9.ll tls10.ll | local exec
pic | !declaration | external | hidden | X | local dynamic
!pic | declaration | internal | default | tls5.ll tls6.ll | local exec
pic | declaration | internal | default | X | local dynamic
The ones marked with an X have not been implemented since local dynamic is not implemented.
llvm-svn: 65632
|
| |
|
|
| |
llvm-svn: 65600
|
| |
|
|
|
|
| |
implicit_def's.
llvm-svn: 65592
|
| |
|
|
|
|
| |
intrinsics expect the high bits will not be modified.
llvm-svn: 65499
|
| |
|
|
|
|
| |
already marked livein.
llvm-svn: 65498
|
| |
|
|
| |
llvm-svn: 65482
|
| |
|
|
|
|
| |
if it sees TLS addresses.
llvm-svn: 65341
|
| |
|
|
|
|
| |
for avoiding dynamic stack realignment.
llvm-svn: 65319
|
| |
|
|
| |
llvm-svn: 65313
|
| |
|
|
| |
llvm-svn: 65312
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
Generate better code for v16i8 shuffles on SSE2 (avoids stack)
Generate pshufb for v8i16 and v16i8 shuffles on SSSE3 where it is fewer uops.
Document the shuffle matching logic and add some FIXMEs for later further
cleanups.
New tests that test the above.
Examples:
New:
_shuf2:
pextrw $7, %xmm0, %eax
punpcklqdq %xmm1, %xmm0
pshuflw $128, %xmm0, %xmm0
pinsrw $2, %eax, %xmm0
Old:
_shuf2:
pextrw $2, %xmm0, %eax
pextrw $7, %xmm0, %ecx
pinsrw $2, %ecx, %xmm0
pinsrw $3, %eax, %xmm0
movd %xmm1, %eax
pinsrw $4, %eax, %xmm0
ret
=========
New:
_shuf4:
punpcklqdq %xmm1, %xmm0
pshufb LCPI1_0, %xmm0
Old:
_shuf4:
pextrw $3, %xmm0, %eax
movsd %xmm1, %xmm0
pextrw $3, %xmm1, %ecx
pinsrw $4, %ecx, %xmm0
pinsrw $5, %eax, %xmm0
========
New:
_shuf1:
pushl %ebx
pushl %edi
pushl %esi
pextrw $1, %xmm0, %eax
rolw $8, %ax
movd %xmm0, %ecx
rolw $8, %cx
pextrw $5, %xmm0, %edx
pextrw $4, %xmm0, %esi
pextrw $3, %xmm0, %edi
pextrw $2, %xmm0, %ebx
movaps %xmm0, %xmm1
pinsrw $0, %ecx, %xmm1
pinsrw $1, %eax, %xmm1
rolw $8, %bx
pinsrw $2, %ebx, %xmm1
rolw $8, %di
pinsrw $3, %edi, %xmm1
rolw $8, %si
pinsrw $4, %esi, %xmm1
rolw $8, %dx
pinsrw $5, %edx, %xmm1
pextrw $7, %xmm0, %eax
rolw $8, %ax
movaps %xmm1, %xmm0
pinsrw $7, %eax, %xmm0
popl %esi
popl %edi
popl %ebx
ret
Old:
_shuf1:
subl $252, %esp
movaps %xmm0, (%esp)
movaps %xmm0, 16(%esp)
movaps %xmm0, 32(%esp)
movaps %xmm0, 48(%esp)
movaps %xmm0, 64(%esp)
movaps %xmm0, 80(%esp)
movaps %xmm0, 96(%esp)
movaps %xmm0, 224(%esp)
movaps %xmm0, 208(%esp)
movaps %xmm0, 192(%esp)
movaps %xmm0, 176(%esp)
movaps %xmm0, 160(%esp)
movaps %xmm0, 144(%esp)
movaps %xmm0, 128(%esp)
movaps %xmm0, 112(%esp)
movzbl 14(%esp), %eax
movd %eax, %xmm1
movzbl 22(%esp), %eax
movd %eax, %xmm2
punpcklbw %xmm1, %xmm2
movzbl 42(%esp), %eax
movd %eax, %xmm1
movzbl 50(%esp), %eax
movd %eax, %xmm3
punpcklbw %xmm1, %xmm3
punpcklbw %xmm2, %xmm3
movzbl 77(%esp), %eax
movd %eax, %xmm1
movzbl 84(%esp), %eax
movd %eax, %xmm2
punpcklbw %xmm1, %xmm2
movzbl 104(%esp), %eax
movd %eax, %xmm1
punpcklbw %xmm1, %xmm0
punpcklbw %xmm2, %xmm0
movaps %xmm0, %xmm1
punpcklbw %xmm3, %xmm1
movzbl 127(%esp), %eax
movd %eax, %xmm0
movzbl 135(%esp), %eax
movd %eax, %xmm2
punpcklbw %xmm0, %xmm2
movzbl 155(%esp), %eax
movd %eax, %xmm0
movzbl 163(%esp), %eax
movd %eax, %xmm3
punpcklbw %xmm0, %xmm3
punpcklbw %xmm2, %xmm3
movzbl 188(%esp), %eax
movd %eax, %xmm0
movzbl 197(%esp), %eax
movd %eax, %xmm2
punpcklbw %xmm0, %xmm2
movzbl 217(%esp), %eax
movd %eax, %xmm4
movzbl 225(%esp), %eax
movd %eax, %xmm0
punpcklbw %xmm4, %xmm0
punpcklbw %xmm2, %xmm0
punpcklbw %xmm3, %xmm0
punpcklbw %xmm1, %xmm0
addl $252, %esp
ret
llvm-svn: 65311
|
| |
|
|
|
|
|
|
|
| |
instruction. The class also consolidates the code for detecting constant
splats that's shared across PowerPC and the CellSPU backends (and might be
useful for other backends.) Also introduces SelectionDAG::getBUID_VECTOR() for
generating new BUILD_VECTOR nodes.
llvm-svn: 65296
|
| |
|
|
| |
llvm-svn: 65287
|
| |
|
|
|
|
| |
interval as well.
llvm-svn: 65279
|
| |
|
|
| |
llvm-svn: 65274
|
| |
|
|
|
|
|
|
|
|
|
| |
Now we're using one gross, but quite robust hack :) (previous ones
did not work, for example, when ext_weak symbol was used deep inside
constant expression in the initializer).
The proper fix of this problem will require some quite huge asmprinter
changes and that's why was postponed. This fixes PR3629 by the way :)
llvm-svn: 65230
|
| |
|
|
|
|
| |
registers, and it doesn't produce side effects, just delete the instruction.
llvm-svn: 65218
|