llvm-svn: 30967
compile-time performance.
llvm-svn: 30896
PR892 and Transforms/ScalarRepl/union-pointer.ll:test2
llvm-svn: 30825
llvm-svn: 30823
SimplifyDemandedBits. The idea is that some operations can be simplified if
not all of the computed elements are needed. Some targets (like x86) have a
large number of intrinsics that operate on a single element, but pass other
elements through unmodified. If those other elements are not needed, the
intrinsics can be simplified to scalar operations, and insertelement ops can
be removed.
This turns, for example:
ushort %Convert_sse(float %f) {
%tmp = insertelement <4 x float> undef, float %f, uint 0 ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, uint 1 ; <<4 x float>> [#uses=1]
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, uint 2 ; <<4 x float>> [#uses=1]
%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, uint 3 ; <<4 x float>> [#uses=1]
%tmp28 = tail call <4 x float> %llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp37 = tail call <4 x float> %llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <int> [#uses=1]
%tmp69 = cast int %tmp to ushort ; <ushort> [#uses=1]
ret ushort %tmp69
}
into:
ushort %Convert_sse(float %f) {
entry:
%tmp28 = sub float %f, 1.000000e+00 ; <float> [#uses=1]
%tmp37 = mul float %tmp28, 5.000000e-01 ; <float> [#uses=1]
%tmp375 = insertelement <4 x float> undef, float %tmp37, uint 0 ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> %llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> %llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <int> [#uses=1]
%tmp69 = cast int %tmp to ushort ; <ushort> [#uses=1]
ret ushort %tmp69
}
which improves codegen from:
_Convert_sse:
movss LCPI1_0, %xmm0
movss 4(%esp), %xmm1
subss %xmm0, %xmm1
movss LCPI1_1, %xmm0
mulss %xmm0, %xmm1
movss LCPI1_2, %xmm0
minss %xmm0, %xmm1
xorps %xmm0, %xmm0
maxss %xmm0, %xmm1
cvttss2si %xmm1, %eax
andl $65535, %eax
ret
to:
_Convert_sse:
movss 4(%esp), %xmm0
subss LCPI1_0, %xmm0
mulss LCPI1_1, %xmm0
movss LCPI1_2, %xmm1
minss %xmm1, %xmm0
xorps %xmm1, %xmm1
maxss %xmm1, %xmm0
cvttss2si %xmm0, %eax
andl $65535, %eax
ret
This is just a first step, it can be extended in many ways. Testcase here:
Transforms/InstCombine/vec_demanded_elts.ll
llvm-svn: 30752
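The bookkeeping behind this transform can be sketched outside of LLVM. Below is a minimal, hypothetical C++ illustration (stand-in names and types, not the actual InstCombine code): given a mask of which vector lanes a user actually demands, any insertelement into an undemanded lane is dead, and once only lane 0 survives, the "ss"-style intrinsics collapse to ordinary scalar operations as in the before/after above.

// Hypothetical illustration of demanded-element bookkeeping (not LLVM code).
#include <bitset>
#include <cstdio>
#include <vector>

struct InsertElt {   // models: insertelement <4 x float> %v, float %s, uint Lane
  unsigned Lane;
  float    Scalar;
};

int main() {
  // cvttss2si reads only lane 0 of its operand, and the ss intrinsics pass
  // lanes 1-3 through untouched, so only lane 0 is ever demanded here.
  std::bitset<4> Demanded;
  Demanded.set(0);

  std::vector<InsertElt> Chain = {{0, 3.5f}, {1, 0.0f}, {2, 0.0f}, {3, 0.0f}};
  for (const InsertElt &IE : Chain)
    std::printf("insertelement into lane %u: %s\n", IE.Lane,
                Demanded.test(IE.Lane) ? "needed" : "dead, remove it");
  return 0;
}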
Ensure that we copy KnownProperties before calling visitBasicBlock, else
we may leak properties into blocks where they don't belong.
llvm-svn: 30705
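The fix is the usual "copy the inherited state before descending" pattern. A hedged sketch of the idea, with hypothetical names rather than the pass's real data structures: every block visited gets its own copy of the facts known on entry, so facts discovered while walking one subtree never leak into a sibling block.

// Hypothetical sketch of copying known properties before recursing.
#include <map>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  std::vector<Block *> Children;   // e.g. dominator-tree children
};

using Properties = std::map<std::string, int>;   // stand-in for known facts

void visitBasicBlock(Block *BB, Properties KnownProperties) {  // by value: a copy
  KnownProperties[BB->Name] = 1;   // facts learned here stay in this subtree
  for (Block *Child : BB->Children)
    visitBasicBlock(Child, KnownProperties);   // each child sees a fresh copy
}                                              // the copy dies here

int main() {
  Block Left{"left", {}}, Right{"right", {}};
  Block Entry{"entry", {&Left, &Right}};
  visitBasicBlock(&Entry, {});   // nothing learned in "left" is visible in "right"
  return 0;
}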
Fix SwitchInst where dest-block is the same as one of the cases.
llvm-svn: 30700
optimize in more cases.
llvm-svn: 30699
the alignment when promoting allocations. This implements
InstCombine/cast.ll:test32
llvm-svn: 30682
ConstantBool::getTrue() and ConstantBool::getFalse().
llvm-svn: 30665
llvm-svn: 30623
llvm-svn: 30588
llvm-svn: 30555
llvm-svn: 30552
with the right answer.
llvm-svn: 30550
Fixes infinite loop in resolve().
llvm-svn: 30540
llvm-svn: 30535
this comparison, but never checked it. Whoops, no wonder we miscompiled
177.mesa!
llvm-svn: 30511
llvm-svn: 30505
llvm-svn: 30504
critical in the linux kernel for pointer analysis correctness
llvm-svn: 30496
llvm-svn: 30482
that we can't modify the CFG any more, at least not until it's possible
to update the dominator tree (PR217).
llvm-svn: 30469
llvm-svn: 30465
llvm-svn: 30456
llvm-svn: 30450
Use isLogicalShift/isArithmeticShift to simplify code.
llvm-svn: 30448
llvm-svn: 30405
llvm-svn: 30305
llvm-svn: 30304
llvm-svn: 30298
Handle this. This fixes PR908 and Transforms/LICM/2006-09-12-DeadUserOfSunkInstr.ll
llvm-svn: 30275
llvm-svn: 30266
llvm-svn: 30251
bit. This fixes Regression/Transforms/TailDup/MergeTest.ll
llvm-svn: 30237
operations (like findProperties) should be faster, at the expense of
unionSets being slower in cases that are rare in practice.
Don't erase a dead Instruction. This fixes a memory corruption issue.
llvm-svn: 30235
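The speed tradeoff described above can be illustrated with a toy equivalence-class container (hypothetical code, not the pass's actual structures): keeping an explicit member list per class makes queries such as findProperties cheap, while unionSets has to relabel and move the smaller class.

// Toy illustration of the tradeoff: fast class queries, slower unions.
#include <cstdio>
#include <unordered_map>
#include <utility>
#include <vector>

struct EquivalenceClasses {
  std::unordered_map<int, int> Leader;                 // value -> class leader
  std::unordered_map<int, std::vector<int>> Members;   // leader -> all members

  void insert(int V) {
    if (!Leader.count(V)) { Leader[V] = V; Members[V] = {V}; }
  }

  // Cost is proportional to the smaller class: every moved value is relabeled.
  void unionSets(int A, int B) {
    int LA = Leader[A], LB = Leader[B];
    if (LA == LB) return;
    if (Members[LA].size() < Members[LB].size()) std::swap(LA, LB);
    for (int V : Members[LB]) { Leader[V] = LA; Members[LA].push_back(V); }
    Members.erase(LB);
  }

  // Reaching the whole class is one lookup: no parent-pointer chasing.
  const std::vector<int> &findProperties(int V) { return Members[Leader[V]]; }
};

int main() {
  EquivalenceClasses EC;
  EC.insert(1); EC.insert(2); EC.insert(3);
  EC.unionSets(1, 2);
  std::printf("class of 1 has %zu members\n", EC.findProperties(1).size());
  return 0;
}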
llvm-svn: 30234
This implements select.ll::test18.
llvm-svn: 30230
For Transforms/TailDup/if-tail-dup.ll, for example, it produces:
_foo:
movl 8(%esp), %eax
movl 4(%esp), %ecx
testl $1, %ecx
je LBB1_2 #cond_next
LBB1_1: #cond_true
movl $1, (%eax)
LBB1_2: #cond_next
testl $2, %ecx
je LBB1_4 #cond_next10
LBB1_3: #cond_true6
movl $1, 4(%eax)
LBB1_4: #cond_next10
testl $4, %ecx
je LBB1_6 #cond_next18
LBB1_5: #cond_true14
movl $1, 8(%eax)
LBB1_6: #cond_next18
testl $8, %ecx
je LBB1_8 #return
LBB1_7: #cond_true22
movl $1, 12(%eax)
ret
LBB1_8: #return
ret
instead of:
_foo:
movl 4(%esp), %eax
testl $2, %eax
sete %cl
movl 8(%esp), %edx
testl $1, %eax
je LBB1_2 #cond_next
LBB1_1: #cond_true
movl $1, (%edx)
testb %cl, %cl
jne LBB1_4 #cond_next10
jmp LBB1_3 #cond_true6
LBB1_2: #cond_next
testb %cl, %cl
jne LBB1_4 #cond_next10
LBB1_3: #cond_true6
movl $1, 4(%edx)
testl $4, %eax
je LBB1_6 #cond_next18
jmp LBB1_5 #cond_true14
LBB1_4: #cond_next10
testl $4, %eax
je LBB1_6 #cond_next18
LBB1_5: #cond_true14
movl $1, 8(%edx)
testl $8, %eax
je LBB1_8 #return
jmp LBB1_7 #cond_true22
LBB1_6: #cond_next18
testl $8, %eax
je LBB1_8 #return
LBB1_7: #cond_true22
movl $1, 12(%edx)
ret
LBB1_8: #return
ret
llvm-svn: 30158
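For orientation, here is a hypothetical source-level equivalent of what this test appears to exercise, reconstructed from the assembly above (the real if-tail-dup.ll may differ): four independent bit tests, each guarding one store. Duplicating the small join blocks lets every path fall straight through to the next test, instead of spilling the next condition into a byte register (the sete/testb dance in the second listing).

// Hypothetical C-level equivalent, guessed from the assembly; not the .ll source.
void foo(int Flags, int *P) {
  if (Flags & 1) P[0] = 1;
  if (Flags & 2) P[1] = 1;
  if (Flags & 4) P[2] = 1;
  if (Flags & 8) P[3] = 1;
}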
Reorder operations to remove duplicated work.
Fix to leave floating-point types out of the optimization.
Add tests to predsimplify.ll for SwitchInst and SelectInst handling.
llvm-svn: 30055
corrects a missed optimization opportunity: removing cases from a switch.
llvm-svn: 30009
another Value) weren't being found by findProperties.
This fixes predsimplify.ll test6, a missed optimization opportunity.
llvm-svn: 29991
If a branch's condition has become a ConstantBool, simplify it immediately.
Removing the edge saves work and exposes more optimization opportunities
in the pass.
Add support for SelectInst.
llvm-svn: 29970
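A minimal stand-alone sketch of the first point, using hypothetical toy CFG types rather than the predsimplify pass itself: once a branch condition folds to a constant, the branch becomes unconditional and the dead edge is dropped on the spot, so nothing downstream reasons about an edge that can never be taken.

// Hypothetical toy CFG: fold a conditional branch whose condition is constant.
#include <algorithm>
#include <optional>
#include <vector>

struct Block;

struct Branch {
  std::optional<bool> ConstCond;    // set once the condition folds to a constant
  Block *TrueDest = nullptr;
  Block *FalseDest = nullptr;       // null means the branch is unconditional
};

struct Block {
  Branch Term;
  std::vector<Block *> Preds;
};

// If the condition is a known constant, keep only the live edge.
void simplifyBranch(Block *BB) {
  Branch &T = BB->Term;
  if (!T.ConstCond || !T.FalseDest)
    return;                                   // not constant, or not conditional
  Block *Live = *T.ConstCond ? T.TrueDest : T.FalseDest;
  Block *Dead = *T.ConstCond ? T.FalseDest : T.TrueDest;
  auto &P = Dead->Preds;                      // unlink the impossible edge
  P.erase(std::remove(P.begin(), P.end(), BB), P.end());
  T.TrueDest = Live;                          // rewrite as an unconditional branch
  T.FalseDest = nullptr;
  T.ConstCond.reset();
}

int main() {
  Block Then, Else, Entry;
  Entry.Term.ConstCond = true;
  Entry.Term.TrueDest = &Then;
  Entry.Term.FalseDest = &Else;
  Then.Preds.push_back(&Entry);
  Else.Preds.push_back(&Entry);
  simplifyBranch(&Entry);          // Entry now jumps unconditionally to Then
  return Else.Preds.empty() ? 0 : 1;
}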
exit blocks. The output is dependent on the addresses of basic blocks.
Add and use Loop::getUniqueExitBlocks.
llvm-svn: 29966
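The nondeterminism here is an iteration-order problem: a container keyed on basic-block pointers is walked in whatever order the addresses happen to sort, which varies from run to run. A hedged sketch of the alternative, with hypothetical types rather than the actual Loop API: collect exit blocks in discovery order and deduplicate, so the result is identical on every run.

// Hypothetical sketch: deterministic collection of unique exit blocks.
#include <set>
#include <unordered_set>
#include <vector>

struct BasicBlock { int Id; };   // stand-in for a real basic block

// Address-ordered: iteration order depends on where blocks were allocated.
std::set<BasicBlock *> uniqueExitsByAddress(const std::vector<BasicBlock *> &Exits) {
  return std::set<BasicBlock *>(Exits.begin(), Exits.end());
}

// Discovery-ordered: duplicates dropped, order reproducible across runs.
std::vector<BasicBlock *> uniqueExitsStable(const std::vector<BasicBlock *> &Exits) {
  std::vector<BasicBlock *> Result;
  std::unordered_set<BasicBlock *> Seen;
  for (BasicBlock *BB : Exits)
    if (Seen.insert(BB).second)
      Result.push_back(BB);
  return Result;
}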
llvm-svn: 29950
and simplifies expressions. This implements the optimization described
in PR807.
llvm-svn: 29947
speedup of gccas on 252.eon
llvm-svn: 29936
llvm-svn: 29925
llvm-svn: 29911
This was causing a crash in 175.vpr
llvm-svn: 29887