|  | Commit message (Collapse) | Author | Age | Files | Lines | 
|---|
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | get away with it, which exposes opportunities to eliminate the memory
objects entirely.  For example, we now compile byval.ll to:
define internal void @f1(i32 %b.0, i64 %b.1) {
entry:
	%tmp2 = add i32 %b.0, 1		; <i32> [#uses=0]
	ret void
}
define i32 @main() nounwind  {
entry:
	call void @f1( i32 1, i64 2 )
	ret i32 0
}
This seems like it would trigger a lot for code that passes around small
structs (e.g. SDOperands or _Complex)...
llvm-svn: 45886 | 
| | 
| 
| 
| | llvm-svn: 45878 | 
| | 
| 
| 
| | llvm-svn: 45877 | 
| | 
| 
| 
| 
| 
| | whether an argument is byval and pass into isSafeToPromoteArgument.
llvm-svn: 45876 | 
| | 
| 
| 
| | llvm-svn: 45875 | 
| | 
| 
| 
| | llvm-svn: 45874 | 
| | 
| 
| 
| | llvm-svn: 45873 | 
| | 
| 
| 
| 
| 
| | copy of it in case the callee modifies the struct.
llvm-svn: 45853 | 
| | 
| 
| 
| 
| 
| | arithmetic.
llvm-svn: 45745 | 
| | 
| 
| 
| 
| 
| | incompatibility.
llvm-svn: 45704 | 
| | 
| 
| 
| | llvm-svn: 45675 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | ShadowStackCollector, which additionally has reduced overhead with
no sacrifice in portability.
Considering a function @fun with 8 loop-local roots,
ShadowStackCollector introduces the following overhead
(x86):
; shadowstack prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    $___gc_fun, 20(%esp)
        movl    $0, 24(%esp)
        movl    $0, 28(%esp)
        movl    $0, 32(%esp)
        movl    $0, 36(%esp)
        movl    $0, 40(%esp)
        movl    $0, 44(%esp)
        movl    $0, 48(%esp)
        movl    $0, 52(%esp)
        movl    %ecx, 16(%esp)
        leal    16(%esp), %ecx
        movl    %ecx, (%eax)
; shadowstack loop overhead
        (none)
; shadowstack epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)
; shadowstack metadata
        .align  3
___gc_fun:                              # __gc_fun
        .long   8
        .space  4
In comparison to LowerGC:
; lowergc prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    %ecx, 48(%esp)
        movl    $8, 52(%esp)
        movl    $0, 60(%esp)
        movl    $0, 56(%esp)
        movl    $0, 68(%esp)
        movl    $0, 64(%esp)
        movl    $0, 76(%esp)
        movl    $0, 72(%esp)
        movl    $0, 84(%esp)
        movl    $0, 80(%esp)
        movl    $0, 92(%esp)
        movl    $0, 88(%esp)
        movl    $0, 100(%esp)
        movl    $0, 96(%esp)
        movl    $0, 108(%esp)
        movl    $0, 104(%esp)
        movl    $0, 116(%esp)
        movl    $0, 112(%esp)
; lowergc loop overhead
        leal    44(%esp), %eax
        movl    %eax, 56(%esp)
        leal    40(%esp), %eax
        movl    %eax, 64(%esp)
        leal    36(%esp), %eax
        movl    %eax, 72(%esp)
        leal    32(%esp), %eax
        movl    %eax, 80(%esp)
        leal    28(%esp), %eax
        movl    %eax, 88(%esp)
        leal    24(%esp), %eax
        movl    %eax, 96(%esp)
        leal    20(%esp), %eax
        movl    %eax, 104(%esp)
        leal    16(%esp), %eax
        movl    %eax, 112(%esp)
; lowergc epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)
; lowergc metadata
        (none)
llvm-svn: 45670 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| | direct calls bails out unless caller and callee have essentially
equivalent parameter attributes.  This is illogical - the callee's
attributes should be of no relevance here.  Rework the logic, which
incidentally fixes a crash when removed arguments have attributes.
llvm-svn: 45658 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | a direct call with cast parameters and cast return
value (if any), instcombine was prepared to cast any
non-void return value into any other, whether castable
or not.  Add a new predicate for testing whether casting
is valid, and check it both for the return value and
(as a cleanup) for the parameters.
llvm-svn: 45657 | 
| | 
| 
| 
| | llvm-svn: 45613 | 
| | 
| 
| 
| 
| 
| 
| | things that are not equality comparisons, for example:
   (2147479553+4096)-2147479553 < 0    !=   (2147479553+4096) < 2147479553
llvm-svn: 45612 | 
| | 
| 
| 
| | llvm-svn: 45594 | 
| | 
| 
| 
| | llvm-svn: 45568 | 
| | 
| 
| 
| 
| 
| 
| | could theoretically introduce a trap, but is also a performance issue.
This speeds up ptrdist/ks by 8%.
llvm-svn: 45533 | 
| | 
| 
| 
| | llvm-svn: 45516 | 
| | 
| 
| 
| | llvm-svn: 45418 | 
| | 
| 
| 
| | llvm-svn: 45415 | 
| | 
| 
| 
| 
| 
| | should probably be a target-specific predicate based on address space. That way for targets where this isn't applicable the predicate can be optimized away.
llvm-svn: 45403 | 
| | 
| 
| 
| 
| 
| | have potential side-effects.
llvm-svn: 45392 | 
| | 
| 
| 
| 
| 
| | pointing out my stupid mistakes when writing this patch. :-)
llvm-svn: 45384 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | define i32 @main() {
entry:
	%z = alloca i32		; <i32*> [#uses=2]
	store i32 0, i32* %z
	%tmp = load i32* %z		; <i32> [#uses=1]
	%sub = sub i32 %tmp, 1		; <i32> [#uses=1]
	%cmp = icmp ult i32 %sub, 0		; <i1> [#uses=1]
	%retval = select i1 %cmp, i32 1, i32 0		; <i32> [#uses=1]
	ret i32 %retval
}
into ret 1, instead of ret 0.
Christopher, please investigate.
llvm-svn: 45383 | 
| | 
| 
| 
| 
| 
| | when copying functions.
llvm-svn: 45356 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| | it is only a partial fix.  This change is noise for most programs, but
speeds up Shootout-C++/matrix by 20%, Ptrdist/ks by 24%, smg2000 by 8%,
hexxagon by 9%, bzip2 by 9% (not sure I trust this), ackerman by 13%, etc.
OTOH, it slows down Shootout/fib2 by 40% (I'll update PR1877 with this info).
llvm-svn: 45354 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | define void @f() {
            ...
            call i32 @g()
            ...
    }
    define void @g() {
            ...
    }
The hazards are:
  - @f and @g both have GC, but their GCs differ. Inlining is invalid. This
    may never occur.
  - @f has no GC, but @g does. g's GC must be propagated to @f.
The other scenarios are safe:
  - @f and @g have the same GC.
  - @f and @g have no GC.
  - @g has no GC.
This patch adds inliner checks for the two hazardous scenarios.
llvm-svn: 45351 | 
| | 
| 
| 
| 
| 
| 
| | When specified, don't split backedges of single-bb loops.
This helps address PR1877
llvm-svn: 45344 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | us to compile:
#include <math.h>
int t1(double d) { return signbit(d); }
into:
_t1:
	movd	%xmm0, %rax
	shrq	$63, %rax
	ret
instead of:
_t1:
	movd	%xmm0, %rax
	shrq	$32, %rax
	shrl	$31, %eax
	ret
on x86-64.
llvm-svn: 45311 | 
| | 
| 
| 
| 
| 
| 
| | not merge current bb and succ even if bb's terminator is an
unconditional branch to succ.
llvm-svn: 45305 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | return attributes on the floor.  In the case of a call
to a varargs function where the varargs arguments are
being removed, any call attributes on those arguments
need to be dropped.  I didn't do this because I plan to
make it illegal to have such attributes (see next patch).
With this change, compiling the gcc filter2 eh test at -O0
and then running opt -std-compile-opts on it results in
a correctly working program (compiling at -O1 or higher
results in the test failing due to a problem with how we
output eh info into the IR).
llvm-svn: 45285 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| | (icmp slt (sub A B) 1) -> (icmp sle A B)
(icmp sgt (sub A B) -1) -> (icmp sge A B)
and add testcase.
llvm-svn: 45256 | 
| | 
| 
| 
| 
| 
| | uses are addresses. This trades a constant multiply for one fewer iv.
llvm-svn: 45251 | 
| | 
| 
| 
| 
| 
| | has a single use, and generalize it to not require N to be a constant.
llvm-svn: 45250 | 
| | 
| 
| 
| | llvm-svn: 45230 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| | calls 'nounwind'.  It is important for correct C++
exception handling that nounwind markings do not get
lost, so this transformation is actually needed for
correctness.
llvm-svn: 45218 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | for this case on X86
from
_foo:
        movl    $99, %ecx
        movl    4(%esp), %eax
        subl    %eax, %ecx
        xorl    %edx, %edx
        testl   %ecx, %ecx
        cmovs   %edx, %eax
        ret
to
_foo:
        xorl    %ecx, %ecx
        movl    4(%esp), %eax
        cmpl    $99, %eax
        cmovg   %ecx, %eax
        ret
llvm-svn: 45173 | 
| | 
| 
| 
| | llvm-svn: 45170 | 
| | 
| 
| 
| 
| 
| | recent submission.
llvm-svn: 45169 | 
| | 
| 
| 
| 
| 
| | doesNotThrow.
llvm-svn: 45160 | 
| | 
| 
| 
| | llvm-svn: 45159 | 
| | 
| 
| 
| 
| 
| 
| 
| | eliminate subtractions. This code is often produced by the SMAX expansion in SCEV.
This implements test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
llvm-svn: 45158 | 
| | 
| 
| 
| 
| 
| 
| 
| | how to lower them (with no attempt made to be
efficient, since they should only occur for
unoptimized code).
llvm-svn: 45108 | 
| | 
| 
| 
| | llvm-svn: 45101 | 
| | 
| 
| 
| | llvm-svn: 45100 | 
| | 
| 
| 
| 
| 
| | passed the erased element.
llvm-svn: 45099 | 
| | 
| 
| 
| 
| 
| | of PointerType::get() has become PointerType::getUnqual(), which returns a pointer in the generic address space. The new prototype of PointerType::get() requires both a type and an address space.
llvm-svn: 45082 | 
| | 
| 
| 
| 
| 
| | changed not to reject invoke of inline asm.
llvm-svn: 45077 |