| Commit message (Collapse) | Author | Age | Files | Lines | 
| | 
| 
| 
| 
| 
|  | 
intended to be a dlopenable module and not a "plain" shared library.
llvm-svn: 19456
 | 
| | 
| 
| 
|  | 
llvm-svn: 19455
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
passes the -module option on the libtool command line to ensure that the
shared library being built can be dlopened and dlsym can work on that
module. LOADABLE_MODULE should be sent only in conjunction with the
SHARED_LIBRARY directive. It should generally be used for any module that
is intended to be the target of an LLVM -load option. Note that loadable
modules will not have the lib prefix but otherwise look like shared
libraries. This is per the libtool recommendations and prevents these
special shared libraries from being linked in via -l option to the linker.
llvm-svn: 19454
 | 
| | 
| 
| 
|  | 
llvm-svn: 19453
 | 
| | 
| 
| 
|  | 
llvm-svn: 19452
 | 
| | 
| 
| 
| 
| 
| 
| 
|  | 
int -> FP casting code.  Note that we don't have to set it for FP operations
that take FP values as operands: whatever produces the FP value will set the
flag.
llvm-svn: 19451
 | 
| | 
| 
| 
| 
| 
|  | 
inverted the sense of the comparison.
llvm-svn: 19450
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
evaluate the LHS or the RHS of an operation first.  This causes good things
to happen.  For example, instead of compiling a loop to this:
.LBBstrength_result7_1: # loopentry
        movl 16(%esp), %edi
        movl (%edi), %edi             ;;; LOAD
        movl (%ecx), %ebx
        movl $2, (%eax,%ebx,4)
        movl (%edx), %ebx
        movl %esi, %ebp
        addl $21, %ebp
        addl $42, %esi
        cmpl $0, %edi                 ;;; USE
        cmovne %esi, %ebp
        cmpl %ebp, %ebx
        movl %ebp, %esi
        jg .LBBstrength_result7_1
We now compile it to this:
.LBBstrength_result7_1: # loopentry
        movl %edi, %ebx
        addl $42, %ebx
        addl $21, %edi
        movl (%ecx), %ebp              ;; LOAD
        cmpl $0, %ebp                  ;; USE
        cmovne %ebx, %edi
        movl (%edx), %ebx
        movl $2, (%eax,%ebx,4)
        movl (%esi), %ebx
        cmpl %edi, %ebx
        jg .LBBstrength_result7_1
Which reduces register pressure enough (in this case) to avoid spilling in the
loop.
As another example, consider the CodeGen/X86/regpressure.ll testcase.  We
used to generate this code for both cases:
regpressure1:
        subl $32, %esp
        movl %esi, 12(%esp)
        movl %edi, 8(%esp)
        movl %ebx, 4(%esp)
        movl %ebp, (%esp)
        movl 36(%esp), %ecx
        movl (%ecx), %eax
        movl 4(%ecx), %edx
        movl %edx, 24(%esp)
        movl 8(%ecx), %edx
        movl %edx, 16(%esp)
        movl 12(%ecx), %edx
        movl 16(%ecx), %esi
        movl 20(%ecx), %edi
        movl 24(%ecx), %ebx
        movl %ebx, 28(%esp)
        movl 28(%ecx), %ebx
        movl 32(%ecx), %ebp
        movl %ebp, 20(%esp)
        movl 36(%ecx), %ecx
        imull 24(%esp), %eax
        imull 16(%esp), %eax
        imull %edx, %eax
        imull %esi, %eax
        imull %edi, %eax
        imull 28(%esp), %eax
        imull %ebx, %eax
        imull 20(%esp), %eax
        imull %ecx, %eax
        movl (%esp), %ebp
        movl 4(%esp), %ebx
        movl 8(%esp), %edi
        movl 12(%esp), %esi
        addl $32, %esp
        ret
This code is basically trying to do all of the loads first, then execute all
of the multiplies.  Because we run out of registers, lots of spill code happens.
We now generate this code for both cases:
regpressure1:
        movl 4(%esp), %ecx
        movl (%ecx), %eax
        movl 4(%ecx), %edx
        imull %edx, %eax
        movl 8(%ecx), %edx
        imull %edx, %eax
        movl 12(%ecx), %edx
        imull %edx, %eax
        movl 16(%ecx), %edx
        imull %edx, %eax
        movl 20(%ecx), %edx
        imull %edx, %eax
        movl 24(%ecx), %edx
        imull %edx, %eax
        movl 28(%ecx), %edx
        imull %edx, %eax
        movl 32(%ecx), %edx
        imull %edx, %eax
        movl 36(%ecx), %ecx
        imull %ecx, %eax
        ret
which is much nicer (when we fold loads into the muls it will be even better).
The old instruction selector used to produce the good code for regpressure1
but not for regpressure2, as it depended on the order of operations in the
LLVM code.
llvm-svn: 19449
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
functions together at the start of the basic block, causing massive spillage.
The old isel codegened the loads wherever they happened to land, so it
generated good code for the first case, but bad code for the second.
We really want the pattern isel to generate (the same) good code for both.
llvm-svn: 19448
 | 
| | 
| 
| 
|  | 
llvm-svn: 19447
 | 
| | 
| 
| 
|  | 
llvm-svn: 19446
 | 
| | 
| 
| 
|  | 
llvm-svn: 19445
 | 
| | 
| 
| 
|  | 
llvm-svn: 19444
 | 
| | 
| 
| 
|  | 
llvm-svn: 19443
 | 
| | 
| 
| 
|  | 
llvm-svn: 19442
 | 
| | 
| 
| 
| 
| 
|  | 
graph with no labels! :)
llvm-svn: 19441
 | 
| | 
| 
| 
|  | 
llvm-svn: 19440
 | 
| | 
| 
| 
|  | 
llvm-svn: 19439
 | 
| | 
| 
| 
|  | 
llvm-svn: 19438
 | 
| | 
| 
| 
|  | 
llvm-svn: 19437
 | 
| | 
| 
| 
|  | 
llvm-svn: 19436
 | 
| | 
| 
| 
|  | 
llvm-svn: 19435
 | 
| | 
| 
| 
| 
| 
| 
|  | 
leaked to the system.  Now they are destroyed with the JITMemoryManager is
destroyed.
llvm-svn: 19434
 | 
| | 
| 
| 
|  | 
llvm-svn: 19432
 | 
| | 
| 
| 
| 
| 
| 
|  | 
1. Rename createLoaderPass to CreateProfileLoaderPass
  2. Opt shouldn't use the pass registered in CodeGen.
llvm-svn: 19431
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
int test2(int * P, int* Q, int A, int B) {
        return P+A == P;
}
into:
test2:
        movl 4(%esp), %eax
        movl 12(%esp), %eax
        shll $2, %eax
        cmpl $0, %eax
        sete %al
        movzbl %al, %eax
        ret
instead of:
test2:
        movl 4(%esp), %eax
        movl 12(%esp), %ecx
        leal (%eax,%ecx,4), %ecx
        cmpl %eax, %ecx
        sete %al
        movzbl %al, %eax
        ret
ICC is producing worse code:
test2:
        movl      4(%esp), %eax                                 #8.5
        movl      12(%esp), %edx                                #8.5
        lea       (%edx,%edx), %ecx                             #9.9
        addl      %ecx, %ecx                                    #9.9
        addl      %eax, %ecx                                    #9.9
        cmpl      %eax, %ecx                                    #9.16
        movl      $0, %eax                                      #9.16
        sete      %al                                           #9.16
        ret                                                     #9.16
as is GCC (looks like our old code):
test2:
        movl    4(%esp), %edx
        movl    12(%esp), %eax
        leal    (%edx,%eax,4), %ecx
        cmpl    %edx, %ecx
        sete    %al
        movzbl  %al, %eax
        ret
llvm-svn: 19430
 | 
| | 
| 
| 
|  | 
llvm-svn: 19429
 | 
| | 
| 
| 
|  | 
llvm-svn: 19428
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
.LBB_Z5test0PdS__3:     # no_exit.1
        fldl data(,%eax,8)
        fldl 24(%esp)
        faddp %st(1)
        fstl 24(%esp)
        incl %eax
        movl $16000, %ecx
        sarl $3, %ecx
        cmpl %eax, %ecx
        fstpl 16(%esp)
        #FP_REG_KILL
        jg .LBB_Z5test0PdS__3   # no_exit.1
into:
.LBB_Z5test0PdS__3:     # no_exit.1
        fldl data(,%eax,8)
        fldl 24(%esp)
        faddp %st(1)
        fstl 24(%esp)
        incl %eax
        cmpl $2000, %eax
        fstpl 16(%esp)
        #FP_REG_KILL
        jl .LBB_Z5test0PdS__3   # no_exit.1
llvm-svn: 19427
 | 
| | 
| 
| 
| 
| 
| 
|  | 
gdb debugger doesn't get confused on which file it is reading (the one in
lib/System or the one in lib/System/{Win32,Unix})
llvm-svn: 19426
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
codegen this loop in stepanov:
no_exit.i:              ; preds = %entry, %no_exit.i, %then.i, %_Z5checkd.exit
        %i.0.0 = phi int [ 0, %entry ], [ %i.0.0, %no_exit.i ], [ %inc.0, %_Z5checkd.exit ], [ %inc.012, %then.i ]              ; <int> [#uses=3]
        %indvar = phi uint [ %indvar.next, %no_exit.i ], [ 0, %entry ], [ 0, %then.i ], [ 0, %_Z5checkd.exit ]          ; <uint> [#uses=3]
        %result_addr.i.0 = phi double [ %tmp.4.i.i, %no_exit.i ], [ 0.000000e+00, %entry ], [ 0.000000e+00, %then.i ], [ 0.000000e+00, %_Z5checkd.exit ]          ; <double> [#uses=1]
        %first_addr.0.i.2.rec = cast uint %indvar to int                ; <int> [#uses=1]
        %first_addr.0.i.2 = getelementptr [2000 x double]* %data, int 0, uint %indvar           ; <double*> [#uses=1]
        %inc.i.rec = add int %first_addr.0.i.2.rec, 1           ; <int> [#uses=1]
        %inc.i = getelementptr [2000 x double]* %data, int 0, int %inc.i.rec            ; <double*> [#uses=1]
        %tmp.3.i.i = load double* %first_addr.0.i.2             ; <double> [#uses=1]
        %tmp.4.i.i = add double %result_addr.i.0, %tmp.3.i.i            ; <double> [#uses=2]
        %tmp.2.i = seteq double* %inc.i, getelementptr ([2000 x double]* %data, int 0, int 2000)                ; <bool> [#uses=1]
        %indvar.next = add uint %indvar, 1              ; <uint> [#uses=1]
        br bool %tmp.2.i, label %_Z10accumulateIPddET0_T_S2_S1_.exit, label %no_exit.i
To this:
.LBB_Z4testIPddEvT_S1_T0__1:    # no_exit.i
        fldl data(,%eax,8)
        fldl 16(%esp)
        faddp %st(1)
        fstpl 16(%esp)
        incl %eax
        movl %eax, %ecx
        shll $3, %ecx
        cmpl $16000, %ecx
        #FP_REG_KILL
        jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
instead of this:
.LBB_Z4testIPddEvT_S1_T0__1:    # no_exit.i
        fldl data(,%eax,8)
        fldl 16(%esp)
        faddp %st(1)
        fstpl 16(%esp)
        incl %eax
        leal data(,%eax,8), %ecx
        leal data+16000, %edx
        cmpl %edx, %ecx
        #FP_REG_KILL
        jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i
llvm-svn: 19425
 | 
| | 
| 
| 
|  | 
llvm-svn: 19424
 | 
| | 
| 
| 
|  | 
llvm-svn: 19423
 | 
| | 
| 
| 
|  | 
llvm-svn: 19422
 | 
| | 
| 
| 
| 
| 
|  | 
list.
llvm-svn: 19421
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
field
of an ADDri (due to current restrictions on MachineOperand :( ).  This allows
us to generate:
        leal Data+16000, %edx
instead of:
        movl $Data, %edx
        addl $16000, %edx
llvm-svn: 19420
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
loops in stepanov to this:
.LBB_Z5test0PdS__2:     # no_exit.1
        fldl data(,%eax,8)
        fldl 24(%esp)
        faddp %st(1)
        fstl 24(%esp)
        incl %eax
        cmpl $2000, %eax
        fstpl 16(%esp)
        #FP_REG_KILL
        jl .LBB_Z5test0PdS__2
instead of this:
.LBB_Z5test0PdS__2:     # no_exit.1
        fldl data(,%eax,8)
        fldl 24(%esp)
        faddp %st(1)
        fstl 24(%esp)
        incl %eax
        movl $data, %ecx
        movl %ecx, %edx
        addl $16000, %edx
        subl %ecx, %edx
        movl %edx, %ecx
        sarl $2, %ecx
        shrl $29, %ecx
        addl %ecx, %edx
        sarl $3, %edx
        cmpl %edx, %eax
        fstpl 16(%esp)
        #FP_REG_KILL
        jl .LBB_Z5test0PdS__2
The old instruction selector produced:
.LBB_Z5test0PdS__2:     # no_exit.1
        fldl 24(%esp)
        faddl data(,%eax,8)
        fstl 24(%esp)
        movl %eax, %ecx
        incl %ecx
        incl %eax
        leal data+16000, %edx
        movl $data, %edi
        subl %edi, %edx
        movl %edx, %edi
        sarl $2, %edi
        shrl $29, %edi
        addl %edi, %edx
        sarl $3, %edx
        cmpl %edx, %ecx
        fstpl 16(%esp)
        #FP_REG_KILL
        jl .LBB_Z5test0PdS__2   # no_exit.1
Which is even worse!
llvm-svn: 19419
 | 
| | 
| 
| 
|  | 
llvm-svn: 19418
 | 
| | 
| 
| 
| 
| 
|  | 
values), and eliminate some switch statements.
llvm-svn: 19417
 | 
| | 
| 
| 
| 
| 
| 
| 
|  | 
std::__pad<wchar_t, std::char_traits<wchar_t> >::_S_pad(std::ios_base&, wchar_t, wchar_t*, wchar_t const*, int, int, bool)
from libstdc++
llvm-svn: 19416
 | 
| | 
| 
| 
|  | 
llvm-svn: 19415
 | 
| | 
| 
| 
| 
| 
|  | 
Also, fix zero_extend from bool to i8, which fixes Shootout/objinst.
llvm-svn: 19414
 | 
| | 
| 
| 
|  | 
llvm-svn: 19412
 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
|  | 
stdext, but
this classes uses a utility function in namespace std.  But Microsoft apparently
assumes everyone will "using namespace std;".  As LLVM doesn't....  Add a
"use std::_Distance;" to get it working.
llvm-svn: 19411
 | 
| | 
| 
| 
|  | 
llvm-svn: 19410
 | 
| | 
| 
| 
|  | 
llvm-svn: 19409
 | 
| | 
| 
| 
| 
| 
|  | 
patch, all of SingleSource/UnitTests passes.
llvm-svn: 19408
 | 
| | 
| 
| 
| 
| 
|  | 
intrinsics.
llvm-svn: 19407
 | 
| | 
| 
| 
|  | 
llvm-svn: 19406
 | 
| | 
| 
| 
|  | 
llvm-svn: 19405
 |