|  | Commit message (Collapse) | Author | Age | Files | Lines | 
|---|
| | 
| 
| 
| 
| 
| 
| | a member of the class.  While we're at it, turn the collection into a set
instead of a vector to improve efficiency and make queries simpler.
llvm-svn: 12400 | 
| | 
| 
| 
| | llvm-svn: 12382 | 
| | 
| 
| 
| 
| 
| | curly braceage
llvm-svn: 12378 | 
| | 
| 
| 
| | llvm-svn: 12372 | 
| | 
| 
| 
| | llvm-svn: 12368 | 
| | 
| 
| 
| | llvm-svn: 12319 | 
| | 
| 
| 
| | llvm-svn: 12070 | 
| | 
| 
| 
| | llvm-svn: 12068 | 
| | 
| 
| 
| 
| 
| | * Add comments to ExtractLoop()
llvm-svn: 12053 | 
| | 
| 
| 
| 
| 
| | ... which tickled the lowerinvoke pass because it used the BCE routines.
llvm-svn: 12012 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | and br->br code and generalizing it.  This allows us to compile code like this:
int test(Instruction *I) {
  if (isa<CastInst>(I))
    return foo(7);
  else if (isa<BranchInst>(I))
    return foo(123);
  else if (isa<UnwindInst>(I))
    return foo(1241);
  else if (isa<SetCondInst>(I))
    return foo(1);
  else if (isa<VAArgInst>(I))
    return foo(42);
  return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
        %tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4            ; <uint*> [#uses=1]
        %tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i          ; <uint> [#uses=2]
        %tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27                ; <bool> [#uses=0]
        switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
                 uint 27, label %then.0
                 uint 2, label %then.1
                 uint 5, label %then.2
                 uint 14, label %then.3
                 uint 15, label %then.3
                 uint 16, label %then.3
                 uint 17, label %then.3
                 uint 18, label %then.3
                 uint 19, label %then.3
                 uint 32, label %then.4
        ]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964 | 
| | 
| 
| 
| | llvm-svn: 11939 | 
| | 
| 
| 
| 
| 
| 
| | function, as long as the loop isn't the only one in that function. This should
help debugging passes easier with BugPoint.
llvm-svn: 11936 | 
| | 
| 
| 
| 
| 
| | a new function, taking care of inputs and outputs.
llvm-svn: 11935 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | if (X == 0 || X == 2)
...where the comparisons and branches are in different blocks... into a switch
instruction.  This comes up a lot in various programs, and works well with
the switch/switch merging code I checked earlier.  For example, this testcase:
int switchtest(int C) {
  return C == 0 ? f(123) :
         C == 1 ? f(3123) :
         C == 4 ? f(312) :
         C == 5 ? f(1234): f(444);
}
is converted into this:
        switch int %C, label %cond_false.3 [
                 int 0, label %cond_true.0
                 int 1, label %cond_true.1
                 int 4, label %cond_true.2
                 int 5, label %cond_true.3
        ]
instead of a whole bunch of conditional branches.
Admittedly the code is ugly, and incomplete.  To be complete, we need to add
br -> switch merging and switch -> br merging.  For example, this testcase:
struct foo { int Q, R, Z; };
#define A (X->Q+X->R * 123)
int test(struct foo *X) {
  return A  == 123 ? X1() :
        A == 12321 ? X2():
        (A == 111 || A == 222) ? X3() :
        A == 875 ? X4() : X5();
}
Gets compiled to this:
        switch int %tmp.7, label %cond_false.2 [
                 int 123, label %cond_true.0
                 int 12321, label %cond_true.1
                 int 111, label %cond_true.2
                 int 222, label %cond_true.2
        ]
...
cond_false.2:           ; preds = %entry
        %tmp.52 = seteq int %tmp.7, 875         ; <bool> [#uses=1]
        br bool %tmp.52, label %cond_true.3, label %cond_false.3
where the branch could be folded into the switch.
This kind of thing occurs *ALL OF THE TIME*, especially in programs like
176.gcc, which is a horrible mess of code.  It contains stuff like *shudder*:
#define SWITCH_TAKES_ARG(CHAR) \
  (   (CHAR) == 'D' \
   || (CHAR) == 'U' \
   || (CHAR) == 'o' \
   || (CHAR) == 'e' \
   || (CHAR) == 'u' \
   || (CHAR) == 'I' \
   || (CHAR) == 'm' \
   || (CHAR) == 'L' \
   || (CHAR) == 'A' \
   || (CHAR) == 'h' \
   || (CHAR) == 'z')
and
#define CONST_OK_FOR_LETTER_P(VALUE, C)                 \
  ((C) == 'I' ? SMALL_INTVAL (VALUE)                    \
   : (C) == 'J' ? SMALL_INTVAL (-(VALUE))               \
   : (C) == 'K' ? (unsigned)(VALUE) < 32                \
   : (C) == 'L' ? ((VALUE) & 0xffff) == 0               \
   : (C) == 'M' ? integer_ok_for_set (VALUE)            \
   : (C) == 'N' ? (VALUE) < 0                           \
   : (C) == 'O' ? (VALUE) == 0                          \
   : (C) == 'P' ? (VALUE) >= 0                          \
   : 0)
and
#define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN)                     \
{                                                               \
  if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 1))) \
    (X) = gen_rtx (PLUS, SImode, XEXP (X, 0),                   \
                   copy_to_mode_reg (SImode, XEXP (X, 1)));     \
  if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 0))) \
    (X) = gen_rtx (PLUS, SImode, XEXP (X, 1),                   \
                   copy_to_mode_reg (SImode, XEXP (X, 0)));     \
  if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == MULT)   \
    (X) = gen_rtx (PLUS, SImode, XEXP (X, 1),                   \
                   force_operand (XEXP (X, 0), 0));             \
  if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == MULT)   \
    (X) = gen_rtx (PLUS, SImode, XEXP (X, 0),                   \
                   force_operand (XEXP (X, 1), 0));             \
  if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == PLUS)   \
    (X) = gen_rtx (PLUS, Pmode, force_operand (XEXP (X, 0), NULL_RTX),\
                   XEXP (X, 1));                                \
  if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == PLUS)   \
    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0),                    \
                   force_operand (XEXP (X, 1), NULL_RTX));      \
  if (GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == CONST       \
           || GET_CODE (X) == LABEL_REF)                        \
    (X) = legitimize_address (flag_pic, X, 0, 0);               \
  if (memory_address_p (MODE, X))                               \
    goto WIN; }
and others.  These macros get used multiple times of course.  These are such
lovely candidates for macros, aren't they?  :)
This code also nicely handles LLVM constructs that look like this:
  if (isa<CastInst>(I))
   ...
  else if (isa<BranchInst>(I))
   ...
  else if (isa<SetCondInst>(I))
   ...
  else if (isa<UnwindInst>(I))
   ...
  else if (isa<VAArgInst>(I))
   ...
where the isa can obviously be a dyn_cast as well.  Switch instructions are a
good thing.
llvm-svn: 11870 | 
| | 
| 
| 
| | llvm-svn: 11799 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | This case occurs many times in various benchmarks, especially when combined
with the previous patch.  This allows it to get stuff like:
  if (X == 4 || X == 3)
    if (X == 5 || X == 8)
and
switch (X) {
case 4: case 5: case 6:
  if (X == 4 || X == 5)
llvm-svn: 11797 | 
| | 
| 
| 
| | llvm-svn: 11793 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| | This turns code like this:
  if (X == 4 | X == 7)
and
  if (X != 4 & X != 7)
into switch instructions.
llvm-svn: 11792 | 
| | 
| 
| 
| | llvm-svn: 11565 | 
| | 
| 
| 
| 
| 
| | see the testcase for the reasoning.
llvm-svn: 11496 | 
| | 
| 
| 
| | llvm-svn: 11474 | 
| | 
| 
| 
| 
| 
| 
| | allowed in invoke instructions.  Thus, if we are inlining a call to an intrinsic
function into an invoke site, we don't need to turn the call into an invoke!
llvm-svn: 11384 | 
| | 
| 
| 
| 
| 
| 
| | Having a proper 'select' instruction would allow the elimination of a lot
of the special case cruft in this patch, but we don't have one yet.
llvm-svn: 11307 | 
| | 
| 
| 
| | llvm-svn: 11301 | 
| | 
| 
| 
| 
| 
| | 'unwind' dest
llvm-svn: 11202 | 
| | 
| 
| 
| | llvm-svn: 11123 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | 1. Don't scan to the end of alloca instructions in the caller function to
     insert inlined allocas, just insert at the top.  This saves a lot of
     time inlining into functions with a lot of allocas.
  2. Use splice to move the alloca instructions over, instead of remove/insert.
     This allows us to transfer a block at a time, and eliminates a bunch of
     silly symbol table manipulations.
This speeds up the inliner on the testcase in PR209 from 1.73s -> 1.04s (67%)
llvm-svn: 11118 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| 
| | basic block,
and that basic block ends with a return instruction.  In this case, we can just splice
the cloned "body" of the function directly into the source basic block, avoiding a lot
of rearrangement and splitBasicBlock's linear scan over the split block.  This speeds up
the inliner on the testcase in PR209 from 2.3s to 1.7s, a 35% reduction.
llvm-svn: 11116 | 
| | 
| 
| 
| 
| 
| 
| | before we delete the original call site, allowing slight simplifications of
code, but nothing exciting.
llvm-svn: 11109 | 
| | 
| 
| 
| 
| 
| 
| 
| 
| 
| | process.  The only optimization we did so far is to avoid creating a
PHI node, then immediately destroying it in the common case where the
callee has one return statement.  Instead, we just don't create the return
value.  This has no noticable performance impact, but paves the way for
future improvements.
llvm-svn: 11108 | 
| | 
| 
| 
| 
| 
| 
| 
| | to add the cloned block to.  This allows the block to be added to the function
immediately, and all of the instructions to be immediately added to the function
symbol table, which speeds up the inliner from 3.7 -> 3.38s on the PR209.
llvm-svn: 11107 | 
| | 
| 
| 
| 
| 
| 
| | process them all as a group.  This speeds up SRoA/mem2reg from 28.46s to
0.62s on the testcase from PR209.
llvm-svn: 11100 | 
| | 
| 
| 
| 
| 
| | SRoA/mem2reg from 41.2s to 27.5s on the testcase in PR209.
llvm-svn: 11099 | 
| | 
| 
| 
| | llvm-svn: 10799 | 
| | 
| 
| 
| | llvm-svn: 10792 | 
| | 
| 
| 
| | llvm-svn: 10789 | 
| | 
| 
| 
| 
| 
| | ConstantFoldTerminator
llvm-svn: 10785 | 
| | 
| 
| 
| 
| 
| | case that the C/C++ front-end generates.
llvm-svn: 10761 | 
| | 
| 
| 
| | llvm-svn: 10727 | 
| | 
| 
| 
| | llvm-svn: 10529 | 
| | 
| 
| 
| | llvm-svn: 10127 | 
| | 
| 
| 
| | llvm-svn: 10111 | 
| | 
| 
| 
| | llvm-svn: 10110 | 
| | 
| 
| 
| | llvm-svn: 9903 | 
| | 
| 
| 
| | llvm-svn: 9857 | 
| | 
| 
| 
| | llvm-svn: 9853 | 
| | 
| 
| 
| | llvm-svn: 9753 | 
| | 
| 
| 
| | llvm-svn: 9739 | 
| | 
| 
| 
| | llvm-svn: 9738 |