-rw-r--r--  gcc/ChangeLog        |  34
-rw-r--r--  gcc/common.opt       |   8
-rw-r--r--  gcc/doc/invoke.texi  |  13
-rw-r--r--  gcc/flags.h          |   5
-rw-r--r--  gcc/gcse.c           | 638
-rw-r--r--  gcc/opts.c           |   5
-rw-r--r--  gcc/params.def       |  20
-rw-r--r--  gcc/params.h         |   4
-rw-r--r--  gcc/passes.c         |  24
-rw-r--r--  gcc/rtl.h            |   1
-rw-r--r--  gcc/toplev.c         |   4
11 files changed, 750 insertions(+), 6 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a28101b899d..86c9d2e6f93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,37 @@
+2004-03-03 Mostafa Hagog <mustafa@il.ibm.com>
+
+ * common.opt: Add description of the new -fgcse-after-reload flag.
+
+ * flags.h (flag_gcse_after_reload): Declaration of global variable.
+
+ * gcse.c (reg_used_on_edge, reg_set_between_after_reload_p,
+ reg_used_between_after_reload_p, get_avail_load_store_reg,
+ is_jump_table_basic_block, bb_has_well_behaved_predecessors,
+ get_bb_avail_insn, hash_scan_set_after_reload,
+ compute_hash_table_after_reload, eliminate_partially_redundant_loads,
+ gcse_after_reload): New functions to implement
+ gcse-after-reload.
+ (gcse_after_reload_main): New function, the main entry point to
+ gcse-after-reload.
+
+ * rtl.h (gcse_after_reload_main): Declaration of the new function.
+
+ * opts.c (common_handle_option): Handle the -fgcse-after-reload flag.
+
+ * toplev.c (flag_gcse_after_reload): Initialization.
+
+ * passes.c (rest_of_handle_gcse2): Call gcse_after_reload_main.
+
+ * params.def (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
+ PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION): New parameters for tuning
+ the gcse after reload optimization.
+
+ * params.h (GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
+ GCSE_AFTER_RELOAD_CRITICAL_FRACTION): Two macros to access the tuning
+ parameters.
+
+ * doc/invoke.texi: Documentation for the new flag gcse-after-reload.
+
2004-03-03 Nicolas Pitre <nico@cam.org>
* config/arm/ieee754-df.S (muldf3, divdf3): Fix denormalization of
diff --git a/gcc/common.opt b/gcc/common.opt
index cc7f218426c..d9faa60a8f5 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -371,7 +371,13 @@ Perform store motion after global common subexpression elimination
fgcse-las
Common
-Perform redundant load after store elimination in global common subexpression elimination
+Perform redundant load after store elimination in global common subexpression
+elimination
+
+fgcse-after-reload
+Common
+Perform global common subexpression elimination after register allocation
+has finished.
fguess-branch-probability
Common
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4fb703ce105..26b9a127bec 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -270,8 +270,8 @@ in the following sections.
-fdelayed-branch -fdelete-null-pointer-checks @gol
-fexpensive-optimizations -ffast-math -ffloat-store @gol
-fforce-addr -fforce-mem -ffunction-sections @gol
--fgcse -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize @gol
--fcrossjumping -fif-conversion -fif-conversion2 @gol
+-fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol
+-floop-optimize -fcrossjumping -fif-conversion -fif-conversion2 @gol
-finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
-fmove-all-movables -fnew-ra -fno-branch-count-reg @gol
@@ -3646,7 +3646,8 @@ invoking @option{-O2} on programs that use computed gotos.
@opindex O3
Optimize yet more. @option{-O3} turns on all optimizations specified by
@option{-O2} and also turns on the @option{-finline-functions},
-@option{-fweb} and @option{-frename-registers} options.
+@option{-fweb}, @option{-frename-registers}
+and @option{-fgcse-after-reload} options.
@item -O0
@opindex O0
@@ -3957,6 +3958,12 @@ same memory location (both partial and full redundancies).
Enabled by default when gcse is enabled.
+@item -fgcse-after-reload
+@opindex fgcse-after-reload
+When @option{-fgcse-after-reload} is enabled, a redundant load elimination
+pass is performed after reload.  The purpose of this pass is to clean up
+redundant spilling.
+
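To make this concrete, the following is a minimal, hypothetical C fragment (not part of this patch; the function and variable names are invented for illustration) showing the kind of reload-generated redundancy the pass removes:

/* Hypothetical example.  Under high register pressure the allocator
   may spill X to a stack slot; reload then re-loads X from that slot
   in both arms of the branch, and the load of X after the join is
   redundant with those earlier loads.  The pass can replace it with a
   register copy, or add a load on a colder edge to make it fully
   redundant, and then delete it.  */
long
use_after_join (long x, long a, long b, long c, long d,
                long e, long f, long g, int cond)
{
  long t = a * b + c * d + e * f + g;   /* creates register pressure */
  if (cond)
    t += x;       /* first reload of X */
  else
    t -= x;       /* second reload of X */
  return t + x;   /* this reload is the redundant one */
}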
@item -floop-optimize
@opindex floop-optimize
Perform loop optimizations: move constant expressions out of loops, simplify
diff --git a/gcc/flags.h b/gcc/flags.h
index b986479c4b5..b088f6cb3ba 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -672,6 +672,11 @@ extern int flag_gcse_sm;
extern int flag_gcse_las;
+/* Nonzero if we want to perform global redundancy elimination after
+ register allocation. */
+
+extern int flag_gcse_after_reload;
+
/* Nonzero if value histograms should be used to optimize code. */
extern int flag_value_profile_transformations;
diff --git a/gcc/gcse.c b/gcc/gcse.c
index ad1c9fda64e..975fb1fbe2f 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -1980,6 +1980,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
antic_occr->insn = insn;
antic_occr->next = NULL;
+ antic_occr->deleted_p = 0;
}
}
@@ -2016,6 +2017,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
avail_occr->insn = insn;
avail_occr->next = NULL;
+ avail_occr->deleted_p = 0;
}
}
}
@@ -2102,6 +2104,7 @@ insert_set_in_table (rtx x, rtx insn, struct hash_table *table)
cur_occr->insn = insn;
cur_occr->next = NULL;
+ cur_occr->deleted_p = 0;
}
}
@@ -8091,4 +8094,639 @@ is_too_expensive (const char *pass)
return false;
}
+/* The following code implements gcse after reload; the purpose of this
+ pass is to clean up redundant loads generated by reload and other
+ optimizations that come after gcse. It searches for simple inter-block
+ redundancies and tries to eliminate them by adding moves and loads
+ in cold places. */
+
+/* The following structure holds the information about the occurrences of
+ the redundant instructions. */
+struct unoccr
+{
+ struct unoccr *next;
+ edge pred;
+ rtx insn;
+};
+
+static bool reg_used_on_edge (rtx, edge);
+static rtx reg_set_between_after_reload_p (rtx, rtx, rtx);
+static rtx reg_used_between_after_reload_p (rtx, rtx, rtx);
+static rtx get_avail_load_store_reg (rtx);
+static bool is_jump_table_basic_block (basic_block);
+static bool bb_has_well_behaved_predecessors (basic_block);
+static struct occr* get_bb_avail_insn (basic_block, struct occr *);
+static void hash_scan_set_after_reload (rtx, rtx, struct hash_table *);
+static void compute_hash_table_after_reload (struct hash_table *);
+static void eliminate_partially_redundant_loads (basic_block,
+ rtx,
+ struct expr *);
+static void gcse_after_reload (void);
+void gcse_after_reload_main (rtx, FILE *);
+
+
+/* Check if register REG is used in any insn waiting to be inserted on
+ edge E.  This assumes that no such insn can be a CALL_INSN; if that
+ assumption ever fails, call reg_used_between_p with PREV (insn),
+ NEXT (insn) instead of reg_overlap_mentioned_p. */
+
+static bool
+reg_used_on_edge (rtx reg, edge e)
+{
+ rtx insn;
+
+ for (insn = e->insns; insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ return true;
+
+ return false;
+}
+
+/* Return the insn that sets register REG or clobbers it in between
+ FROM_INSN and TO_INSN (exclusive of those two).
+ Just like reg_set_between_p but for hard registers, not pseudos. */
+
+static rtx
+reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
+{
+ rtx insn;
+ int regno;
+
+ if (GET_CODE (reg) != REG)
+ abort ();
+ regno = REGNO (reg);
+
+ /* We are called after register allocation. */
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ abort ();
+
+ if (from_insn == to_insn)
+ return NULL_RTX;
+
+ for (insn = NEXT_INSN (from_insn);
+ insn != to_insn;
+ insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ if (FIND_REG_INC_NOTE (insn, reg)
+ || (GET_CODE (insn) == CALL_INSN
+ && call_used_regs[regno])
+ || find_reg_fusage (insn, CLOBBER, reg))
+ return insn;
+ }
+ if (set_of (reg, insn) != NULL_RTX)
+ return insn;
+ }
+ return NULL_RTX;
+}
+
+/* Return the insn that uses register REG in between FROM_INSN and TO_INSN
+ (exclusive of those two).  Similar to reg_used_between_p but for hard
+ registers, not pseudos. */
+
+static rtx
+reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
+{
+ rtx insn;
+ int regno;
+
+ if (GET_CODE (reg) != REG)
+ return to_insn;
+ regno = REGNO (reg);
+
+ /* We are called after register allocation. */
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ abort ();
+ if (from_insn == to_insn)
+ return NULL_RTX;
+
+ for (insn = NEXT_INSN (from_insn);
+ insn != to_insn;
+ insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && (reg_overlap_mentioned_p (reg, PATTERN (insn))
+ || (GET_CODE (insn) == CALL_INSN
+ && call_used_regs[regno])
+ || find_reg_fusage (insn, USE, reg)
+ || find_reg_fusage (insn, CLOBBER, reg)))
+ return insn;
+ return NULL_RTX;
+}
+
+/* Return the loaded/stored register of a load/store instruction. */
+
+static rtx
+get_avail_load_store_reg (rtx insn)
+{
+ if (GET_CODE (SET_DEST (PATTERN (insn))) == REG) /* A load. */
+ return SET_DEST (PATTERN (insn));
+ if (GET_CODE (SET_SRC (PATTERN (insn))) == REG) /* A store. */
+ return SET_SRC (PATTERN (insn));
+ abort ();
+}
+
+/* Return true if BB ends with a jump table (an ADDR_VEC or ADDR_DIFF_VEC
+ jump); such blocks are not handled, just like abnormal edges. */
+
+static bool
+is_jump_table_basic_block (basic_block bb)
+{
+ rtx insn = BB_END (bb);
+
+ if (GET_CODE (insn) == JUMP_INSN
+ && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+ return true;
+ return false;
+}
+
+/* Return nonzero if the predecessors of BB are "well behaved". */
+
+static bool
+bb_has_well_behaved_predecessors (basic_block bb)
+{
+ edge pred;
+
+ if (! bb->pred)
+ return false;
+ for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
+ if (((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
+ || is_jump_table_basic_block (pred->src))
+ return false;
+ return true;
+}
+
+
+/* Scan the list of available occurrences OCCR and return the first one
+ whose insn lies in basic block BB, or NULL if there is none. */
+
+static struct occr*
+get_bb_avail_insn (basic_block bb, struct occr *occr)
+{
+ for (; occr != NULL; occr = occr->next)
+ if (BLOCK_FOR_INSN (occr->insn)->index == bb->index)
+ return occr;
+ return NULL;
+}
+
+/* Perform a partial GCSE pass after reload, trying to eliminate redundant
+ loads created by the reload pass.  We look for loads that are fully or
+ partially redundant with loads/stores in predecessor BBs, and try to add
+ loads on the remaining edges to make them fully redundant.  We also check
+ whether it is worth adding those loads in order to delete the redundant
+ load.
+
+ Algorithm:
+ 1. Build available expressions hash table:
+ For each load/store instruction, if the loaded/stored memory didn't
+ change until the end of the basic block add this memory expression to
+ the hash table.
+ 2. Perform Redundancy elimination:
+ For each load instruction do the following:
+ perform partial redundancy elimination, check if it's worth adding
+ loads to make the load fully redundant. If so add loads and
+ register copies and delete the load.
+
+ Future enhancement:
+ If the loaded register is used/defined between the load and some store,
+ look for some other free register between the load and all its stores,
+ and replace the load with a copy from this register to the loaded
+ register. */
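The profitability test in step 2 can be modelled, independently of the GCC internals, by the standalone C sketch below. All names and numbers are illustrative only; the default fractions 3 and 10 correspond to the params this patch adds in params.def.

#include <stdio.h>

/* Illustrative model of the cost check performed by
   eliminate_partially_redundant_loads.  */
struct pred_edge
{
  long long count;   /* profile execution count of the edge */
  int has_avail;     /* nonzero: loaded value already sits in a register */
  int critical;      /* nonzero: the edge is critical */
};

static int
worth_eliminating (const struct pred_edge *preds, int n,
                   int partial_fraction, int critical_fraction)
{
  long long ok = 0, not_ok = 0, crit = 0;
  int npred_ok = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      if (preds[i].has_avail)
        {
          ok += preds[i].count;
          npred_ok++;
        }
      else
        not_ok += preds[i].count;
      if (preds[i].critical)
        crit += preds[i].count;
    }

  if (npred_ok == 0)
    return 0;   /* No predecessor can supply the value by a register copy.  */
  if (ok < (long long) partial_fraction * not_ok)
    return 0;   /* The compensating loads would execute too often.  */
  if (ok < (long long) critical_fraction * crit)
    return 0;   /* Too much of the benefit sits on critical edges.  */
  return 1;
}

int
main (void)
{
  /* One hot predecessor already holds the value; one cold one does not.  */
  struct pred_edge preds[2] = { { 900, 1, 0 }, { 100, 0, 0 } };

  /* With the default ratios 3 and 10: 900 >= 3 * 100 and 900 >= 10 * 0,
     so the redundant load is deleted and a compensating load is added
     on the cold edge.  */
  printf ("%s\n", worth_eliminating (preds, 2, 3, 10) ? "apply" : "skip");
  return 0;
}

The actual pass additionally bails out when optimizing for size and more than one predecessor would need an inserted copy, to keep code size from growing.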
+
+
+/* This handles the case where several stores feed a partially redundant
+ load. It checks if the redundancy elimination is possible and if it's
+ worth it. */
+
+static void
+eliminate_partially_redundant_loads (basic_block bb, rtx insn,
+ struct expr *expr)
+{
+ edge pred;
+ rtx avail_insn = NULL_RTX;
+ rtx avail_reg;
+ rtx dest, pat;
+ struct occr *a_occr;
+ struct unoccr *occr, *avail_occrs = NULL;
+ struct unoccr *unoccr, *unavail_occrs = NULL;
+ int npred_ok = 0;
+ gcov_type ok_count = 0; /* Redundant load execution count. */
+ gcov_type critical_count = 0; /* Execution count of critical edges. */
+
+ /* The execution count of the loads to be added to make the
+ load fully redundant. */
+ gcov_type not_ok_count = 0;
+ basic_block pred_bb;
+
+ pat = PATTERN (insn);
+ dest = SET_DEST (pat);
+ /* Check that the loaded register is neither used nor killed from the
+ beginning of the block up to this insn. */
+ if (reg_used_between_after_reload_p (dest,
+ PREV_INSN (BB_HEAD (bb)), insn))
+ return;
+
+ /* Check potential for replacing load with copy for predecessors. */
+ for (pred = bb->pred; pred; pred = pred->pred_next)
+ {
+ rtx next_pred_bb_end;
+
+ avail_insn = NULL_RTX;
+ pred_bb = pred->src;
+ next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
+ for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
+ a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
+ {
+ /* Check if the loaded register is not used. */
+ avail_insn = a_occr->insn;
+ if (! (avail_reg = get_avail_load_store_reg (avail_insn)))
+ abort ();
+ /* Make sure we can generate a move from register avail_reg to
+ dest. */
+ extract_insn (gen_move_insn (copy_rtx (dest),
+ copy_rtx (avail_reg)));
+ if (! constrain_operands (1)
+ || reg_killed_on_edge (avail_reg, pred)
+ || reg_used_on_edge (dest, pred))
+ {
+ avail_insn = NULL;
+ continue;
+ }
+ if (! reg_set_between_after_reload_p (avail_reg, avail_insn,
+ next_pred_bb_end))
+ /* AVAIL_INSN remains non-null. */
+ break;
+ else
+ avail_insn = NULL;
+ }
+ if (avail_insn != NULL_RTX)
+ {
+ npred_ok++;
+ ok_count += pred->count;
+ if (EDGE_CRITICAL_P (pred))
+ critical_count += pred->count;
+ occr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
+ occr->insn = avail_insn;
+ occr->pred = pred;
+ occr->next = avail_occrs;
+ avail_occrs = occr;
+ }
+ else
+ {
+ not_ok_count += pred->count;
+ if (EDGE_CRITICAL_P (pred))
+ critical_count += pred->count;
+ unoccr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
+ unoccr->insn = NULL_RTX;
+ unoccr->pred = pred;
+ unoccr->next = unavail_occrs;
+ unavail_occrs = unoccr;
+ }
+ }
+
+ if (npred_ok == 0 /* No load can be replaced by copy. */
+ || (optimize_size && npred_ok > 1)) /* Prevent exploding the code. */
+ return;
+
+ /* Check if it's worth applying the partial redundancy elimination. */
+ if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
+ return;
+
+ if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
+ return;
+
+ /* Generate moves to the loaded register from where
+ the memory is available. */
+ for (occr = avail_occrs; occr; occr = occr->next)
+ {
+ avail_insn = occr->insn;
+ pred = occr->pred;
+ /* Set avail_reg to be the register having the value of the
+ memory. */
+ avail_reg = get_avail_load_store_reg (avail_insn);
+ if (! avail_reg)
+ abort ();
+
+ insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
+ copy_rtx (avail_reg)),
+ pred);
+
+ if (gcse_file)
+ fprintf (gcse_file,
+ "GCSE AFTER reload generating move from %d to %d on \
+ edge from %d to %d\n",
+ REGNO (avail_reg),
+ REGNO (dest),
+ pred->src->index,
+ pred->dest->index);
+ }
+
+ /* Regenerate loads where the memory is unavailable. */
+ for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
+ {
+ pred = unoccr->pred;
+ insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
+
+ if (gcse_file)
+ fprintf (gcse_file,
+ "GCSE AFTER reload: generating on edge from %d to %d\
+ a copy of load:\n",
+ pred->src->index,
+ pred->dest->index);
+ }
+
+ /* Delete the insn if it is not available in this block; otherwise mark
+ it for later deletion, since keeping it available may help discover
+ additional redundancies. */
+ for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
+ a_occr && (a_occr->insn != insn);
+ a_occr = get_bb_avail_insn (bb, a_occr->next));
+
+ if (!a_occr)
+ delete_insn (insn);
+ else
+ a_occr->deleted_p = 1;
+}
+
+/* Perform the redundancy elimination as described above. */
+
+static void
+gcse_after_reload (void)
+{
+ unsigned int i;
+ rtx insn;
+ basic_block bb;
+ struct expr *expr;
+ struct occr *occr;
+
+ /* Note we start at block 1. */
+
+ if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
+ return;
+
+ FOR_BB_BETWEEN (bb,
+ ENTRY_BLOCK_PTR->next_bb->next_bb,
+ EXIT_BLOCK_PTR,
+ next_bb)
+ {
+ if (! bb_has_well_behaved_predecessors (bb))
+ continue;
+
+ /* Do not try this optimization on cold basic blocks. */
+ if (probably_cold_bb_p (bb))
+ continue;
+
+ reset_opr_set_tables ();
+
+ for (insn = BB_HEAD (bb);
+ insn != NULL
+ && insn != NEXT_INSN (BB_END (bb));
+ insn = NEXT_INSN (insn))
+ {
+ /* Is it a load - of the form (set (reg) (mem))? */
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_DEST (PATTERN (insn))) == REG
+ && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
+ {
+ rtx pat = PATTERN (insn);
+ rtx src = SET_SRC (pat);
+ struct expr *expr;
+
+ if (general_operand (src, GET_MODE (src))
+ /* Is the expression recorded? */
+ && (expr = lookup_expr (src, &expr_hash_table)) != NULL
+ /* Are the operands unchanged since the start of the
+ block? */
+ && oprs_not_set_p (src, insn)
+ && ! MEM_VOLATILE_P (src)
+ && GET_MODE (src) != BLKmode
+ && !(flag_non_call_exceptions && may_trap_p (src))
+ && !side_effects_p (src))
+ {
+ /* We now have a load (insn) and an available memory expression
+ at the start of its BB (expr).  Try to remove the load if it
+ is redundant. */
+ eliminate_partially_redundant_loads (bb, insn, expr);
+ }
+ }
+
+ /* Keep track of everything modified by this insn. */
+ if (INSN_P (insn))
+ mark_oprs_set (insn);
+ }
+ }
+
+ commit_edge_insertions ();
+
+ /* Go over the expression hash table and delete insns that were
+ marked for later deletion. */
+ for (i = 0; i < expr_hash_table.size; i++)
+ {
+ for (expr = expr_hash_table.table[i];
+ expr != NULL;
+ expr = expr->next_same_hash)
+ for (occr = expr->avail_occr; occr; occr = occr->next)
+ if (occr->deleted_p)
+ delete_insn (occr->insn);
+ }
+}
+
+/* Scan pattern PAT of INSN and add an entry to the hash TABLE.
+ After reload we are interested in loads/stores only. */
+
+static void
+hash_scan_set_after_reload (rtx pat, rtx insn, struct hash_table *table)
+{
+ rtx src = SET_SRC (pat);
+ rtx dest = SET_DEST (pat);
+
+ if (GET_CODE (src) != MEM && GET_CODE (dest) != MEM)
+ return;
+
+ if (GET_CODE (dest) == REG)
+ {
+ if (/* Don't GCSE something if we can't do a reg/reg copy. */
+ can_copy_p (GET_MODE (dest))
+ /* GCSE commonly inserts instruction after the insn. We can't
+ do that easily for EH_REGION notes so disable GCSE on these
+ for now. */
+ && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+ /* Is SET_SRC something we want to gcse? */
+ && general_operand (src, GET_MODE (src))
+ /* Don't CSE a nop. */
+ && ! set_noop_p (pat)
+ && ! JUMP_P (insn))
+ {
+ /* An expression is not available if its operands are
+ subsequently modified, including this insn. */
+ if (oprs_available_p (src, insn))
+ insert_expr_in_table (src, GET_MODE (dest), insn, 0, 1, table);
+ }
+ }
+ else if (GET_CODE (src) == REG)
+ {
+ /* A store of a register to memory: record the stored memory
+ expression if it stays valid until the end of the block. */
+ if (/* Don't GCSE something if we can't do a reg/reg copy. */
+ can_copy_p (GET_MODE (src))
+ /* GCSE commonly inserts instruction after the insn. We can't
+ do that easily for EH_REGION notes so disable GCSE on these
+ for now. */
+ && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+ /* Is SET_DEST something we want to gcse? */
+ && general_operand (dest, GET_MODE (dest))
+ /* Don't CSE a nop. */
+ && ! set_noop_p (pat)
+ && ! JUMP_P (insn)
+ && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
+ /* Check if the memory expression is killed after insn. */
+ && ! load_killed_in_block_p (BLOCK_FOR_INSN (insn),
+ INSN_CUID (insn) + 1,
+ dest,
+ 1)
+ && oprs_unchanged_p (XEXP (dest, 0), insn, 1))
+ {
+ insert_expr_in_table (dest, GET_MODE (dest), insn, 0, 1, table);
+ }
+ }
+}
+
+
+/* Create hash table of memory expressions available at end of basic
+ blocks. */
+
+static void
+compute_hash_table_after_reload (struct hash_table *table)
+{
+ unsigned int i;
+
+ table->set_p = 0;
+
+ /* Initialize count of number of entries in hash table. */
+ table->n_elems = 0;
+ memset ((char *) table->table, 0,
+ table->size * sizeof (struct expr *));
+
+ /* While we compute the hash table we also compute a bit array of which
+ registers are set in which blocks. */
+ sbitmap_vector_zero (reg_set_in_block, last_basic_block);
+
+ /* Re-cache any INSN_LIST nodes we have allocated. */
+ clear_modify_mem_tables ();
+
+ /* Some working arrays used to track first and last set in each block. */
+ reg_avail_info = (struct reg_avail_info*)
+ gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));
+
+ for (i = 0; i < max_gcse_regno; ++i)
+ reg_avail_info[i].last_bb = NULL;
+
+ FOR_EACH_BB (current_bb)
+ {
+ rtx insn;
+ unsigned int regno;
+
+ /* First pass over the instructions records information used to
+ determine when registers and memory are first and last set. */
+ for (insn = BB_HEAD (current_bb);
+ insn && insn != NEXT_INSN (BB_END (current_bb));
+ insn = NEXT_INSN (insn))
+ {
+ if (! INSN_P (insn))
+ continue;
+
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ bool clobbers_all = false;
+
+#ifdef NON_SAVING_SETJMP
+ if (NON_SAVING_SETJMP
+ && find_reg_note (insn, REG_SETJMP, NULL_RTX))
+ clobbers_all = true;
+#endif
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (clobbers_all
+ || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+ regno))
+ record_last_reg_set_info (insn, regno);
+
+ mark_call (insn);
+ }
+
+ note_stores (PATTERN (insn), record_last_set_info, insn);
+
+ if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx src, dest;
+
+ src = SET_SRC (PATTERN (insn));
+ dest = SET_DEST (PATTERN (insn));
+ if (GET_CODE (src) == MEM && auto_inc_p (XEXP (src, 0)))
+ {
+ regno = REGNO (XEXP (XEXP (src, 0), 0));
+ record_last_reg_set_info (insn, regno);
+ }
+ if (GET_CODE (dest) == MEM && auto_inc_p (XEXP (dest, 0)))
+ {
+ regno = REGNO (XEXP (XEXP (dest, 0), 0));
+ record_last_reg_set_info (insn, regno);
+ }
+ }
+ }
+
+ /* The next pass builds the hash table. */
+ for (insn = BB_HEAD (current_bb);
+ insn && insn != NEXT_INSN (BB_END (current_bb));
+ insn = NEXT_INSN (insn))
+ if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
+ if (! find_reg_note (insn, REG_LIBCALL, NULL_RTX))
+ hash_scan_set_after_reload (PATTERN (insn), insn, table);
+ }
+
+ free (reg_avail_info);
+ reg_avail_info = NULL;
+}
+
+
+/* Main entry point of the GCSE after reload - clean some redundant loads
+ due to spilling. */
+
+void
+gcse_after_reload_main (rtx f, FILE* file)
+{
+ gcse_subst_count = 0;
+ gcse_create_count = 0;
+
+ gcse_file = file;
+
+ gcc_obstack_init (&gcse_obstack);
+ bytes_used = 0;
+
+ /* We need alias. */
+ init_alias_analysis ();
+
+ max_gcse_regno = max_reg_num ();
+
+ alloc_reg_set_mem (max_gcse_regno);
+ alloc_gcse_mem (f);
+ alloc_hash_table (max_cuid, &expr_hash_table, 0);
+ compute_hash_table_after_reload (&expr_hash_table);
+
+ if (gcse_file)
+ dump_hash_table (gcse_file, "Expression", &expr_hash_table);
+
+ if (expr_hash_table.n_elems > 0)
+ gcse_after_reload ();
+
+ free_hash_table (&expr_hash_table);
+
+ free_gcse_mem ();
+ free_reg_set_mem ();
+
+ /* We are finished with alias. */
+ end_alias_analysis ();
+
+ obstack_free (&gcse_obstack, NULL);
+}
+
#include "gt-gcse.h"
diff --git a/gcc/opts.c b/gcc/opts.c
index a4267f87723..fa1971cd651 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -574,6 +574,7 @@ decode_options (unsigned int argc, const char **argv)
flag_rename_registers = 1;
flag_unswitch_loops = 1;
flag_web = 1;
+ flag_gcse_after_reload = 1;
}
if (optimize < 2 || optimize_size)
@@ -1035,6 +1036,10 @@ common_handle_option (size_t scode, const char *arg,
flag_gcse_sm = value;
break;
+ case OPT_fgcse_after_reload:
+ flag_gcse_after_reload = value;
+ break;
+
case OPT_fgcse_las:
flag_gcse_las = value;
break;
diff --git a/gcc/params.def b/gcc/params.def
index e00e22e3b0d..7be8ddc1322 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -131,7 +131,25 @@ DEFPARAM(PARAM_MAX_GCSE_PASSES,
"max-gcse-passes",
"The maximum number of passes to make when doing GCSE",
1)
-
+/* This is the threshold ratio used to decide when to perform partial
+ redundancy elimination after reload.  We perform the elimination
+ when the following holds:
+ (Redundant load execution count)
+ ------------------------------- >= GCSE_AFTER_RELOAD_PARTIAL_FRACTION
+ (Added loads execution count) */
+DEFPARAM(PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
+ "gcse-after-reload-partial-fraction",
+ "The threshold ratio for performing partial redundancy elimination \
+ after reload.",
+ 3)
+/* This is the threshold ratio of the redundant load's execution count to
+ the execution count on critical edges that permits performing the load
+ redundancy elimination in gcse after reload. */
+DEFPARAM(PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION,
+ "gcse-after-reload-critical-fraction",
+ "The threshold ratio of critical edges execution count that permit \
+ performing redundancy elimination after reload.",
+ 10)
/* This parameter limits the number of insns in a loop that will be unrolled,
and by how much the loop is unrolled.
diff --git a/gcc/params.h b/gcc/params.h
index 0a784454ccd..d030dbe645a 100644
--- a/gcc/params.h
+++ b/gcc/params.h
@@ -104,6 +104,10 @@ typedef enum compiler_param
((size_t) PARAM_VALUE (PARAM_MAX_GCSE_MEMORY))
#define MAX_GCSE_PASSES \
PARAM_VALUE (PARAM_MAX_GCSE_PASSES)
+#define GCSE_AFTER_RELOAD_PARTIAL_FRACTION \
+ PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
+#define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \
+ PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
#define MAX_UNROLLED_INSNS \
PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
#endif /* ! GCC_PARAMS_H */
diff --git a/gcc/passes.c b/gcc/passes.c
index 1b012772f4b..1bd554c61e1 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -158,6 +158,7 @@ enum dump_file_index
DFI_lreg,
DFI_greg,
DFI_postreload,
+ DFI_gcse2,
DFI_flow2,
DFI_peephole2,
DFI_ce3,
@@ -178,7 +179,7 @@ enum dump_file_index
Remaining -d letters:
" e m q "
- " JK O Q WXY "
+ " K O Q WXY "
*/
static struct dump_file_info dump_file_tbl[DFI_MAX] =
@@ -210,6 +211,7 @@ static struct dump_file_info dump_file_tbl[DFI_MAX] =
{ "lreg", 'l', 1, 0, 0 },
{ "greg", 'g', 1, 0, 0 },
{ "postreload", 'o', 1, 0, 0 },
+ { "gcse2", 'J', 0, 0, 0 },
{ "flow2", 'w', 1, 0, 0 },
{ "peephole2", 'z', 1, 0, 0 },
{ "ce3", 'E', 1, 0, 0 },
@@ -788,6 +790,23 @@ rest_of_handle_sched2 (tree decl, rtx insns)
}
#endif
+static void
+rest_of_handle_gcse2 (tree decl, rtx insns)
+{
+ open_dump_file (DFI_gcse2, decl);
+
+ gcse_after_reload_main (insns, dump_file);
+ rebuild_jump_labels (insns);
+ delete_trivially_dead_insns (insns, max_reg_num ());
+ close_dump_file (DFI_gcse2, print_rtl_with_bb, insns);
+
+ ggc_collect ();
+
+#ifdef ENABLE_CHECKING
+ verify_flow_info ();
+#endif
+}
+
/* Register allocation pre-pass, to reduce number of moves necessary
for two-address machines. */
static void
@@ -1842,6 +1861,9 @@ rest_of_compilation (tree decl)
close_dump_file (DFI_postreload, print_rtl_with_bb, insns);
+ if (optimize > 0 && flag_gcse_after_reload)
+ rest_of_handle_gcse2 (decl, insns);
+
/* Re-create the death notes which were deleted during reload. */
timevar_push (TV_FLOW2);
open_dump_file (DFI_flow2, decl);
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 66578d08bf5..2e911362578 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2289,6 +2289,7 @@ extern rtx fis_get_condition (rtx);
#ifdef BUFSIZ
extern int gcse_main (rtx, FILE *);
extern int bypass_jumps (FILE *);
+extern void gcse_after_reload_main (rtx, FILE *);
#endif
/* In global.c */
diff --git a/gcc/toplev.c b/gcc/toplev.c
index da9e7f90bab..8192a1a8ea8 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -526,6 +526,9 @@ int flag_gcse_sm = 1;
int flag_gcse_las = 1;
+/* Nonzero means perform global cse after register allocation. */
+int flag_gcse_after_reload = 0;
+
/* Perform target register optimization before prologue / epilogue
threading. */
@@ -915,6 +918,7 @@ static const lang_independent_options f_options[] =
{"gcse-lm", &flag_gcse_lm, 1 },
{"gcse-sm", &flag_gcse_sm, 1 },
{"gcse-las", &flag_gcse_las, 1 },
+ {"gcse-after-reload", &flag_gcse_after_reload, 1},
{"branch-target-load-optimize", &flag_branch_target_load_optimize, 1 },
{"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1 },
{"btr-bb-exclusive", &flag_btr_bb_exclusive, 1 },