summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 12
-rw-r--r-- gcc/common.opt 4
-rw-r--r-- gcc/doc/invoke.texi 25
-rw-r--r-- gcc/opts.c 1
-rw-r--r-- gcc/tree-data-ref.c 26
-rw-r--r-- gcc/tree-data-ref.h 1
-rw-r--r-- gcc/tree-loop-distribution.c 45
7 files changed, 100 insertions, 14 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7d6a3de7bc3..ad2b271a5bb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2010-08-02 Sebastian Pop <sebastian.pop@amd.com>
+
+ * common.opt (ftree-loop-distribute-patterns): New.
+ * invoke.texi (-ftree-loop-distribute-patterns): Documented.
+ * opts.c (decode_options): Enable flag_tree_loop_distribute_patterns
+ at -O3.
+ * tree-data-ref.c (stores_zero_from_loop): New.
+ * tree-data-ref.h (stores_zero_from_loop): Declared.
+ * tree-loop-distribution.c (tree_loop_distribution): Call
+ stores_zero_from_loop.
+ (tree_loop_distribution): Check flag_tree_loop_distribute_patterns.
+
2010-08-02 Bernd Schmidt <bernds@codesourcery.com>
* postreload.c (reload_cse_simplify_operands): Take attribute enabled
diff --git a/gcc/common.opt b/gcc/common.opt
index 78dc1ba382d..0fe09a82109 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1333,6 +1333,10 @@ ftree-loop-distribution
Common Report Var(flag_tree_loop_distribution) Optimization
Enable loop distribution on trees
+ftree-loop-distribute-patterns
+Common Report Var(flag_tree_loop_distribute_patterns) Optimization
+Enable loop distribution for patterns transformed into a library call
+
ftree-loop-im
Common Report Var(flag_tree_loop_im) Init(1) Optimization
Enable loop invariant motion on trees
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c9f7664514c..b54407fe30d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -384,7 +384,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop @gol
-ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol
-ftree-forwprop -ftree-fre -ftree-loop-if-convert -ftree-loop-im @gol
--ftree-phiprop -ftree-loop-distribution @gol
+-ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
-ftree-sink -ftree-sra -ftree-switch-conversion @gol
@@ -6925,6 +6925,29 @@ DO I = 1, N
ENDDO
@end smallexample
+@item -ftree-loop-distribute-patterns
+@opindex ftree-loop-distribute-patterns
+Perform loop distribution of patterns that can be code generated with
+calls to a library. This flag is enabled by default at @option{-O3}.
+
+This pass distributes the initialization loops and replaces each of
+them with a call to @code{memset} that zeroes the array. For example, the loop
+@smallexample
+DO I = 1, N
+ A(I) = 0
+ B(I) = A(I) + I
+ENDDO
+@end smallexample
+is transformed to
+@smallexample
+DO I = 1, N
+ A(I) = 0
+ENDDO
+DO I = 1, N
+ B(I) = A(I) + I
+ENDDO
+@end smallexample
+and the initialization loop is then replaced by a call to @code{memset} that zeroes @code{A}.
+
@item -ftree-loop-im
@opindex ftree-loop-im
Perform loop invariant motion on trees. This pass moves only invariants that
diff --git a/gcc/opts.c b/gcc/opts.c
index ebb3f38056a..caf4e166b91 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -819,6 +819,7 @@ decode_options (unsigned int argc, const char **argv,
/* -O3 optimizations. */
opt3 = (optimize >= 3);
+ flag_tree_loop_distribute_patterns = opt3;
flag_predictive_commoning = opt3;
flag_inline_functions = opt3;
flag_unswitch_loops = opt3;
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index e7aa277a69f..265635039a6 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -5038,6 +5038,32 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
free (bbs);
}
+/* Initialize STMTS with all the statements of LOOP that contain a
+   store to memory of a constant zero, i.e. of the form "A[i] = 0" or
+   "A[i] = 0.0".  The basic blocks of LOOP are walked in dominator
+   order and every matching statement is appended to *STMTS.
+
+   A floating-point store only qualifies when the constant is a
+   positive, binary floating-point zero: the callers want stores that
+   can be code generated as a memset to zero, and neither -0.0 (sign
+   bit set) nor a decimal float zero is guaranteed to be all-zero
+   bytes.  */
+
+void
+stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
+{
+  unsigned int i;
+  basic_block bb;
+  gimple_stmt_iterator si;
+  gimple stmt;
+  tree op;
+  enum tree_code code;
+  basic_block *bbs = get_loop_body_in_dom_order (loop);
+
+  for (i = 0; i < loop->num_nodes; i++)
+    for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+      {
+        stmt = gsi_stmt (si);
+
+        /* Only assignments that define virtual memory are stores.  */
+        if (!stmt || !gimple_vdef (stmt) || !is_gimple_assign (stmt))
+          continue;
+
+        code = gimple_assign_rhs_code (stmt);
+        op = gimple_assign_rhs1 (stmt);
+        if (!op)
+          continue;
+
+        /* The right-hand side has to be a literal constant.  Accept
+           REAL_CST next to INTEGER_CST: testing for INTEGER_CST alone
+           made the real_zerop check below unreachable, so stores of
+           floating-point zero were never collected.  */
+        if (code == INTEGER_CST)
+          {
+            if (!integer_zerop (op))
+              continue;
+          }
+        else if (code == REAL_CST)
+          {
+            if (!real_zerop (op)
+                || REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (op))
+                || DECIMAL_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (op))))
+              continue;
+          }
+        else
+          continue;
+
+        VEC_safe_push (gimple, heap, *stmts, stmt);
+      }
+
+  free (bbs);
+}
+
/* For a data reference REF, return the declaration of its base
address or NULL_TREE if the base is not determined. */
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index eff53483116..9e18e266dd8 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -564,6 +564,7 @@ index_in_loop_nest (int var, VEC (loop_p, heap) *loop_nest)
}
void stores_from_loop (struct loop *, VEC (gimple, heap) **);
+void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
void remove_similar_memory_refs (VEC (gimple, heap) **);
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
bool have_similar_memory_accesses (gimple, gimple);
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 099a7fe479f..59054063cfb 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1184,18 +1184,36 @@ tree_loop_distribution (void)
{
VEC (gimple, heap) *work_list = VEC_alloc (gimple, heap, 3);
- /* With the following working list, we're asking distribute_loop
- to separate the stores of the loop: when dependences allow,
- it will end on having one store per loop. */
- stores_from_loop (loop, &work_list);
-
- /* A simple heuristic for cache locality is to not split stores
- to the same array. Without this call, an unrolled loop would
- be split into as many loops as unroll factor, each loop
- storing in the same array. */
- remove_similar_memory_refs (&work_list);
-
- nb_generated_loops = distribute_loop (loop, work_list);
+ /* If both flag_tree_loop_distribute_patterns and
+ flag_tree_loop_distribution are set, then only
+ distribute_patterns is executed. */
+ if (flag_tree_loop_distribute_patterns)
+ {
+ /* With the following working list, we're asking
+ distribute_loop to separate from the rest of the loop the
+ stores of the form "A[i] = 0". */
+ stores_zero_from_loop (loop, &work_list);
+
+ /* Do nothing if there are no patterns to be distributed. */
+ if (VEC_length (gimple, work_list) > 0)
+ nb_generated_loops = distribute_loop (loop, work_list);
+ }
+ else if (flag_tree_loop_distribution)
+ {
+ /* With the following working list, we're asking
+ distribute_loop to separate the stores of the loop: when
+ dependences allow, it will end up with one store per
+ loop. */
+ stores_from_loop (loop, &work_list);
+
+ /* A simple heuristic for cache locality is to not split
+ stores to the same array. Without this call, an unrolled
+ loop would be split into as many loops as unroll factor,
+ each loop storing in the same array. */
+ remove_similar_memory_refs (&work_list);
+
+ nb_generated_loops = distribute_loop (loop, work_list);
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1217,7 +1235,8 @@ tree_loop_distribution (void)
static bool
gate_tree_loop_distribution (void)
{
- return flag_tree_loop_distribution != 0;
+ return flag_tree_loop_distribution
+ || flag_tree_loop_distribute_patterns;
}
struct gimple_opt_pass pass_loop_distribution =
OpenPOWER on IntegriCloud