summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>, 2013-01-29 18:39:43 +0000
committer: gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4>, 2013-01-29 18:39:43 +0000
commit: eca9ccc83be58342989d2f3768b4e058e3eb6c09 (patch)
tree: 25ac4c0c62938b185230ab636503df43f102733b
parent: fd923cfd5910b380cedbdee9983053c7b5e39737 (diff)
download: ppe42-gcc-eca9ccc83be58342989d2f3768b4e058e3eb6c09.tar.gz
ppe42-gcc-eca9ccc83be58342989d2f3768b4e058e3eb6c09.zip
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/arm-protos.h (arm_mac_accumulator_is_result): New declaration.
* config/arm/arm.c (arm_mac_accumulator_is_result): New function.
* config/arm/cortex-a7.md: New bypasses using arm_mac_accumulator_is_result.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@195553 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/config/arm/arm-protos.h 1
-rw-r--r-- gcc/config/arm/arm.c 56
-rw-r--r-- gcc/config/arm/cortex-a7.md 14
4 files changed, 79 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 669185909fa..a40f35c9298 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
+ * config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
+ declaration.
+ * config/arm/arm.c (arm_mac_accumulator_is_result): New function.
+ * config/arm/cortex-a7.md: New bypasses using
+ arm_mac_accumulator_is_result.
+
+2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
+
* config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation.
(cortex_a7_neon_mla): Likewise.
(cortex_a7_fpfmad): New reservation.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 83c8d4bb3de..e9f74dc17a2 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -101,6 +101,7 @@ extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx);
extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx);
+extern int arm_mac_accumulator_is_result (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
extern int tls_mentioned_p (rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9d3981d5e48..0aa24b1796b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -24608,6 +24608,62 @@ arm_cxx_guard_type (void)
return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
+/* Return non-zero iff CONSUMER (a multiply-accumulate or a
+   multiply-subtract instruction) depends on the result of PRODUCER
+   through its accumulator operand only, i.e. the result overlaps the
+   accumulator but neither operand of the multiplication.  No check is
+   made that PRODUCER is itself a multiply-accumulate instruction.  */
+int
+arm_mac_accumulator_is_result (rtx producer, rtx consumer)
+{
+  rtx prod_pat = PATTERN (producer);
+  rtx cons_pat = PATTERN (consumer);
+  rtx dest, src, mul, acc;
+
+  /* Look through conditional execution to the guarded pattern.  */
+  if (GET_CODE (prod_pat) == COND_EXEC)
+    prod_pat = COND_EXEC_CODE (prod_pat);
+  if (GET_CODE (cons_pat) == COND_EXEC)
+    cons_pat = COND_EXEC_CODE (cons_pat);
+
+  /* Both patterns have to be plain SETs.  */
+  if (GET_CODE (prod_pat) != SET || GET_CODE (cons_pat) != SET)
+    return 0;
+
+  dest = XEXP (prod_pat, 0);
+  src = XEXP (cons_pat, 1);
+
+  /* Accept only
+       (set (...) (plus (mult ...) (...)))
+     and
+       (set (...) (minus (...) (mult ...))),
+     and pick out the multiplication and the accumulator.  */
+  switch (GET_CODE (src))
+    {
+    case PLUS:
+      mul = XEXP (src, 0);
+      acc = XEXP (src, 1);
+      break;
+
+    case MINUS:
+      mul = XEXP (src, 1);
+      acc = XEXP (src, 0);
+      break;
+
+    default:
+      return 0;
+    }
+
+  if (GET_CODE (mul) != MULT)
+    return 0;
+
+  /* The producer's result must reach the accumulator and must not be
+     mentioned by either multiplication operand.  */
+  if (!reg_overlap_mentioned_p (dest, acc))
+    return 0;
+  if (reg_overlap_mentioned_p (dest, XEXP (mul, 0)))
+    return 0;
+  return !reg_overlap_mentioned_p (dest, XEXP (mul, 1));
+}
+
/* Return non-zero if the consumer (a multiply-accumulate instruction)
has an accumulator dependency on the result of the producer (a
multiplication instruction) and no other dependency on that result. */
diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md
index a55efeb2e34..a8d4432e508 100644
--- a/gcc/config/arm/cortex-a7.md
+++ b/gcc/config/arm/cortex-a7.md
@@ -137,6 +137,12 @@
(eq_attr "neon_type" "none")))
"cortex_a7_both")
+;; Forward a multiply result to the accumulator of a following
+;; multiply-accumulate: latency 1 when it feeds the accumulator only.
+(define_bypass 1 "cortex_a7_mul"
+ "cortex_a7_mul"
+ "arm_mac_accumulator_is_result")
+
;; The latency depends on the operands, so we use an estimate here.
(define_insn_reservation "cortex_a7_idiv" 5
(and (eq_attr "tune" "cortexa7")
@@ -264,6 +270,10 @@
neon_fp_vmla_qqq_scalar"))
"cortex_a7_both+cortex_a7_fpmul_pipe")
+(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla" ; latency, producer reservations
+ "cortex_a7_fpmacs,cortex_a7_neon_mla" ; consumer reservations
+ "arm_mac_accumulator_is_result") ; guard: result is used only as the accumulator
+
;; Non-multiply instructions can issue between two cycles of a
;; double-precision multiply.
@@ -285,6 +295,10 @@
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4")
+(define_bypass 7 "cortex_a7_fpmacd" ; latency, producer reservation
+ "cortex_a7_fpmacd,cortex_a7_fpfmad" ; consumer reservations
+ "arm_mac_accumulator_is_result") ; guard: result is used only as the accumulator
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point divide/square root instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
OpenPOWER on IntegriCloud