diff options
author | gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-01-29 18:39:43 +0000 |
---|---|---|
committer | gretay <gretay@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-01-29 18:39:43 +0000 |
commit | eca9ccc83be58342989d2f3768b4e058e3eb6c09 (patch) | |
tree | 25ac4c0c62938b185230ab636503df43f102733b | |
parent | fd923cfd5910b380cedbdee9983053c7b5e39737 (diff) | |
download | ppe42-gcc-eca9ccc83be58342989d2f3768b4e058e3eb6c09.tar.gz ppe42-gcc-eca9ccc83be58342989d2f3768b4e058e3eb6c09.zip |
2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/arm-protos.h (arm_mac_accumulator_is_result): New
declaration.
* config/arm/arm.c (arm_mac_accumulator_is_result): New function.
* config/arm/cortex-a7.md: New bypasses using
arm_mac_accumulator_is_result.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@195553 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 56 | ||||
-rw-r--r-- | gcc/config/arm/cortex-a7.md | 14 |
4 files changed, 79 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 669185909fa..a40f35c9298 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,13 @@ 2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com> + * config/arm/arm-protos.h (arm_mac_accumulator_is_result): New + declaration. + * config/arm/arm.c (arm_mac_accumulator_is_result): New function. + * config/arm/cortex-a7.md: New bypasses using + arm_mac_accumulator_is_result. + +2013-01-29 Greta Yorsh <Greta.Yorsh@arm.com> + * config/arm/cortex-a7.md (cortex_a7_neon_mul): New reservation. (cortex_a7_neon_mla): Likewise. (cortex_a7_fpfmad): New reservation. diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 83c8d4bb3de..e9f74dc17a2 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -101,6 +101,7 @@ extern int arm_early_load_addr_dep (rtx, rtx); extern int arm_no_early_alu_shift_dep (rtx, rtx); extern int arm_no_early_alu_shift_value_dep (rtx, rtx); extern int arm_no_early_mul_dep (rtx, rtx); +extern int arm_mac_accumulator_is_result (rtx, rtx); extern int arm_mac_accumulator_is_mul_result (rtx, rtx); extern int tls_mentioned_p (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 9d3981d5e48..0aa24b1796b 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -24608,6 +24608,62 @@ arm_cxx_guard_type (void) return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; } +/* Return non-zero iff the consumer (a multiply-accumulate or a + multiply-subtract instruction) has an accumulator dependency on the + result of the producer and no other dependency on that result. It + does not check if the producer is a multiply-accumulate instruction. 
*/ +int +arm_mac_accumulator_is_result (rtx producer, rtx consumer) +{ + rtx result; + rtx op0, op1, acc; + + producer = PATTERN (producer); + consumer = PATTERN (consumer); + + if (GET_CODE (producer) == COND_EXEC) + producer = COND_EXEC_CODE (producer); + if (GET_CODE (consumer) == COND_EXEC) + consumer = COND_EXEC_CODE (consumer); + + if (GET_CODE (producer) != SET) + return 0; + + result = XEXP (producer, 0); + + if (GET_CODE (consumer) != SET) + return 0; + + /* Check that the consumer is of the form + (set (...) (plus (mult ...) (...))) + or + (set (...) (minus (...) (mult ...))). */ + if (GET_CODE (XEXP (consumer, 1)) == PLUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); + acc = XEXP (XEXP (consumer, 1), 1); + } + else if (GET_CODE (XEXP (consumer, 1)) == MINUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); + acc = XEXP (XEXP (consumer, 1), 0); + } + else + return 0; + + return (reg_overlap_mentioned_p (result, acc) + && !reg_overlap_mentioned_p (result, op0) + && !reg_overlap_mentioned_p (result, op1)); +} + /* Return non-zero if the consumer (a multiply-accumulate instruction) has an accumulator dependency on the result of the producer (a multiplication instruction) and no other dependency on that result. */ diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md index a55efeb2e34..a8d4432e508 100644 --- a/gcc/config/arm/cortex-a7.md +++ b/gcc/config/arm/cortex-a7.md @@ -137,6 +137,12 @@ (eq_attr "neon_type" "none"))) "cortex_a7_both") +;; Forward the result of a multiply operation to the accumulator +;; of the following multiply and accumulate instruction. 
+(define_bypass 1 "cortex_a7_mul" + "cortex_a7_mul" + "arm_mac_accumulator_is_result") + ;; The latency depends on the operands, so we use an estimate here. (define_insn_reservation "cortex_a7_idiv" 5 (and (eq_attr "tune" "cortexa7") @@ -264,6 +270,10 @@ neon_fp_vmla_qqq_scalar")) "cortex_a7_both+cortex_a7_fpmul_pipe") +(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla" + "cortex_a7_fpmacs,cortex_a7_neon_mla" + "arm_mac_accumulator_is_result") + ;; Non-multiply instructions can issue between two cycles of a ;; double-precision multiply. @@ -285,6 +295,10 @@ (eq_attr "neon_type" "none"))) "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") +(define_bypass 7 "cortex_a7_fpmacd" + "cortex_a7_fpmacd,cortex_a7_fpfmad" + "arm_mac_accumulator_is_result") + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Floating-point divide/square root instructions. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |