diff options
author | uweigand <uweigand@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-11-13 13:31:50 +0000 |
---|---|---|
committer | uweigand <uweigand@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-11-13 13:31:50 +0000 |
commit | aa06c51c6b3ae85533c8c36031c0694496dd9372 (patch) | |
tree | 5bb8c5af3bd87edc10bb08d1f3b4154d65894d09 /gcc/config/arm/neon.md | |
parent | f7691b1e4525c071a1d0caab32a4ec2017220fc3 (diff) | |
download | ppe42-gcc-aa06c51c6b3ae85533c8c36031c0694496dd9372.tar.gz ppe42-gcc-aa06c51c6b3ae85533c8c36031c0694496dd9372.zip |
2012-11-13 Andrew Stubbs <ams@codesourcery.com>
Ulrich Weigand <ulrich.weigand@linaro.org>
* config/arm/arm.c (arm_emit_coreregs_64bit_shift): Fix comment.
* config/arm/arm.md (opt, opt_enabled): New attributes.
(enabled): Use opt_enabled.
(ashldi3, ashrdi3, lshrdi3): Add TARGET_NEON case.
(ashldi3): Allow general operands for TARGET_NEON case.
* config/arm/iterators.md (rshifts): New code iterator.
(shift, shifttype): New code attributes.
* config/arm/neon.md (UNSPEC_LOAD_COUNT): New unspec type.
(neon_load_count, ashldi3_neon_noclobber, ashldi3_neon,
signed_shift_di3_neon, unsigned_shift_di3_neon,
ashrdi3_neon_imm_noclobber, lshrdi3_neon_imm_noclobber,
<shift>di3_neon): New patterns.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@193476 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/arm/neon.md')
-rw-r--r-- | gcc/config/arm/neon.md | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 92e03b09760..21035808c0e 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -23,6 +23,7 @@ (define_c_enum "unspec" [ UNSPEC_ASHIFT_SIGNED UNSPEC_ASHIFT_UNSIGNED + UNSPEC_LOAD_COUNT UNSPEC_VABD UNSPEC_VABDL UNSPEC_VADD @@ -1200,6 +1201,189 @@ DONE; }) +;; 64-bit shifts + +;; This pattern loads a 32-bit shift count into a 64-bit NEON register, +;; leaving the upper half uninitalized. This is OK since the shift +;; instruction only looks at the low 8 bits anyway. To avoid confusing +;; data flow analysis however, we pretend the full register is set +;; using an unspec. +(define_insn "neon_load_count" + [(set (match_operand:DI 0 "s_register_operand" "=w,w") + (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")] + UNSPEC_LOAD_COUNT))] + "TARGET_NEON" + "@ + vld1.32\t{%P0[0]}, %A1 + vmov.32\t%P0[0], %1" + [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")] +) + +(define_insn "ashldi3_neon_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w") + (match_operand:DI 2 "reg_or_int_operand" " i,w")))] + "TARGET_NEON && reload_completed + && (!CONST_INT_P (operands[2]) + || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))" + "@ + vshl.u64\t%P0, %P1, %2 + vshl.u64\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_vshl_ddd,neon_vshl_ddd")] +) + +(define_insn_and_split "ashldi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") + (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) + (clobber (reg:CC_C CC_REGNUM))] + "TARGET_NEON" + "#" + "TARGET_NEON && reload_completed" + [(const_int 0)] + " + { + if (IS_VFP_REGNUM (REGNO (operands[0]))) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) < 1) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (INTVAL (operands[2]) > 63) + operands[2] = gen_rtx_CONST_INT (VOIDmode, 63); + } + else + { + emit_insn (gen_neon_load_count (operands[5], operands[2])); + operands[2] = operands[5]; + } + + /* Ditch the unnecessary clobbers. */ + emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1], + operands[2])); + } + else + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) + /* This clobbers CC. */ + emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); + else + arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], + operands[2], operands[3], operands[4]); + } + DONE; + }" + [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8") + (set_attr "opt" "*,*,speed,speed,*,*")] +) + +; The shift amount needs to be negated for right-shifts +(define_insn "signed_shift_di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "s_register_operand" " w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_NEON && reload_completed" + "vshl.s64\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_vshl_ddd")] +) + +; The shift amount needs to be negated for right-shifts +(define_insn "unsigned_shift_di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "s_register_operand" " w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_NEON && reload_completed" + "vshl.u64\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_vshl_ddd")] +) + +(define_insn "ashrdi3_neon_imm_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "const_int_operand" " i")))] + "TARGET_NEON && reload_completed + && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" + "vshr.s64\t%P0, %P1, %2" + [(set_attr "neon_type" "neon_vshl_ddd")] +) + +(define_insn "lshrdi3_neon_imm_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "const_int_operand" " i")))] + "TARGET_NEON && reload_completed + && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" + "vshr.u64\t%P0, %P1, %2" + [(set_attr "neon_type" "neon_vshl_ddd")] +) + +;; ashrdi3_neon +;; lshrdi3_neon +(define_insn_and_split "<shift>di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") + (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" + "#" + "TARGET_NEON && reload_completed" + [(const_int 0)] + " + { + if (IS_VFP_REGNUM (REGNO (operands[0]))) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) < 1) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (INTVAL (operands[2]) > 64) + operands[2] = gen_rtx_CONST_INT (VOIDmode, 64); + + /* Ditch the unnecessary clobbers. */ + emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0], + operands[1], + operands[2])); + } + else + { + /* We must use a negative left-shift. */ + emit_insn (gen_negsi2 (operands[3], operands[2])); + emit_insn (gen_neon_load_count (operands[5], operands[3])); + emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1], + operands[5])); + } + } + else + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) + /* This clobbers CC. */ + emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); + else + /* This clobbers CC (ASHIFTRT by register only). */ + arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1], + operands[2], operands[3], operands[4]); + } + + DONE; + }" + [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8") + (set_attr "opt" "*,*,speed,speed,*,*")] +) + ;; Widening operations (define_insn "widen_ssum<mode>3" |