diff options
author | Doug Gilbert <dgilbert@us.ibm.com> | 2015-04-28 15:40:30 -0500 |
---|---|---|
committer | Patrick Williams <patrick@stwcx.xyz> | 2016-08-15 11:45:09 -0500 |
commit | dcca4f047e20f3872dc02b86703109c62e58298c (patch) | |
tree | f1f991961c94b572870721ef50bf9281adf3fd22 | |
parent | 9869efd8bf59fa3e95c77640fce5831673ca57af (diff) | |
download | ppe42-gcc-dcca4f047e20f3872dc02b86703109c62e58298c.tar.gz ppe42-gcc-dcca4f047e20f3872dc02b86703109c62e58298c.zip |
64 bit load/store peepholes
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 152 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 48 |
3 files changed, 188 insertions, 13 deletions
```diff
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 62e6c558b5c..90af4f1baa4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -223,4 +223,5 @@ extern unsigned char rs6000_class_max_nregs[][LIM_REG_CLASSES];
 extern unsigned char rs6000_hard_regno_nregs[][FIRST_PSEUDO_REGISTER];
 
 extern bool rs6000_linux_float_exceptions_rounding_supported_p (void);
+extern bool mem_contiguous(rtx, rtx);
 #endif /* rs6000-protos.h */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 889b9885219..ff292750628 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -23402,13 +23402,33 @@ rs6000_emit_prologue (void)
 
       if ((strategy & SAVE_INLINE_GPRS))
         {
-          for (i = 0; i < 32 - info->first_gp_reg_save; i++)
-            if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
-              emit_frame_save (spe_save_area_ptr, reg_mode,
-                               info->first_gp_reg_save + i,
-                               (info->spe_gp_save_offset + save_off
-                                + reg_size * i),
-                               sp_off - save_off);
+          // ppe42 - use 64 bit stores - No evidence that this gained anything
+          i = 0;
+          if((info->first_gp_reg_save & 0x01) == 1) // odd reg num
+            {
+              if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
+                emit_frame_save (spe_save_area_ptr,
+                                 reg_mode,
+                                 info->first_gp_reg_save,
+                                 (info->spe_gp_save_offset + save_off),
+                                 sp_off - save_off);
+              i = 1;
+            }
+          for(;i < 32 - info->first_gp_reg_save; i += 2)
+            if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+              emit_frame_save (spe_save_area_ptr, DImode,
+                               info->first_gp_reg_save + i,
+                               (info->spe_gp_save_offset + save_off
+                                + reg_size * i),
+                               sp_off - save_off);
+
+          //for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+          //  if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+          //    emit_frame_save (spe_save_area_ptr, reg_mode,
+          //                     info->first_gp_reg_save + i,
+          //                     (info->spe_gp_save_offset + save_off
+          //                      + reg_size * i),
+          //                     sp_off - save_off);
         }
       else
         {
@@ -23489,12 +23509,32 @@ rs6000_emit_prologue (void)
   else if (!WORLD_SAVE_P (info))
     {
       int i;
-      for (i = 0; i < 32 - info->first_gp_reg_save; i++)
-        if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
-          emit_frame_save (frame_reg_rtx, reg_mode,
-                           info->first_gp_reg_save + i,
-                           info->gp_save_offset + frame_off + reg_size * i,
-                           sp_off - frame_off);
+
+      // ppe42 save using 64-bit stores
+      i = 0;
+      if((info->first_gp_reg_save & 0x1) == 1) // odd regnum
+        {
+          if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
+            emit_frame_save (frame_reg_rtx, reg_mode,
+                             info->first_gp_reg_save,
+                             info->gp_save_offset + frame_off,
+                             sp_off - frame_off);
+          i = 1;
+        }
+
+      for (; i < 32 - info->first_gp_reg_save; i += 2)
+        if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+          emit_frame_save (frame_reg_rtx, DImode,
+                           info->first_gp_reg_save + i,
+                           info->gp_save_offset + frame_off + reg_size * i,
+                           sp_off - frame_off);
+
+//      for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+//        if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+//          emit_frame_save (frame_reg_rtx, reg_mode,
+//                           info->first_gp_reg_save + i,
+//                           info->gp_save_offset + frame_off + reg_size * i,
+//                           sp_off - frame_off);
     }
 
   if (crtl->calls_eh_return)
@@ -24921,12 +24961,46 @@ rs6000_emit_epilogue (int sibcall)
         }
       else
         {
+          // ppe42 - use 64 bit loads
+          i = 0;
+          if((info->first_gp_reg_save & 0x1) == 1) // odd reg
+            {
+              if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
+                emit_insn (gen_frame_load
+                           (gen_rtx_REG (reg_mode, info->first_gp_reg_save),
+                            frame_reg_rtx,
+                            info->gp_save_offset + frame_off));
+              i = 1;
+            }
+          reg_mode = DImode;
+          for(; i < 32 - info->first_gp_reg_save; i += 2)
+            {
+              if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+                {
+                  emit_insn
+                    (gen_rtx_SET
+                     (VOIDmode,
+                      gen_rtx_REG(reg_mode, info->first_gp_reg_save + i),
+                      gen_frame_mem(reg_mode,
+                                    gen_rtx_PLUS
+                                    (Pmode,
+                                     frame_reg_rtx,
+                                     GEN_INT(info->gp_save_offset
+                                             + frame_off
+                                             + reg_size * i)
+                                    ))));
+
+                }
+            }
+          reg_mode = Pmode;
+/*
           for (i = 0; i < 32 - info->first_gp_reg_save; i++)
             if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
               emit_insn (gen_frame_load
                          (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
                           frame_reg_rtx,
                           info->gp_save_offset + frame_off + reg_size * i));
+          */
         }
 
       if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
@@ -33233,6 +33307,58 @@ emit_fusion_gpr_load (rtx target, rtx mem)
 
   return "";
 }
+bool mem_contiguous(rtx mem1, rtx mem2)
+{
+    bool result = false;
+    int regno1 = -1;
+    int regno2 = -2;
+
+    int offset1 = -1;
+    int offset2 = -2;
+
+    debug_rtx(mem1);
+    debug_rtx(mem2);
+    int code = GET_CODE(XEXP(mem1,0));
+    if(code == PLUS)
+    {
+        if(GET_CODE(XEXP(XEXP(mem1,0),0)) == REG)
+        {
+            regno1 = REGNO(XEXP(XEXP(mem1,0),0));
+            if ( GET_CODE(XEXP(XEXP(mem1,0),1)) == CONST_INT)
+                offset1 = INTVAL(XEXP(XEXP(mem1,0),1));
+        }
+    }
+    else if (code == REG)
+    {
+        regno1 = REGNO(XEXP(mem1,0));
+        offset1 = 0;
+    }
+
+    code = GET_CODE(XEXP(mem2,0));
+    if(code == PLUS)
+    {
+        if(GET_CODE(XEXP(XEXP(mem2,0),0)) == REG)
+        {
+            regno2 = REGNO(XEXP(XEXP(mem2,0),0));
+            if ( GET_CODE(XEXP(XEXP(mem2,0),1)) == CONST_INT)
+                offset2 = INTVAL(XEXP(XEXP(mem2,0),1));
+        }
+    }
+    else if (code == REG)
+    {
+        regno2 = REGNO(XEXP(mem2,0));
+        offset2 = 0;
+    }
+    if((regno1 == regno2) &&        // same base reg
+       ((offset1 & 0x7) == 0) &&    // 8 byte aligned
+       ((offset1+4) == offset2))    // contiguous memory
+    {
+        result = true;
+    }
+    fprintf(stderr,"Return %s\n",(result ? "true":"false"));
+    return result;
+}
+
 
 
 struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 42f23ff19ce..f85327ae357 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13504,6 +13504,54 @@
                     (const_int 0)))]
   "")
 
+
+;; ppe convert 2 si moves to one di move
+;; gpr numbers in sequence
+;; mem base reg the same
+;; first mem offset is 8 byte aligned
+;; second mem offset == first mem offset + 4
+;; FIXME
+(define_peephole
+  [(set (match_operand:SI 0 "gpc_reg_operand" "r")
+        (match_operand:SI 1 "offsettable_mem_operand" "m"))
+   (set (match_operand:SI 2 "gpc_reg_operand" "r")
+        (match_operand:SI 3 "offsettable_mem_operand" "m"))]
+"((REGNO(operands[0]) + 1) == REGNO(operands[2])) &&
+ mem_contiguous(operands[1],operands[3])"
+"lvd %0, %1 #peephole %0 %1 %2 %3"
+[(set_attr "type" "load")])
+
+(define_peephole
+  [(set (match_operand:SI 0 "gpc_reg_operand" "r")
+        (match_operand:SI 1 "offsettable_mem_operand" "m"))
+   (set (match_operand:SI 2 "gpc_reg_operand" "r")
+        (match_operand:SI 3 "offsettable_mem_operand" "m"))]
+"((REGNO(operands[0]) - 1) == REGNO(operands[2])) &&
+ mem_contiguous(operands[3],operands[1])"
+"lvd %2, %3 #peephole %0 %1 %2 %3"
+[(set_attr "type" "load")])
+
+(define_peephole
+  [(set (match_operand:SI 0 "offsettable_mem_operand" "m")
+        (match_operand:SI 1 "gpc_reg_operand" "r"))
+   (set (match_operand:SI 2 "offsettable_mem_operand" "m")
+        (match_operand:SI 3 "gpc_reg_operand" "r"))]
+"((REGNO(operands[1]) + 1) == REGNO(operands[3])) &&
+ mem_contiguous(operands[0],operands[2])"
+"stvd %1, %0 # peephole %0 %1 %2 %3"
+[(set_attr "type" "store")])
+
+(define_peephole
+  [(set (match_operand:SI 0 "offsettable_mem_operand" "m")
+        (match_operand:SI 1 "gpc_reg_operand" "r"))
+   (set (match_operand:SI 2 "offsettable_mem_operand" "m")
+        (match_operand:SI 3 "gpc_reg_operand" "r"))]
+"((REGNO(operands[1]) - 1) == REGNO(operands[3])) &&
+ mem_contiguous(operands[2],operands[0])"
+"stvd %3, %2 # peephole %0 %1 %2 %3"
+[(set_attr "type" "store")])
+
+
 
 ;; There is a 3 cycle delay between consecutive mfcr instructions
 ;; so it is useful to combine 2 scc instructions to use only one mfcr.
```