summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDoug Gilbert <dgilbert@us.ibm.com>2015-04-28 15:40:30 -0500
committerPatrick Williams <patrick@stwcx.xyz>2016-08-15 11:45:09 -0500
commitdcca4f047e20f3872dc02b86703109c62e58298c (patch)
treef1f991961c94b572870721ef50bf9281adf3fd22
parent9869efd8bf59fa3e95c77640fce5831673ca57af (diff)
downloadppe42-gcc-dcca4f047e20f3872dc02b86703109c62e58298c.tar.gz
ppe42-gcc-dcca4f047e20f3872dc02b86703109c62e58298c.zip
64 bit load/store peepholes
-rw-r--r--gcc/config/rs6000/rs6000-protos.h1
-rw-r--r--gcc/config/rs6000/rs6000.c152
-rw-r--r--gcc/config/rs6000/rs6000.md48
3 files changed, 188 insertions, 13 deletions
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 62e6c558b5c..90af4f1baa4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -223,4 +223,5 @@ extern unsigned char rs6000_class_max_nregs[][LIM_REG_CLASSES];
extern unsigned char rs6000_hard_regno_nregs[][FIRST_PSEUDO_REGISTER];
extern bool rs6000_linux_float_exceptions_rounding_supported_p (void);
+extern bool mem_contiguous(rtx, rtx);
#endif /* rs6000-protos.h */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 889b9885219..ff292750628 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -23402,13 +23402,33 @@ rs6000_emit_prologue (void)
if ((strategy & SAVE_INLINE_GPRS))
{
- for (i = 0; i < 32 - info->first_gp_reg_save; i++)
- if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
- emit_frame_save (spe_save_area_ptr, reg_mode,
- info->first_gp_reg_save + i,
- (info->spe_gp_save_offset + save_off
- + reg_size * i),
- sp_off - save_off);
+ // ppe42 - use 64 bit stores - No evidence that this gained anything
+ i = 0;
+ if((info->first_gp_reg_save & 0x01) == 1) // odd reg num
+ {
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
+ emit_frame_save (spe_save_area_ptr,
+ reg_mode,
+ info->first_gp_reg_save,
+ (info->spe_gp_save_offset + save_off),
+ sp_off - save_off);
+ i = 1;
+ }
+ for(;i < 32 - info->first_gp_reg_save; i += 2)
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ emit_frame_save (spe_save_area_ptr, DImode,
+ info->first_gp_reg_save + i,
+ (info->spe_gp_save_offset + save_off
+ + reg_size * i),
+ sp_off - save_off);
+
+ //for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ // if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ // emit_frame_save (spe_save_area_ptr, reg_mode,
+ // info->first_gp_reg_save + i,
+ // (info->spe_gp_save_offset + save_off
+ // + reg_size * i),
+ // sp_off - save_off);
}
else
{
@@ -23489,12 +23509,32 @@ rs6000_emit_prologue (void)
else if (!WORLD_SAVE_P (info))
{
int i;
- for (i = 0; i < 32 - info->first_gp_reg_save; i++)
- if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
- emit_frame_save (frame_reg_rtx, reg_mode,
- info->first_gp_reg_save + i,
- info->gp_save_offset + frame_off + reg_size * i,
- sp_off - frame_off);
+
+ // ppe42 save using 64-bit stores
+ i = 0;
+ if((info->first_gp_reg_save & 0x1) == 1) // odd regnum
+ {
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
+ emit_frame_save (frame_reg_rtx, reg_mode,
+ info->first_gp_reg_save,
+ info->gp_save_offset + frame_off,
+ sp_off - frame_off);
+ i = 1;
+ }
+
+ for (; i < 32 - info->first_gp_reg_save; i += 2)
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ emit_frame_save (frame_reg_rtx, DImode,
+ info->first_gp_reg_save + i,
+ info->gp_save_offset + frame_off + reg_size * i,
+ sp_off - frame_off);
+
+// for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+// if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+// emit_frame_save (frame_reg_rtx, reg_mode,
+// info->first_gp_reg_save + i,
+// info->gp_save_offset + frame_off + reg_size * i,
+// sp_off - frame_off);
}
if (crtl->calls_eh_return)
@@ -24921,12 +24961,46 @@ rs6000_emit_epilogue (int sibcall)
}
else
{
+ // ppe42 - use 64 bit loads
+ i = 0;
+ if((info->first_gp_reg_save & 0x1) == 1) // odd reg
+ {
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save))
+ emit_insn (gen_frame_load
+ (gen_rtx_REG (reg_mode, info->first_gp_reg_save),
+ frame_reg_rtx,
+ info->gp_save_offset + frame_off));
+ i = 1;
+ }
+ reg_mode = DImode;
+ for(; i < 32 - info->first_gp_reg_save; i += 2)
+ {
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ {
+ emit_insn
+ (gen_rtx_SET
+ (VOIDmode,
+ gen_rtx_REG(reg_mode, info->first_gp_reg_save + i),
+ gen_frame_mem(reg_mode,
+ gen_rtx_PLUS
+ (Pmode,
+ frame_reg_rtx,
+ GEN_INT(info->gp_save_offset +
+ frame_off +
+ reg_size * i)
+ ))));
+
+ }
+ }
+ reg_mode = Pmode;
+/*
for (i = 0; i < 32 - info->first_gp_reg_save; i++)
if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
emit_insn (gen_frame_load
(gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
frame_reg_rtx,
info->gp_save_offset + frame_off + reg_size * i));
+ */
}
if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
@@ -33233,6 +33307,58 @@ emit_fusion_gpr_load (rtx target, rtx mem)
return "";
}
+/* Split the address of MEM into a base register number and a constant
+   byte offset.  Only the address forms (reg) and
+   (plus (reg) (const_int)) are recognized.  On success store the
+   results through REGNO_OUT and OFFSET_OUT and return true; for any
+   other address form return false and leave the outputs untouched.  */
+static bool
+mem_base_and_offset (rtx mem, unsigned int *regno_out,
+                     HOST_WIDE_INT *offset_out)
+{
+  rtx addr = XEXP (mem, 0);
+
+  if (REG_P (addr))
+    {
+      *regno_out = REGNO (addr);
+      *offset_out = 0;
+      return true;
+    }
+
+  if (GET_CODE (addr) == PLUS
+      && REG_P (XEXP (addr, 0))
+      && CONST_INT_P (XEXP (addr, 1)))
+    {
+      *regno_out = REGNO (XEXP (addr, 0));
+      *offset_out = INTVAL (XEXP (addr, 1));
+      return true;
+    }
+
+  return false;
+}
+
+/* Return true if MEM1 and MEM2 can be covered by one 8-byte access:
+   both addresses use the same base register, the constant offset of
+   MEM1 is a multiple of 8, and MEM2 starts exactly 4 bytes after MEM1.
+
+   Only the offset is checked for alignment; the base register itself
+   is assumed to hold an 8-byte aligned address (NOTE(review): confirm
+   this holds for every caller).
+
+   This is called from peephole conditions, so it must be silent: it
+   must not write diagnostics to stderr.  */
+bool mem_contiguous(rtx mem1, rtx mem2)
+{
+  unsigned int regno1, regno2;
+  /* INTVAL yields a HOST_WIDE_INT; keep the full width rather than
+     truncating to int.  */
+  HOST_WIDE_INT offset1, offset2;
+
+  if (!MEM_P (mem1) || !MEM_P (mem2))
+    return false;
+
+  /* Fusing two volatile accesses would change the width and number of
+     accesses the program asked for, so never report them as mergeable.  */
+  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
+    return false;
+
+  if (!mem_base_and_offset (mem1, &regno1, &offset1)
+      || !mem_base_and_offset (mem2, &regno2, &offset2))
+    return false;
+
+  return (regno1 == regno2              /* same base reg */
+          && (offset1 & 0x7) == 0       /* 8 byte aligned */
+          && offset1 + 4 == offset2);   /* contiguous memory */
+}
+
+
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 42f23ff19ce..f85327ae357 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13504,6 +13504,54 @@
(const_int 0)))]
"")
+
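+;; PPE42: combine two SImode moves into a single DImode (lvd/stvd) move when
+;;   - the GPR numbers are consecutive,
+;;   - both memory operands use the same base register,
+;;   - the first memory offset is 8-byte aligned, and
+;;   - the second memory offset equals the first memory offset + 4.
+;; FIXME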
+;; Two SImode loads into consecutive GPRs (operand 2 is operand 0 + 1)
+;; from adjacent words, lower word first: emit one lvd instead.
+;; The first load must not overwrite a register that the second load's
+;; address uses (e.g. "lwz 3,0(3); lwz 4,4(3)"); in that case the second
+;; address depends on the loaded value and the two loads are not
+;; equivalent to one 64-bit load, so the pattern is rejected.
+(define_peephole
+  [(set (match_operand:SI 0 "gpc_reg_operand" "r")
+        (match_operand:SI 1 "offsettable_mem_operand" "m"))
+   (set (match_operand:SI 2 "gpc_reg_operand" "r")
+        (match_operand:SI 3 "offsettable_mem_operand" "m"))]
+  "((REGNO (operands[0]) + 1) == REGNO (operands[2]))
+   && !reg_mentioned_p (operands[0], operands[3])
+   && mem_contiguous (operands[1], operands[3])"
+  "lvd %0, %1 #peephole %0 %1 %2 %3"
+  [(set_attr "type" "load")])
+
+;; Same as the ascending-order load peephole, but the two loads appear
+;; in the opposite order: operand 0 is the higher register (loaded from
+;; the higher word) and operand 2 is operand 0 - 1 (loaded from the
+;; lower, 8-byte aligned word).  Emit one lvd based on operands 2 and 3.
+;; As above, reject the match when the first load overwrites a register
+;; used in the second load's address.
+(define_peephole
+  [(set (match_operand:SI 0 "gpc_reg_operand" "r")
+        (match_operand:SI 1 "offsettable_mem_operand" "m"))
+   (set (match_operand:SI 2 "gpc_reg_operand" "r")
+        (match_operand:SI 3 "offsettable_mem_operand" "m"))]
+  "((REGNO (operands[0]) - 1) == REGNO (operands[2]))
+   && !reg_mentioned_p (operands[0], operands[3])
+   && mem_contiguous (operands[3], operands[1])"
+  "lvd %2, %3 #peephole %0 %1 %2 %3"
+  [(set_attr "type" "load")])
+
+;; Two SImode stores from consecutive GPRs (operand 3 is operand 1 + 1)
+;; to adjacent words, lower word first: emit one stvd using the first
+;; register and the first memory operand.  mem_contiguous checks that
+;; operand 2 lies 4 bytes after operand 0 on the same base register and
+;; that operand 0's offset is a multiple of 8.
+(define_peephole
+ [(set (match_operand:SI 0 "offsettable_mem_operand" "m")
+ (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (set (match_operand:SI 2 "offsettable_mem_operand" "m")
+ (match_operand:SI 3 "gpc_reg_operand" "r"))]
+"((REGNO(operands[1]) + 1) == REGNO(operands[3])) &&
+ mem_contiguous(operands[0],operands[2])"
+"stvd %1, %0 # peephole %0 %1 %2 %3"
+[(set_attr "type" "store")])
+
+;; Same as the ascending-order store peephole, but the two stores appear
+;; in the opposite order: operand 1 is the higher register and operand 3
+;; is operand 1 - 1, with operand 2 being the lower word (its offset a
+;; multiple of 8) and operand 0 the word 4 bytes after it.  Emit one
+;; stvd using operands 3 and 2.
+(define_peephole
+ [(set (match_operand:SI 0 "offsettable_mem_operand" "m")
+ (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (set (match_operand:SI 2 "offsettable_mem_operand" "m")
+ (match_operand:SI 3 "gpc_reg_operand" "r"))]
+"((REGNO(operands[1]) - 1) == REGNO(operands[3])) &&
+ mem_contiguous(operands[2],operands[0])"
+"stvd %3, %2 # peephole %0 %1 %2 %3"
+[(set_attr "type" "store")])
+
+
;; There is a 3 cycle delay between consecutive mfcr instructions
;; so it is useful to combine 2 scc instructions to use only one mfcr.
OpenPOWER on IntegriCloud