From a0776ec8e97bf109e7d973d09fc3e1814eb32bfb Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Fri, 13 Oct 2006 10:05:45 -0700 Subject: [IA64] remove per-cpu ia64_phys_stacked_size_p8 It's not efficient to use a per-cpu variable just to store how many physical stack register a cpu has. Ever since the incarnation of ia64 up till upcoming Montecito processor, that variable has "glued" to 96. Having a variable in memory means that the kernel is burning an extra cacheline access on every syscall and kernel exit path. Such "static" value is better served with the instruction patching utility exists today. Convert ia64_phys_stacked_size_p8 into dynamic insn patching. This also has a pleasant side effect of eliminating access to per-cpu area while psr.ic=0 in the kernel exit path. (fixable for per-cpu DTC work, but why bother?) There are some concerns with the default value that the instruc- tion encoded in the kernel image. It shouldn't be concerned. The reasons are: (1) cpu_init() is called at CPU initialization. In there, we find out physical stack register size from PAL and patch two instructions in kernel exit code. The code in question can not be executed before the patching is done. (2) current implementation stores zero in ia64_phys_stacked_size_p8, and that's what the current kernel exit path loads the value with. With the new code, it is equivalent that we store reg size 96 in ia64_phys_stacked_size_p8, thus creating a better safety net. Given (1) above can never fail, having (2) is just a bonus. All in all, this patch allow one less memory reference in the kernel exit path, thus reducing syscall and interrupt return latency; and avoid polluting potential useful data in the CPU cache. Signed-off-by: Ken Chen Signed-off-by: Tony Luck --- include/asm-ia64/asmmacro.h | 10 ++++++++++ include/asm-ia64/patch.h | 1 + include/asm-ia64/processor.h | 1 + include/asm-ia64/sections.h | 1 + 4 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index c22b4658fc61..c1642fd64029 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -103,6 +103,16 @@ name: # define FSYS_RETURN br.ret.sptk.many b6 #endif +/* + * If physical stack register size is different from DEF_NUM_STACK_REG, + * dynamically patch the kernel for correct size. + */ + .section ".data.patch.phys_stack_reg", "a" + .previous +#define LOAD_PHYS_STACK_REG_SIZE(reg) \ +[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \ + .xdata4 ".data.patch.phys_stack_reg", 1b-. + /* * Up until early 2004, use of .align within a function caused bad unwind info. * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing diff --git a/include/asm-ia64/patch.h b/include/asm-ia64/patch.h index 4797f3535e6d..a71543084fb4 100644 --- a/include/asm-ia64/patch.h +++ b/include/asm-ia64/patch.h @@ -20,6 +20,7 @@ extern void ia64_patch_imm60 (u64 insn_addr, u64 val); /* patch "brl" w/ip-rel extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end); extern void ia64_patch_vtop (unsigned long start, unsigned long end); +extern void ia64_patch_phys_stack_reg(unsigned long val); extern void ia64_patch_gate (void); #endif /* _ASM_IA64_PATCH_H */ diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 5830d36fd8e6..88c728b9ff45 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -19,6 +19,7 @@ #include #include +#define IA64_NUM_PHYS_STACK_REG 96 #define IA64_NUM_DBG_REGS 8 #define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000) diff --git a/include/asm-ia64/sections.h b/include/asm-ia64/sections.h index e9eb7f62d32b..dc42a359894f 100644 --- a/include/asm-ia64/sections.h +++ b/include/asm-ia64/sections.h @@ -11,6 +11,7 @@ extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[]; extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; +extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[]; extern char __start_gate_section[]; extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[]; extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[]; -- cgit v1.2.1