diff options
author | Paul Mackerras <paulus@samba.org> | 2010-08-26 19:56:43 +0000 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2010-09-02 14:07:31 +1000 |
commit | cf9efce0ce3136fa076f53e53154e98455229514 (patch) | |
tree | 0e110018b160aff4813b81e0e8c3a43a364edd48 /arch/powerpc/kernel | |
parent | 93c22703efa72c7527dbd586d1951c1f4a85fd70 (diff) | |
download | blackbird-op-linux-cf9efce0ce3136fa076f53e53154e98455229514.tar.gz blackbird-op-linux-cf9efce0ce3136fa076f53e53154e98455229514.zip |
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kernel/entry_64.S | 18 | ||||
-rw-r--r-- | arch/powerpc/kernel/process.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/smp.c | 5 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 268 |
5 files changed, 150 insertions, 150 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1c0607ddccc0..c63494090854 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -181,17 +181,19 @@ int main(void) offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); DEFINE(SLBSHADOW_STACKESID, offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); + DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); - DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); + DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); - DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); + DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); + DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4d5fa12ca6e8..d82878c4daa6 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -97,6 +97,24 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +BEGIN_FW_FTR_SECTION + beq 33f + /* if from user, see if there are any DTL entries to process */ + ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ + ld r11,PACA_DTL_RIDX(r13) /* get log read index */ + ld r10,LPPACA_DTLIDX(r10) /* get log write index */ + cmpd cr1,r11,r10 + beq+ cr1,33f + bl .accumulate_stolen_time + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +33: +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ + #ifdef CONFIG_TRACE_IRQFLAGS bl .trace_hardirqs_on REST_GPR(0,r1) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 37bc8ff16cac..84906d3fc860 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev, account_system_vtime(current); account_process_vtime(current); - calculate_steal_time(); /* * We can't take a PMU exception inside _switch() since there is a diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9019f0f1bb5e..68034bbf2e4f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused) if (smp_ops->take_timebase) smp_ops->take_timebase(); - if (system_state > SYSTEM_BOOTING) - snapshot_timebase(); - secondary_cpu_time_init(); ipi_call_lock(); @@ -575,8 +572,6 @@ void __init smp_cpus_done(unsigned int max_cpus) free_cpumask_var(old_mask); - snapshot_timebases(); - dump_numa_cpu_topology(); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5d..fca20643c368 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -164,8 +164,6 @@ unsigned long ppc_proc_freq; EXPORT_SYMBOL(ppc_proc_freq); unsigned long ppc_tb_freq; -static DEFINE_PER_CPU(u64, last_jiffy); - #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to @@ -200,62 +198,151 @@ static void calc_cputime_factors(void) } /* - * Read the PURR on systems that have it, otherwise the timebase. + * Read the SPURR on systems that have it, otherwise the PURR, + * or if that doesn't exist return the timebase value passed in. */ -static u64 read_purr(void) +static u64 read_spurr(u64 tb) { + if (cpu_has_feature(CPU_FTR_SPURR)) + return mfspr(SPRN_SPURR); if (cpu_has_feature(CPU_FTR_PURR)) return mfspr(SPRN_PURR); - return mftb(); + return tb; } +#ifdef CONFIG_PPC_SPLPAR + /* - * Read the SPURR on systems that have it, otherwise the purr + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. */ -static u64 read_spurr(u64 purr) +static u64 scan_dispatch_log(u64 stop_tb) { - /* - * cpus without PURR won't have a SPURR - * We already know the former when we use this, so tell gcc - */ - if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR)) - return mfspr(SPRN_SPURR); - return purr; + unsigned long i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (i == vpa->dtl_idx) + return 0; + while (i < vpa->dtl_idx) { + dtb = dtl->timebase; + tb_delta = dtl->enqueue_to_dispatch_time + + dtl->ready_to_enqueue_time; + barrier(); + if (i + N_DISPATCH_LOG < vpa->dtl_idx) { + /* buffer has overflowed */ + i = vpa->dtl_idx - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; } /* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. + */ +void accumulate_stolen_time(void) +{ + u64 sst, ust; + + sst = scan_dispatch_log(get_paca()->starttime_user); + ust = scan_dispatch_log(get_paca()->starttime); + get_paca()->system_time -= sst; + get_paca()->user_time -= ust; + get_paca()->stolen_time += ust + sst; +} + +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + u64 stolen = 0; + + if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) { + stolen = scan_dispatch_log(stop_tb); + get_paca()->system_time -= stolen; + } + + stolen += get_paca()->stolen_time; + get_paca()->stolen_time = 0; + return stolen; +} + +#else /* CONFIG_PPC_SPLPAR */ +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + return 0; +} + +#endif /* CONFIG_PPC_SPLPAR */ + +/* * Account time for a transition between system, hard irq * or soft irq state. */ void account_system_vtime(struct task_struct *tsk) { - u64 now, nowscaled, delta, deltascaled, sys_time; + u64 now, nowscaled, delta, deltascaled; unsigned long flags; + u64 stolen, udelta, sys_scaled, user_scaled; local_irq_save(flags); - now = read_purr(); + now = mftb(); nowscaled = read_spurr(now); - delta = now - get_paca()->startpurr; + get_paca()->system_time += now - get_paca()->starttime; + get_paca()->starttime = now; deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startpurr = now; get_paca()->startspurr = nowscaled; - if (!in_interrupt()) { - /* deltascaled includes both user and system time. - * Hence scale it based on the purr ratio to estimate - * the system time */ - sys_time = get_paca()->system_time; - if (get_paca()->user_time) - deltascaled = deltascaled * sys_time / - (sys_time + get_paca()->user_time); - delta += sys_time; - get_paca()->system_time = 0; + + stolen = calculate_stolen_time(now); + + delta = get_paca()->system_time; + get_paca()->system_time = 0; + udelta = get_paca()->user_time - get_paca()->utime_sspurr; + get_paca()->utime_sspurr = get_paca()->user_time; + + /* + * Because we don't read the SPURR on every kernel entry/exit, + * deltascaled includes both user and system SPURR ticks. + * Apportion these ticks to system SPURR ticks and user + * SPURR ticks in the same ratio as the system time (delta) + * and user time (udelta) values obtained from the timebase + * over the same interval. The system ticks get accounted here; + * the user ticks get saved up in paca->user_time_scaled to be + * used by account_process_tick. + */ + sys_scaled = delta; + user_scaled = udelta; + if (deltascaled != delta + udelta) { + if (udelta) { + sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - sys_scaled; + } else { + sys_scaled = deltascaled; + } + } + get_paca()->user_time_scaled += user_scaled; + + if (in_irq() || idle_task(smp_processor_id()) != tsk) { + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); + } else { + account_idle_time(delta + stolen); } - if (in_irq() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta, deltascaled); - else - account_idle_time(delta); - __get_cpu_var(cputime_last_delta) = delta; - __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -265,125 +352,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime); * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. + * Assumes that account_system_vtime() has been called recently + * (i.e. since the last entry from usermode) so that + * get_paca()->user_time_scaled is up to date. */ void account_process_tick(struct task_struct *tsk, int user_tick) { cputime_t utime, utimescaled; utime = get_paca()->user_time; + utimescaled = get_paca()->user_time_scaled; get_paca()->user_time = 0; - utimescaled = cputime_to_scaled(utime); + get_paca()->user_time_scaled = 0; + get_paca()->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } -/* - * Stuff for accounting stolen time. - */ -struct cpu_purr_data { - int initialized; /* thread is running */ - u64 tb; /* last TB value read */ - u64 purr; /* last PURR value read */ - u64 spurr; /* last SPURR value read */ -}; - -/* - * Each entry in the cpu_purr_data array is manipulated only by its - * "owner" cpu -- usually in the timer interrupt but also occasionally - * in process context for cpu online. As long as cpus do not touch - * each others' cpu_purr_data, disabling local interrupts is - * sufficient to serialize accesses. - */ -static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); - -static void snapshot_tb_and_purr(void *data) -{ - unsigned long flags; - struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); - - local_irq_save(flags); - p->tb = get_tb_or_rtc(); - p->purr = mfspr(SPRN_PURR); - wmb(); - p->initialized = 1; - local_irq_restore(flags); -} - -/* - * Called during boot when all cpus have come up. - */ -void snapshot_timebases(void) -{ - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - on_each_cpu(snapshot_tb_and_purr, NULL, 1); -} - -/* - * Must be called with interrupts disabled. - */ -void calculate_steal_time(void) -{ - u64 tb, purr; - s64 stolen; - struct cpu_purr_data *pme; - - pme = &__get_cpu_var(cpu_purr_data); - if (!pme->initialized) - return; /* !CPU_FTR_PURR or early in early boot */ - tb = mftb(); - purr = mfspr(SPRN_PURR); - stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) { - if (idle_task(smp_processor_id()) != current) - account_steal_time(stolen); - else - account_idle_time(stolen); - } - pme->tb = tb; - pme->purr = purr; -} - -#ifdef CONFIG_PPC_SPLPAR -/* - * Must be called before the cpu is added to the online map when - * a cpu is being brought up at runtime. - */ -static void snapshot_purr(void) -{ - struct cpu_purr_data *pme; - unsigned long flags; - - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - local_irq_save(flags); - pme = &__get_cpu_var(cpu_purr_data); - pme->tb = mftb(); - pme->purr = mfspr(SPRN_PURR); - pme->initialized = 1; - local_irq_restore(flags); -} - -#endif /* CONFIG_PPC_SPLPAR */ - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() -#define calculate_steal_time() do { } while (0) #endif -#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)) -#define snapshot_purr() do { } while (0) -#endif - -/* - * Called when a cpu comes up after the system has finished booting, - * i.e. as a result of a hotplug cpu action. - */ -void snapshot_timebase(void) -{ - __get_cpu_var(last_jiffy) = get_tb_or_rtc(); - snapshot_purr(); -} - void __delay(unsigned long loops) { unsigned long start; @@ -585,8 +573,6 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); - calculate_steal_time(); - if (test_perf_event_pending()) { clear_perf_event_pending(); perf_event_do_pending(); |