diff options
author | Ingo Molnar <mingo@kernel.org> | 2013-02-05 13:10:33 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-02-05 13:10:33 +0100 |
commit | b2c77a57e4a0a7877e357dead7ee8acc19944f3e (patch) | |
tree | fa192b5a058711299c2a8ce2621df6c9bd8f3a99 /include/linux | |
parent | c3c186403c6abd32e719f005f0af950155a9e54d (diff) | |
parent | 6a61671bb2f3a1bd12cd17b8fca811a624782632 (diff) | |
download | blackbird-obmc-linux-b2c77a57e4a0a7877e357dead7ee8acc19944f3e.tar.gz blackbird-obmc-linux-b2c77a57e4a0a7877e357dead7ee8acc19944f3e.zip |
Merge tag 'full-dynticks-cputime-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into sched/core
Pull full-dynticks (user-space execution is undisturbed and
receives no timer IRQs) preparation changes that convert the
cputime accounting code to be full-dynticks ready,
from Frederic Weisbecker:
"This implements the cputime accounting on full dynticks CPUs.
Typical cputime stats infrastructure relies on the timer tick and
its periodic polling on the CPU to account the amount of time
spent by the CPUs and the tasks per high level domains such as
userspace, kernelspace, guest, ...
Now we are preparing to implement full dynticks capability on
Linux for Real Time and HPC users who want full CPU isolation.
This feature requires a cputime accounting that doesn't depend
on the timer tick.
To implement it, this new cputime infrastructure plugs into
kernel/user/guest boundaries to take snapshots of cputime and
flush these to the stats when needed. This performs pretty
much like CONFIG_VIRT_CPU_ACCOUNTING except that context location
and cputime snaphots are synchronized between write and read
side such that the latter can safely retrieve the pending tickless
cputime of a task and add it to its latest cputime snapshot to
return the correct result to the user."
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/context_tracking.h | 28 | ||||
-rw-r--r-- | include/linux/hardirq.h | 4 | ||||
-rw-r--r-- | include/linux/init_task.h | 11 | ||||
-rw-r--r-- | include/linux/kernel_stat.h | 2 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 55 | ||||
-rw-r--r-- | include/linux/sched.h | 40 | ||||
-rw-r--r-- | include/linux/tsacct_kern.h | 3 | ||||
-rw-r--r-- | include/linux/vtime.h | 59 |
8 files changed, 171 insertions, 31 deletions
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e24339ccb7f0..b28d161c1091 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -3,12 +3,40 @@ #ifdef CONFIG_CONTEXT_TRACKING #include <linux/sched.h> +#include <linux/percpu.h> + +struct context_tracking { + /* + * When active is false, probes are unset in order + * to minimize overhead: TIF flags are cleared + * and calls to user_enter/exit are ignored. This + * may be further optimized using static keys. + */ + bool active; + enum { + IN_KERNEL = 0, + IN_USER, + } state; +}; + +DECLARE_PER_CPU(struct context_tracking, context_tracking); + +static inline bool context_tracking_in_user(void) +{ + return __this_cpu_read(context_tracking.state) == IN_USER; +} + +static inline bool context_tracking_active(void) +{ + return __this_cpu_read(context_tracking.active); +} extern void user_enter(void); extern void user_exit(void); extern void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next); #else +static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } static inline void context_tracking_task_switch(struct task_struct *prev, diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 624ef3f45c8e..7105d5cbb762 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void); */ #define __irq_enter() \ do { \ - vtime_account_irq_enter(current); \ + account_irq_enter_time(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -169,7 +169,7 @@ extern void irq_enter(void); #define __irq_exit() \ do { \ trace_hardirq_exit(); \ - vtime_account_irq_exit(current); \ + account_irq_exit_time(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6d087c5f57f7..cc898b871cef 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -10,6 +10,7 @@ #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/securebits.h> +#include <linux/seqlock.h> #include <net/net_namespace.h> #ifdef CONFIG_SMP @@ -141,6 +142,15 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# define INIT_VTIME(tsk) \ + .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ + .vtime_snap = 0, \ + .vtime_snap_whence = VTIME_SYS, +#else +# define INIT_VTIME(tsk) +#endif + #define INIT_TASK_COMM "swapper" /* @@ -210,6 +220,7 @@ extern struct task_group root_task_group; INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ INIT_CPUSET_SEQ \ + INIT_VTIME(tsk) \ } diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 66b70780e910..ed5f6ed6eb77 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t) extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) { vtime_account_user(tsk); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c497ab0d03d..b7996a768eb2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -22,6 +22,7 @@ #include <linux/rcupdate.h> #include <linux/ratelimit.h> #include <linux/err.h> +#include <linux/irqflags.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm, } #endif /* CONFIG_IOMMU_API */ -static inline void kvm_guest_enter(void) +static inline void __guest_enter(void) { - BUG_ON(preemptible()); /* * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system_irqsafe(current); + vtime_account_system(current); current->flags |= PF_VCPU; +} + +static inline void __guest_exit(void) +{ + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); + current->flags &= ~PF_VCPU; +} + +#ifdef CONFIG_CONTEXT_TRACKING +extern void guest_enter(void); +extern void guest_exit(void); + +#else /* !CONFIG_CONTEXT_TRACKING */ +static inline void guest_enter(void) +{ + __guest_enter(); +} + +static inline void guest_exit(void) +{ + __guest_exit(); +} +#endif /* !CONFIG_CONTEXT_TRACKING */ + +static inline void kvm_guest_enter(void) +{ + unsigned long flags; + + BUG_ON(preemptible()); + + local_irq_save(flags); + guest_enter(); + local_irq_restore(flags); + /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspase from rcu point of view. In @@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ - vtime_account_system_irqsafe(current); - current->flags &= ~PF_VCPU; + unsigned long flags; + + local_irq_save(flags); + guest_exit(); + local_irq_restore(flags); } /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 924e42a8df58..719ee0815e3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1369,6 +1369,15 @@ struct task_struct { #ifndef CONFIG_VIRT_CPU_ACCOUNTING struct cputime prev_cputime; #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_t vtime_seqlock; + unsigned long long vtime_snap; + enum { + VTIME_SLEEPING = 0, + VTIME_USER, + VTIME_SYS, + } vtime_snap_whence; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ @@ -1793,6 +1802,37 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime); +extern void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled); +extern cputime_t task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime) +{ + if (utime) + *utime = t->utime; + if (stime) + *stime = t->stime; +} + +static inline void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, + cputime_t *stimescaled) +{ + if (utimescaled) + *utimescaled = t->utimescaled; + if (stimescaled) + *stimescaled = t->stimescaled; +} + +static inline cputime_t task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 44893e5ec8f7..3251965bf4cc 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns, #ifdef CONFIG_TASK_XACCT extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_account_cputime(struct task_struct *tsk); extern void acct_clear_integrals(struct task_struct *tsk); #else static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) {} static inline void acct_update_integrals(struct task_struct *tsk) {} +static inline void acct_account_cputime(struct task_struct *tsk) +{} static inline void acct_clear_integrals(struct task_struct *tsk) {} #endif /* CONFIG_TASK_XACCT */ diff --git a/include/linux/vtime.h b/include/linux/vtime.h index ae30ab58431a..71a5782d8c59 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -6,15 +6,46 @@ struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); -extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -extern void vtime_account(struct task_struct *tsk); -#else +extern void vtime_account_irq_enter(struct task_struct *tsk); + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +static inline bool vtime_accounting_enabled(void) { return true; } +#endif + +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ + static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } -static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } -static inline void vtime_account(struct task_struct *tsk) { } +static inline void vtime_account_user(struct task_struct *tsk) { } +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } +static inline bool vtime_accounting_enabled(void) { return false; } +#endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_account_irq_exit(struct task_struct *tsk); +extern bool vtime_accounting_enabled(void); +extern void vtime_user_enter(struct task_struct *tsk); +static inline void vtime_user_exit(struct task_struct *tsk) +{ + vtime_account_user(tsk); +} +extern void vtime_guest_enter(struct task_struct *tsk); +extern void vtime_guest_exit(struct task_struct *tsk); +extern void vtime_init_idle(struct task_struct *tsk); +#else +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ + /* On hard|softirq exit we always account to hard|softirq cputime */ + vtime_account_system(tsk); +} +static inline void vtime_user_enter(struct task_struct *tsk) { } +static inline void vtime_user_exit(struct task_struct *tsk) { } +static inline void vtime_guest_enter(struct task_struct *tsk) { } +static inline void vtime_guest_exit(struct task_struct *tsk) { } +static inline void vtime_init_idle(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk); static inline void irqtime_account_irq(struct task_struct *tsk) { } #endif -static inline void vtime_account_irq_enter(struct task_struct *tsk) +static inline void account_irq_enter_time(struct task_struct *tsk) { - /* - * Hardirq can interrupt idle task anytime. So we need vtime_account() - * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING. - * Softirq can also interrupt idle task directly if it calls - * local_bh_enable(). Such case probably don't exist but we never know. - * Ksoftirqd is not concerned because idle time is flushed on context - * switch. Softirqs in the end of hardirqs are also not a problem because - * the idle time is flushed on hardirq time already. - */ - vtime_account(tsk); + vtime_account_irq_enter(tsk); irqtime_account_irq(tsk); } -static inline void vtime_account_irq_exit(struct task_struct *tsk) +static inline void account_irq_exit_time(struct task_struct *tsk) { - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_system(tsk); + vtime_account_irq_exit(tsk); irqtime_account_irq(tsk); } |