From 186f43608a5c827f8284fe4559225b4dccaa49ef Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 13 Jul 2016 20:18:56 -0400 Subject: x86/kernel: Audit and remove any unnecessary uses of module.h Historically a lot of these existed because we did not have a distinction between what was modular code and what was providing support to modules via EXPORT_SYMBOL and friends. That changed when we forked out support for the latter into the export.h file. This means we should be able to reduce the usage of module.h in code that is obj-y Makefile or bool Kconfig. The advantage in doing so is that module.h itself sources about 15 other headers; adding significantly to what we feed cpp, and it can obscure what headers we are effectively using. Since module.h was the source for init.h (for __init) and for export.h (for EXPORT_SYMBOL) we consider each obj-y/bool instance for the presence of either and replace as needed. Build testing revealed some implicit header usage that was fixed up accordingly. Note that some bool/obj-y instances remain since module.h is the header for some exception table entry stuff, and for things like __init_or_module (code that is tossed when MODULES=n). Signed-off-by: Paul Gortmaker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160714001901.31603-4-paul.gortmaker@windriver.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 96becbbb52e0..61b703c179d3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,7 +7,8 @@ #include #include #include -#include +#include +#include #include #include #include -- cgit v1.2.3 From 08e237fa56a1d95c1372033bc29c4a2517b3c0fa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Jul 2016 11:41:10 -0700 Subject: x86/cpu: Add workaround for MONITOR instruction erratum on Goldmont based CPUs Monitored cached line may not wake up from mwait on certain Goldmont based CPUs. This patch will avoid calling current_set_polling_and_test() and thereby not set the TIF_ flag. The result is that we'll always send IPIs for wakeups. Signed-off-by: Peter Zijlstra Signed-off-by: Jacob Pan Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Len Brown Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468867270-18493-1-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/mwait.h | 2 +- arch/x86/kernel/cpu/intel.c | 5 +++++ arch/x86/kernel/process.c | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c64b1e9c5d1a..19ecc6e11f4f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -310,5 +310,5 @@ #endif #define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ - +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 0deeb2d26df7..f37f2d8a2989 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -97,7 +97,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) */ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { - if (!current_set_polling_and_test()) { + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { mb(); clflush((void *)¤t_thread_info()->flags); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c1a89bc026ac..abf601235b29 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -508,6 +509,10 @@ static void init_intel(struct cpuinfo_x86 *c) (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); + if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) && + ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT))) + set_cpu_bug(c, X86_BUG_MONITOR); + #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 96becbbb52e0..59f68f1d734b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -404,7 +404,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) if (c->x86_vendor != X86_VENDOR_INTEL) return 0; - if (!cpu_has(c, X86_FEATURE_MWAIT)) + if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR)) return 0; return 1; -- cgit v1.2.3 From 7b32aeadbc95d4a41402c1c0da6aa3ab51af4c10 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 13 Aug 2016 12:38:18 -0400 Subject: sched/x86: Add 'struct inactive_task_frame' to better document the sleeping task stack frame Add 'struct inactive_task_frame', which defines the layout of the stack for a sleeping process. For now, the only defined field is the BP register (frame pointer). Signed-off-by: Brian Gerst Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1471106302-10159-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 4 ++-- arch/x86/include/asm/switch_to.h | 5 +++++ arch/x86/kernel/kgdb.c | 3 ++- arch/x86/kernel/process.c | 3 ++- 4 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 0944218af9e2..7646fb2772f8 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -8,6 +8,7 @@ #include #include +#include extern int kstack_depth_to_print; @@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs) return bp; } - /* bp is the last reg pushed by switch_to */ - return *(unsigned long *)task->thread.sp; + return ((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 14e4b20f0aaf..ec689c62c01f 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -30,6 +30,11 @@ static inline void prepare_switch_to(struct task_struct *prev, #endif } +/* data that is pointed to by thread.sp */ +struct inactive_task_frame { + unsigned long bp; +}; + #ifdef CONFIG_X86_32 #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5e3f294ce264..8e36f249646e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -50,6 +50,7 @@ #include #include #include +#include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -166,7 +167,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_DX] = 0; gdb_regs[GDB_SI] = 0; gdb_regs[GDB_DI] = 0; - gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; + gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp; #ifdef CONFIG_X86_32 gdb_regs[GDB_DS] = __KERNEL_DS; gdb_regs[GDB_ES] = __KERNEL_DS; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0ea2ce4..0115a4a4db96 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -556,7 +557,7 @@ unsigned long get_wchan(struct task_struct *p) if (sp < bottom || sp > top) return 0; - fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); + fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); do { if (fp < bottom || fp > top) return 0; -- cgit v1.2.3 From ffcb043ba524d3fbd979a9dac2c9ce8ad352000d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 13 Aug 2016 12:38:21 -0400 Subject: sched/x86: Fix thread_saved_pc() thread_saved_pc() was using a completely bogus method to get the return address. Since switch_to() was previously inlined, there was no sane way to know where on the stack the return address was stored. Now with the frame of a sleeping thread well defined, this can be implemented correctly. Signed-off-by: Brian Gerst Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1471106302-10159-7-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 10 ++-------- arch/x86/kernel/process.c | 11 +++++++++++ arch/x86/kernel/process_32.c | 8 -------- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6fee8635340b..b22fb5a4ff3c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -721,8 +721,6 @@ static inline void spin_lock_prefetch(const void *x) .addr_limit = KERNEL_DS, \ } -extern unsigned long thread_saved_pc(struct task_struct *tsk); - /* * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" @@ -773,17 +771,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); .addr_limit = KERNEL_DS, \ } -/* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) - #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ +extern unsigned long thread_saved_pc(struct task_struct *tsk); + extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0115a4a4db96..c1fa790c81cd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -513,6 +513,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } +/* + * Return saved PC of a blocked thread. + * What is this good for? it will be always the scheduler or ret_from_fork. + */ +unsigned long thread_saved_pc(struct task_struct *tsk) +{ + struct inactive_task_frame *frame = + (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); + return READ_ONCE_NOCHECK(frame->ret_addr); +} + /* * Called from fs/proc with a reference on @p to find the function * which called into schedule(). This needs to be done carefully diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 18714a191b2d..404efdfa083b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,14 +55,6 @@ #include #include -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *)tsk->thread.sp)[3]; -} - void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; -- cgit v1.2.3 From 15f4eae70d365bba26854c90b6002aaabb18c8aa Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Sep 2016 14:29:25 -0700 Subject: x86: Move thread_info into task_struct Now that most of the thread_info users have been cleaned up, this is straightforward. Most of this code was written by Linus. Originally-from: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a50eab40abeaec9cb9a9e3cbdeafd32190206654.1473801993.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/entry/entry_64.S | 7 ++++-- arch/x86/include/asm/thread_info.h | 46 -------------------------------------- arch/x86/kernel/asm-offsets.c | 4 +--- arch/x86/kernel/irq_64.c | 3 +-- arch/x86/kernel/process.c | 6 ++--- 6 files changed, 10 insertions(+), 57 deletions(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4c3972847c2a..2a83bc8b24c6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -157,6 +157,7 @@ config X86 select SPARSE_IRQ select SRCU select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select X86_DEV_DMA_OPS if X86_64 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e7fba58f4d9c..2b46384b4a4f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) * If we need to do entry work or if we guess we'll need to do * exit work, go straight to the slow path. */ - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + movq PER_CPU_VAR(current_task), %r11 + testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) jnz entry_SYSCALL64_slow_path entry_SYSCALL_64_fastpath: @@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath: */ DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + movq PER_CPU_VAR(current_task), %r11 + testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) jnz 1f LOCKDEP_SYS_EXIT @@ -370,6 +372,7 @@ END(ptregs_\func) /* * %rdi: prev task * %rsi: next task + * rsi: task we're switching to */ ENTRY(__switch_to_asm) /* diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c9dcfe7c7e4b..2aaca53c0974 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -52,20 +52,6 @@ struct task_struct; #include #include -struct thread_info { - struct task_struct *task; /* main task structure */ - __u32 flags; /* low level flags */ - __u32 cpu; /* current CPU */ -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .flags = 0, \ - .cpu = 0, \ -} - -#define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ @@ -157,11 +143,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); -} - static inline unsigned long current_stack_pointer(void) { unsigned long sp; @@ -223,33 +204,6 @@ static inline int arch_within_stack_frames(const void * const stack, # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) #endif -/* - * ASM operand which evaluates to a 'thread_info' address of - * the current task, if it is known that "reg" is exactly "off" - * bytes below the top of the stack currently. - * - * ( The kernel stack's size is known at build time, it is usually - * 2 or 4 pages, and the bottom of the kernel stack contains - * the thread_info structure. So to access the thread_info very - * quickly from assembly code we can calculate down from the - * top of the kernel stack to the bottom, using constant, - * build-time calculations only. ) - * - * For example, to fetch the current thread_info->flags value into %eax - * on x86-64 defconfig kernels, in syscall entry code where RSP is - * currently at exactly SIZEOF_PTREGS bytes away from the top of the - * stack: - * - * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax - * - * will translate to: - * - * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax - * - * which is below the current RSP by almost 16K. - */ -#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) - #endif #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index add5f90b93d4..c62e015b126c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -35,9 +35,7 @@ void common(void) { #endif BLANK(); - OFFSET(TI_flags, thread_info, flags); - - BLANK(); + OFFSET(TASK_TI_flags, task_struct, thread_info.flags); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4a7903714065..9ebd0b0e73d9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (user_mode(regs)) return; - if (regs->sp >= curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + STACK_TOP_MARGIN && + if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && regs->sp <= curbase + THREAD_SIZE) return; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c1fa790c81cd..0b9ed8ec5226 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -549,9 +549,7 @@ unsigned long get_wchan(struct task_struct *p) * PADDING * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING * stack - * ----------- bottom = start + sizeof(thread_info) - * thread_info - * ----------- start + * ----------- bottom = start * * The tasks stack pointer points at the location where the * framepointer is stored. The data on the stack is: @@ -562,7 +560,7 @@ unsigned long get_wchan(struct task_struct *p) */ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; top -= 2 * sizeof(unsigned long); - bottom = start + sizeof(struct thread_info); + bottom = start; sp = READ_ONCE(p->thread.sp); if (sp < bottom || sp > top) -- cgit v1.2.3 From 74327a3e884a0ff895ba7b51d3488e6a177407b2 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Sep 2016 22:45:46 -0700 Subject: x86/process: Pin the target stack in get_wchan() This will prevent a crash if get_wchan() runs after the task stack is freed. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/337aeca8614024aa4d8d9c81053bbf8fcffbe4ad.1474003868.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0b9ed8ec5226..4002b475171c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -532,15 +532,18 @@ unsigned long thread_saved_pc(struct task_struct *tsk) */ unsigned long get_wchan(struct task_struct *p) { - unsigned long start, bottom, top, sp, fp, ip; + unsigned long start, bottom, top, sp, fp, ip, ret = 0; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; + if (!try_get_task_stack(p)) + return 0; + start = (unsigned long)task_stack_page(p); if (!start) - return 0; + goto out; /* * Layout of the stack page: @@ -564,16 +567,21 @@ unsigned long get_wchan(struct task_struct *p) sp = READ_ONCE(p->thread.sp); if (sp < bottom || sp > top) - return 0; + goto out; fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); do { if (fp < bottom || fp > top) - return 0; + goto out; ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); - if (!in_sched_functions(ip)) - return ip; + if (!in_sched_functions(ip)) { + ret = ip; + goto out; + } fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); } while (count++ < 16 && p->state != TASK_RUNNING); - return 0; + +out: + put_task_stack(p); + return ret; } -- cgit v1.2.3 From 6727ad9e206cc08b80d8000a4d67f8417e53539d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 7 Oct 2016 17:02:55 -0700 Subject: nmi_backtrace: generate one-line reports for idle cpus When doing an nmi backtrace of many cores, most of which are idle, the output is a little overwhelming and very uninformative. Suppress messages for cpus that are idling when they are interrupted and just emit one line, "NMI backtrace for N skipped: idling at pc 0xNNN". We do this by grouping all the cpuidle code together into a new .cpuidle.text section, and then checking the address of the interrupted PC to see if it lies within that section. This commit suitably tags x86 and tile idle routines, and only adds in the minimal framework for other architectures. Link: http://lkml.kernel.org/r/1472487169-14923-5-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf Acked-by: Peter Zijlstra (Intel) Tested-by: Peter Zijlstra (Intel) Tested-by: Daniel Thompson [arm] Tested-by: Petr Mladek Cc: Aaron Tomlin Cc: Peter Zijlstra (Intel) Cc: "Rafael J. Wysocki" Cc: Russell King Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/vmlinux.lds.S | 1 + arch/arc/kernel/vmlinux.lds.S | 1 + arch/arm/kernel/vmlinux-xip.lds.S | 1 + arch/arm/kernel/vmlinux.lds.S | 1 + arch/arm64/kernel/vmlinux.lds.S | 1 + arch/avr32/kernel/vmlinux.lds.S | 1 + arch/blackfin/kernel/vmlinux.lds.S | 1 + arch/c6x/kernel/vmlinux.lds.S | 1 + arch/cris/kernel/vmlinux.lds.S | 1 + arch/frv/kernel/vmlinux.lds.S | 1 + arch/h8300/kernel/vmlinux.lds.S | 1 + arch/hexagon/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 1 + arch/m32r/kernel/vmlinux.lds.S | 1 + arch/m68k/kernel/vmlinux-nommu.lds | 1 + arch/m68k/kernel/vmlinux-std.lds | 1 + arch/m68k/kernel/vmlinux-sun3.lds | 1 + arch/metag/kernel/vmlinux.lds.S | 1 + arch/microblaze/kernel/vmlinux.lds.S | 1 + arch/mips/kernel/vmlinux.lds.S | 1 + arch/mn10300/kernel/vmlinux.lds.S | 1 + arch/nios2/kernel/vmlinux.lds.S | 1 + arch/openrisc/kernel/vmlinux.lds.S | 1 + arch/parisc/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + arch/s390/kernel/vmlinux.lds.S | 1 + arch/score/kernel/vmlinux.lds.S | 1 + arch/sh/kernel/vmlinux.lds.S | 1 + arch/sparc/kernel/vmlinux.lds.S | 1 + arch/tile/kernel/entry.S | 2 +- arch/tile/kernel/vmlinux.lds.S | 1 + arch/um/kernel/dyn.lds.S | 1 + arch/um/kernel/uml.lds.S | 1 + arch/unicore32/kernel/vmlinux.lds.S | 1 + arch/x86/include/asm/irqflags.h | 12 ++++++++---- arch/x86/kernel/acpi/cstate.c | 2 +- arch/x86/kernel/process.c | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 1 + arch/xtensa/kernel/vmlinux.lds.S | 3 +++ drivers/acpi/processor_idle.c | 5 +++-- drivers/cpuidle/driver.c | 5 +++-- drivers/idle/intel_idle.c | 4 ++-- include/asm-generic/vmlinux.lds.h | 6 ++++++ include/linux/cpu.h | 5 +++++ kernel/sched/idle.c | 13 +++++++++++-- lib/nmi_backtrace.c | 16 +++++++++++----- scripts/mod/modpost.c | 2 +- scripts/recordmcount.c | 1 + scripts/recordmcount.pl | 1 + 49 files changed, 93 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 647b84c15382..cebecfb76fbf 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -22,6 +22,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 36611072305f..f35ed578e007 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -89,6 +89,7 @@ SECTIONS _text = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index cba1ec899a69..7fa487ef7e2f 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -98,6 +98,7 @@ SECTIONS IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.gnu.warning) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index d24e5dd2aa7a..f7f55df0bf7b 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -111,6 +111,7 @@ SECTIONS SOFTIRQENTRY_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT HYPERVISOR_TEXT KPROBES_TEXT diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5ce9b2929e0d..1105aab1e6d6 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -122,6 +122,7 @@ SECTIONS ENTRY_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT HYPERVISOR_TEXT diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index a4589176bed5..17f2730eb497 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -52,6 +52,7 @@ SECTIONS KPROBES_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index d920b959ff3a..68069a120055 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -33,6 +33,7 @@ SECTIONS #ifndef CONFIG_SCHEDULE_L1 SCHED_TEXT #endif + CPUIDLE_TEXT LOCK_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S index 50bc10f97bcb..a1a5c166bc9b 100644 --- a/arch/c6x/kernel/vmlinux.lds.S +++ b/arch/c6x/kernel/vmlinux.lds.S @@ -70,6 +70,7 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 7552c2557506..979586261520 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -43,6 +43,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.text.__*) diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 7e958d829ec9..aa6e573d57da 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -63,6 +63,7 @@ SECTIONS *(.text..tlbmiss) TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT #ifdef CONFIG_DEBUG_INFO INIT_TEXT diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index cb5dfb02c88d..7f11da1b895e 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -29,6 +29,7 @@ SECTIONS _stext = . ; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT #if defined(CONFIG_ROMKERNEL) *(.int_redirect) diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 5f268c1071b3..ec87e67feb19 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -50,6 +50,7 @@ SECTIONS _text = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index dc506b05ffbd..f89d20c97412 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -46,6 +46,7 @@ SECTIONS { __end_ivt_text = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.gnu.linkonce.t*) diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 018e4a711d79..ad1fe56455aa 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -31,6 +31,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds index 06a763f49fd3..d2c8abf1c8c4 100644 --- a/arch/m68k/kernel/vmlinux-nommu.lds +++ b/arch/m68k/kernel/vmlinux-nommu.lds @@ -45,6 +45,7 @@ SECTIONS { HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) . = ALIGN(16); diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index d0993594f558..5b5ce1e4d1ed 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -16,6 +16,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 8080469ee6c1..fe5ea1974b16 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -16,6 +16,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S index 150ace92c7ad..e6c700eaf207 100644 --- a/arch/metag/kernel/vmlinux.lds.S +++ b/arch/metag/kernel/vmlinux.lds.S @@ -21,6 +21,7 @@ SECTIONS .text : { TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index 0a47f0410554..289d0e7f3e3a 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -33,6 +33,7 @@ SECTIONS { EXIT_TEXT EXIT_CALL SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index a82c178d0bb9..d5de67591735 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -55,6 +55,7 @@ SECTIONS .text : { TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 13c4814c29f8..2d5f1c3f1afb 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -30,6 +30,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S index e23e89539967..6a8045bb1a77 100644 --- a/arch/nios2/kernel/vmlinux.lds.S +++ b/arch/nios2/kernel/vmlinux.lds.S @@ -37,6 +37,7 @@ SECTIONS .text : { TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index d936de4c07ca..d68b9ede8423 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -47,6 +47,7 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index f3ead0b6ce46..9ec8ec075dae 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -69,6 +69,7 @@ SECTIONS .text ALIGN(PAGE_SIZE) : { TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b5fba689fca6..7ed59f0d947f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -52,6 +52,7 @@ SECTIONS /* careful! __ftr_alt_* sections need to be close to .text */ *(.text .fixup __ftr_alt_* .ref.text) SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 429bfd111961..000e6e91f6a0 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -35,6 +35,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/score/kernel/vmlinux.lds.S b/arch/score/kernel/vmlinux.lds.S index 7274b5c4287e..4117890b1db1 100644 --- a/arch/score/kernel/vmlinux.lds.S +++ b/arch/score/kernel/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS _text = .; /* Text and read-only data */ TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.text.*) diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 235a4101999f..5b9a3cc90c58 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -36,6 +36,7 @@ SECTIONS TEXT_TEXT EXTRA_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d79b3b734245..572db686f845 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -49,6 +49,7 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 670a3569450f..101de132e363 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -50,7 +50,7 @@ STD_ENTRY(smp_nap) * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and * as a result return to the function that called _cpu_idle(). */ -STD_ENTRY(_cpu_idle) +STD_ENTRY_SECTION(_cpu_idle, .cpuidle.text) movei r1, 1 IRQ_ENABLE_LOAD(r2, r3) mtspr INTERRUPT_CRITICAL_SECTION, r1 diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 9d449caf8910..e1baf094fba4 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -42,6 +42,7 @@ SECTIONS .text : AT (ADDR(.text) - LOAD_OFFSET) { HEAD_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index adde088aeeff..4fdbcf958cd5 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -68,6 +68,7 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.stub .text.* .gnu.linkonce.t.*) diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 6899195602b7..1840f55ed042 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -28,6 +28,7 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) /* .gnu.warning sections are handled specially by elf32.em. */ diff --git a/arch/unicore32/kernel/vmlinux.lds.S b/arch/unicore32/kernel/vmlinux.lds.S index 77e407e49a63..56e788e8ee83 100644 --- a/arch/unicore32/kernel/vmlinux.lds.S +++ b/arch/unicore32/kernel/vmlinux.lds.S @@ -37,6 +37,7 @@ SECTIONS .text : { /* Real text segment */ TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT *(.fixup) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index b77f5edb03b0..ac7692dcfa2e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -4,6 +4,10 @@ #include #ifndef __ASSEMBLY__ + +/* Provide __cpuidle; we can't safely include */ +#define __cpuidle __attribute__((__section__(".cpuidle.text"))) + /* * Interrupt control: */ @@ -44,12 +48,12 @@ static inline void native_irq_enable(void) asm volatile("sti": : :"memory"); } -static inline void native_safe_halt(void) +static inline __cpuidle void native_safe_halt(void) { asm volatile("sti; hlt": : :"memory"); } -static inline void native_halt(void) +static inline __cpuidle void native_halt(void) { asm volatile("hlt": : :"memory"); } @@ -86,7 +90,7 @@ static inline notrace void arch_local_irq_enable(void) * Used in the idle loop; sti takes one instruction cycle * to complete: */ -static inline void arch_safe_halt(void) +static inline __cpuidle void arch_safe_halt(void) { native_safe_halt(); } @@ -95,7 +99,7 @@ static inline void arch_safe_halt(void) * Used when interrupts are already enabled or to * shutdown the processor: */ -static inline void halt(void) +static inline __cpuidle void halt(void) { native_halt(); } diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index bdfad642123f..af15f4444330 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -152,7 +152,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); -void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) +void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); struct cstate_entry *percpu_entry; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4002b475171c..28cea7802ecb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -302,7 +302,7 @@ void arch_cpu_idle(void) /* * We use this if we don't have any better idle routine.. */ -void default_idle(void) +void __cpuidle default_idle(void) { trace_cpu_idle_rcuidle(1, smp_processor_id()); safe_halt(); @@ -417,7 +417,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) * with interrupts enabled and no flags, which is backwards compatible with the * original MWAIT implementation. */ -static void mwait_idle(void) +static __cpuidle void mwait_idle(void) { if (!current_set_polling_and_test()) { trace_cpu_idle_rcuidle(1, smp_processor_id()); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9297a002d8e5..dbf67f64d5ec 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -97,6 +97,7 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT ENTRY_TEXT diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 72cfe3587dd8..31411fc82662 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -89,6 +89,9 @@ SECTIONS VMLINUX_SYMBOL(__sched_text_start) = .; *(.sched.literal .sched.text) VMLINUX_SYMBOL(__sched_text_end) = .; + VMLINUX_SYMBOL(__cpuidle_text_start) = .; + *(.cpuidle.literal .cpuidle.text) + VMLINUX_SYMBOL(__cpuidle_text_end) = .; VMLINUX_SYMBOL(__lock_text_start) = .; *(.spinlock.literal .spinlock.text) VMLINUX_SYMBOL(__lock_text_end) = .; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index cea52528aa18..2237d3f24f0e 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -31,6 +31,7 @@ #include /* need_resched() */ #include #include +#include #include /* @@ -115,7 +116,7 @@ static const struct dmi_system_id processor_power_dmi_table[] = { * Callers should disable interrupts before the call and enable * interrupts after return. */ -static void acpi_safe_halt(void) +static void __cpuidle acpi_safe_halt(void) { if (!tif_need_resched()) { safe_halt(); @@ -645,7 +646,7 @@ static int acpi_idle_bm_check(void) * * Caller disables interrupt before call and enables interrupt after return. */ -static void acpi_idle_do_entry(struct acpi_processor_cx *cx) +static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx) { if (cx->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 389ade4572be..ab264d393233 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -178,8 +179,8 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) } #ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static int __cpuidle poll_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { local_irq_enable(); if (!current_set_polling_and_test()) { diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 67ec58f9ef99..4466a2f969d7 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -863,8 +863,8 @@ static struct cpuidle_state dnv_cstates[] = { * * Must be called under local_irq_disable(). */ -static int intel_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int intel_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 24563970ff7b..3e42bcdd014b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -454,6 +454,12 @@ *(.spinlock.text) \ VMLINUX_SYMBOL(__lock_text_end) = .; +#define CPUIDLE_TEXT \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__cpuidle_text_start) = .; \ + *(.cpuidle.text) \ + VMLINUX_SYMBOL(__cpuidle_text_end) = .; + #define KPROBES_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__kprobes_text_start) = .; \ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7572d9e9dced..b886dc17f2f3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -231,6 +231,11 @@ void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); +/* Attach to any functions which should be considered cpuidle. */ +#define __cpuidle __attribute__((__section__(".cpuidle.text"))) + +bool cpu_in_idle(unsigned long pc); + void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 9fb873cfc75c..1d8718d5300d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -16,6 +16,9 @@ #include "sched.h" +/* Linker adds these: start and end of __cpuidle functions */ +extern char __cpuidle_text_start[], __cpuidle_text_end[]; + /** * sched_idle_set_state - Record idle state for the current CPU. * @idle_state: State to record. @@ -53,7 +56,7 @@ static int __init cpu_idle_nopoll_setup(char *__unused) __setup("hlt", cpu_idle_nopoll_setup); #endif -static inline int cpu_idle_poll(void) +static noinline int __cpuidle cpu_idle_poll(void) { rcu_idle_enter(); trace_cpu_idle_rcuidle(0, smp_processor_id()); @@ -84,7 +87,7 @@ void __weak arch_cpu_idle(void) * * To use when the cpuidle framework cannot be used. */ -void default_idle_call(void) +void __cpuidle default_idle_call(void) { if (current_clr_polling_and_test()) { local_irq_enable(); @@ -271,6 +274,12 @@ static void cpu_idle_loop(void) } } +bool cpu_in_idle(unsigned long pc) +{ + return pc >= (unsigned long)__cpuidle_text_start && + pc < (unsigned long)__cpuidle_text_end; +} + void cpu_startup_entry(enum cpuhp_state state) { /* diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 393a3cca1f47..75554754eadf 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef arch_trigger_cpumask_backtrace /* For reliability, we're prepared to waste bits here. */ @@ -87,11 +88,16 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - pr_warn("NMI backtrace for cpu %d\n", cpu); - if (regs) - show_regs(regs); - else - dump_stack(); + if (regs && cpu_in_idle(instruction_pointer(regs))) { + pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", + cpu, instruction_pointer(regs)); + } else { + pr_warn("NMI backtrace for cpu %d\n", cpu); + if (regs) + show_regs(regs); + else + dump_stack(); + } cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 48958d3cec9e..bd8349759095 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -888,7 +888,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text" + ".kprobes.text", ".cpuidle.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", ".text.*", \ ".coldtext" diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index a68f03133df9..5423a58d1b06 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -365,6 +365,7 @@ is_mcounted_section_name(char const *const txtname) strcmp(".irqentry.text", txtname) == 0 || strcmp(".softirqentry.text", txtname) == 0 || strcmp(".kprobes.text", txtname) == 0 || + strcmp(".cpuidle.text", txtname) == 0 || strcmp(".text.unlikely", txtname) == 0; } diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 2d48011bc362..faac4b10d8ea 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -136,6 +136,7 @@ my %text_sections = ( ".irqentry.text" => 1, ".softirqentry.text" => 1, ".kprobes.text" => 1, + ".cpuidle.text" => 1, ".text.unlikely" => 1, ); -- cgit v1.2.3 From 9c6f0902a55226b08b9703408f04b258b9afa0bc Mon Sep 17 00:00:00 2001 From: Jason Cooper Date: Tue, 11 Oct 2016 13:53:56 -0700 Subject: x86: use simpler API for random address requests Currently, all callers to randomize_range() set the length to 0 and calculate end by adding a constant to the start address. We can simplify the API to remove a bunch of needless checks and variables. Use the new randomize_addr(start, range) call to set the requested address. Link: http://lkml.kernel.org/r/20160803233913.32511-3-jason@lakedaemon.net Signed-off-by: Jason Cooper Acked-by: Kees Cook Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/process.c | 3 +-- arch/x86/kernel/sys_x86_64.c | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 28cea7802ecb..0888a879120f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -509,8 +509,7 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? : mm->brk; + return randomize_page(mm->brk, 0x02000000); } /* diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 10e0272d789a..a55ed63b9f91 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -101,7 +101,6 @@ static void find_start_end(unsigned long flags, unsigned long *begin, unsigned long *end) { if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) { - unsigned long new_begin; /* This is usually used needed to map code in small model, so it needs to be in the first 31bit. Limit it to that. This means we need to move the @@ -112,9 +111,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = 0x40000000; *end = 0x80000000; if (current->flags & PF_RANDOMIZE) { - new_begin = randomize_range(*begin, *begin + 0x02000000, 0); - if (new_begin) - *begin = new_begin; + *begin = randomize_page(*begin, 0x02000000); } } else { *begin = current->mm->mmap_legacy_base; -- cgit v1.2.3