From 37b23e0525d393d48a7d59f870b3bc061a30ccdb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:30 -0700 Subject: x86,mm: make pagefault killable When an oom killing occurs, almost all processes are getting stuck at the following two points. 1) __alloc_pages_nodemask 2) __lock_page_or_retry 1) is not very problematic because TIF_MEMDIE leads to an allocation failure and getting out from page allocator. 2) is more problematic. In an OOM situation, zones typically don't have page cache at all and memory starvation might lead to greatly reduced IO performance. When a fork bomb occurs, TIF_MEMDIE tasks don't die quickly, meaning that a fork bomb may create new process quickly rather than the oom-killer killing it. Then, the system may become livelocked. This patch makes the pagefault interruptible by SIGKILL. Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Cc: Minchan Kim Cc: Matthew Wilcox Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bcb394dfbb35..f7a2a054a3c0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -965,7 +965,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) struct mm_struct *mm; int fault; int write = error_code & PF_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (write ? FAULT_FLAG_WRITE : 0); tsk = current; @@ -1138,6 +1138,16 @@ good_area: return; } + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. + */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); + return; + } + /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely -- cgit v1.2.1 From 1c395176962176660bb108f90e97e1686cfe0d85 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:11:58 -0700 Subject: mm: now that all old mmu_gather code is gone, remove the storage Fold all the mmu_gather rework patches into one for submission Signed-off-by: Peter Zijlstra Reported-by: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 37b8b0fe8320..30326443ab81 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -16,8 +16,6 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - unsigned long __initdata pgt_buf_start; unsigned long __meminitdata pgt_buf_end; unsigned long __meminitdata pgt_buf_top; -- cgit v1.2.1 From 3d48ae45e72390ddf8cc5256ac32ed6f7a19cbea Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:06 -0700 Subject: mm: Convert i_mmap_lock to a mutex Straightforward conversion of i_mmap_lock to a mutex. Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/hugetlbpage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index d4203988504a..f581a18c0d4d 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (!vma_shareable(vma, addr)) return; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; @@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) put_page(virt_to_page(spte)); spin_unlock(&mm->page_table_lock); out: - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } /* -- cgit v1.2.1 From de03c72cfce5b263a674d04348b58475ec50163c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:12:15 -0700 Subject: mm: convert mm->cpu_vm_cpumask into cpumask_var_t cpumask_t is very big struct and cpu_vm_mask is placed wrong position. It might lead to reduce cache hit ratio. This patch has two change. 1) Move the place of cpumask into last of mm_struct. Because usually cpumask is accessed only front bits when the system has cpu-hotplug capability 2) Convert cpu_vm_mask into cpumask_var_t. It may help to reduce memory footprint if cpumask_size() will use nr_cpumask_bits properly in future. In addition, this patch change the name of cpu_vm_mask with cpu_vm_mask_var. It may help to detect out of tree cpu_vm_mask users. This patch has no functional change. [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Cc: David Howells Cc: Koichi Yasutake Cc: Hugh Dickins Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/tboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 998e972f3b1a..30ac65df7d4e 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -110,7 +110,6 @@ static struct mm_struct tboot_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), - .cpu_vm_mask = CPU_MASK_ALL, }; static inline void switch_to_tboot_pt(void) -- cgit v1.2.1 From 1495f230fa7750479c79e3656286b9183d662077 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:27 -0700 Subject: vmscan: change shrinker API by passing shrink_control struct Change each shrinker's API by consolidating the existing parameters into shrink_control struct. This will simplify any further features added w/o touching each file of shrinker. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: fix warning] [kosaki.motohiro@jp.fujitsu.com: fix up new shrinker API] [akpm@linux-foundation.org: fix xfs warning] [akpm@linux-foundation.org: update gfs2] Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 28418054b880..bd14bb4c8594 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3545,10 +3545,11 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); } -static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; struct kvm *kvm_freed = NULL; + int nr_to_scan = sc->nr_to_scan; if (nr_to_scan == 0) goto out; -- cgit v1.2.1 From dbee8a0affd5e6eaa5d7c816c4bc233f6f110f50 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 24 May 2011 17:13:09 -0700 Subject: x86: remove 32-bit versions of readq()/writeq() The presense of a writeq() implementation on 32-bit x86 that splits the 64-bit write into two 32-bit writes turns out to break the mpt2sas driver (and in general is risky for drivers as was discussed in ). To fix this, revert 2c5643b1c5c7 ("x86: provide readq()/writeq() on 32-bit too") and follow-on cleanups. This unfortunately leads to pushing non-atomic definitions of readq() and write() to various x86-only drivers that in the meantime started using the definitions in the x86 version of . However as discussed exhaustively, this is actually the right thing to do, because the right way to split a 64-bit transaction is hardware dependent and therefore belongs in the hardware driver (eg mpt2sas needs a spinlock to make sure no other accesses occur in between the two halves of the access). Build tested on 32- and 64-bit x86 allmodconfig. Link: http://lkml.kernel.org/r/x86-32-writeq-is-broken@mdm.bga.com Acked-by: Hitoshi Mitake Cc: Kashyap Desai Cc: Len Brown Cc: Ravi Anand Cc: Vikas Chaudhary Cc: Matthew Garrett Cc: Jason Uhlenkott Acked-by: James Bottomley Acked-by: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 -- arch/x86/include/asm/io.h | 24 ++---------------------- 2 files changed, 2 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 880fcb6c86f4..fa2cc8c5d01c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,8 +17,6 @@ config X86_64 config X86 def_bool y select HAVE_AOUT if X86_32 - select HAVE_READQ - select HAVE_WRITEQ select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 072273082528..d02804d650c4 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -38,7 +38,6 @@ #include #include -#include #include #include @@ -87,27 +86,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) build_mmio_read(readq, "q", unsigned long, "=r", :"memory") build_mmio_write(writeq, "q", unsigned long, "r", :"memory") -#else - -static inline __u64 readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - low = readl(p); - high = readl(p + 1); - - return low + ((u64)high << 32); -} - -static inline void writeq(__u64 val, volatile void __iomem *addr) -{ - writel(val, addr); - writel(val >> 32, addr+4); -} - -#endif - #define readq_relaxed(a) readq(a) #define __raw_readq(a) readq(a) @@ -117,6 +95,8 @@ static inline void writeq(__u64 val, volatile void __iomem *addr) #define readq readq #define writeq writeq +#endif + /** * virt_to_phys - map virtual addresses to physical * @address: address to remap -- cgit v1.2.1 From 162a7e7500f9664636e649ba59defe541b7c2c60 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 24 May 2011 17:13:20 -0700 Subject: printk: allocate kernel log buffer earlier On larger systems, because of the numerous ACPI, Bootmem and EFI messages, the static log buffer overflows before the larger one specified by the log_buf_len param is allocated. Minimize the overflow by allocating the new log buffer as soon as possible. On kernels without memblock, a later call to setup_log_buf from kernel/init.c is the fallback. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix CONFIG_PRINTK=n build] Signed-off-by: Mike Travis Cc: Yinghai Lu Cc: "H. Peter Anvin" Cc: Jack Steiner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 605e5ae19c7f..a3e5948670c2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -946,6 +946,8 @@ void __init setup_arch(char **cmdline_p) if (init_ohci1394_dma_early) init_ohci1394_dma_on_all_controllers(); #endif + /* Allocate bigger log buffer */ + setup_log_buf(1); reserve_initrd(); -- cgit v1.2.1 From 44ec7abe359204cc9186e32d31ef5b34c8d17274 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 24 May 2011 17:13:32 -0700 Subject: lib: consolidate DEBUG_PER_CPU_MAPS DEBUG_PER_CPU_MAPS is used in lib/cpumask.c as well as in inlcude/linux/cpumask.h and thus it has outgrown its use within x86 and powerpc alone. Any arch with SMP support may want to get some more debugging, so make this option generic. Signed-off-by: Stephen Boyd Cc: Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Acked-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.debug | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 615e18810f48..1bf88396ba23 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -75,17 +75,6 @@ config DEBUG_STACK_USAGE This option will slow down process creation somewhat. -config DEBUG_PER_CPU_MAPS - bool "Debug access to per_cpu maps" - depends on DEBUG_KERNEL - depends on SMP - ---help--- - Say Y to verify that the per_cpu map being accessed has - been setup. Adds a fair amount of code to kernel memory - and decreases performance. - - Say N if unsure. - config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL -- cgit v1.2.1 From 5ca43f6c3b365024d889bc77064bb331f5a72a45 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 24 May 2011 17:13:36 -0700 Subject: lib: consolidate DEBUG_STACK_USAGE option Most arches define CONFIG_DEBUG_STACK_USAGE exactly the same way. Move it to lib/Kconfig.debug so each arch doesn't have to define it. This obviously makes the option generic, but that's fine because the config is already used in generic code. It's not obvious to me that sysrq-P actually does anything caution by keeping the most inclusive wording. Signed-off-by: Stephen Boyd Cc: Chris Metcalf Acked-by: David S. Miller Acked-by: Richard Weinberger Acked-by: Mike Frysinger Cc: Russell King Cc: Hirokazu Takata Acked-by: Ralf Baechle Cc: Paul Mackerras Acked-by: Benjamin Herrenschmidt Cc: Chen Liqin Cc: Lennox Wu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.debug | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 1bf88396ba23..c0f8a5c88910 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -66,15 +66,6 @@ config DEBUG_STACKOVERFLOW This option will cause messages to be printed if free stack space drops below a certain limit. -config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL - ---help--- - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T and sysrq-P debug output. - - This option will slow down process creation somewhat. - config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL -- cgit v1.2.1 From 949a9d70020defd7c241607ab3ed037ea88f551c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 25 May 2011 20:43:33 +0200 Subject: i8k: Integrate with the hwmon subsystem Let i8k create an hwmon class device so that libsensors will expose the CPU temperature and fan speeds to monitoring applications. Signed-off-by: Jean Delvare Acked-by: Guenter Roeck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Massimo Dal Zotto --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 880fcb6c86f4..dcf25b71f6d7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -917,6 +917,7 @@ config TOSHIBA config I8K tristate "Dell laptop support" + select HWMON ---help--- This adds a driver to safely access the System Management Mode of the CPU on the Dell Inspiron 8000. The System Management Mode -- cgit v1.2.1