From 86c0baf123e474b6eb404798926ecf62b426bf3a Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: Change sysenter_setup to __cpuinit & improve __INIT, __INITDATA Change sysenter_setup to __cpuinit. Change __INIT & __INITDATA to be cpu hotplug aware. Resolve MODPOST warnings similar to: WARNING: vmlinux - Section mismatch: reference to .init.text:sysenter_setup from .text between 'identify_cpu' (at offset 0xc040a380) and 'detect_ht' and WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_int80_end from .text between 'sysenter_setup' (at offset 0xc041a269) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_int80_start from .text between 'sysenter_setup' (at offset 0xc041a26e) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_sysenter_end from .text between 'sysenter_setup' (at offset 0xc041a275) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_sysenter_start from .text between 'sysenter_setup' (at offset 0xc041a27a) and 'enable_sep_cpu' Signed-off-by: Prarit Bhargava Signed-off-by: Andi Kleen --- include/linux/init.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index e290a010e3f2..9abf120ec9f8 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -52,9 +52,14 @@ #endif /* For assembly routines */ +#ifdef CONFIG_HOTPLUG_CPU +#define __INIT .section ".text","ax" +#define __INITDATA .section ".data","aw" +#else #define __INIT .section ".init.text","ax" -#define __FINIT .previous #define __INITDATA .section ".init.data","aw" +#endif +#define __FINIT .previous #ifndef __ASSEMBLY__ /* -- cgit v1.2.1 From 973efae21beb2feda138f152ed06d4204774d93c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] i386: clean up mach_reboot_fixups The reboot_fixups stuff seems to be a bit of a mess, specifically the header is in linux/ when its a purely i386-specific piece of code. I'm not sure why it has its config option; its only currently needed for "geode-gx1/cs5530a", so perhaps whatever config option controls that hardware should enable this? Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- include/linux/reboot_fixups.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/linux/reboot_fixups.h (limited to 'include/linux') diff --git a/include/linux/reboot_fixups.h b/include/linux/reboot_fixups.h deleted file mode 100644 index 480ea2d489d8..000000000000 --- a/include/linux/reboot_fixups.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_REBOOT_FIXUPS_H -#define _LINUX_REBOOT_FIXUPS_H - -#ifdef CONFIG_X86_REBOOTFIXUPS -extern void mach_reboot_fixups(void); -#else -#define mach_reboot_fixups() ((void)(0)) -#endif - -#endif /* _LINUX_REBOOT_FIXUPS_H */ -- cgit v1.2.1 From 184c44d2049c4db7ef6ec65794546954da2c6a0e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86-64: fix x86_64-mm-sched-clock-share Fix for the following patch. Provide dummy cpufreq functions when CPUFREQ is not compiled in. Cc: Andi Kleen Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/linux/cpufreq.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0899e2cdcdd1..cb9b2ec8849c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -32,7 +32,15 @@ * CPUFREQ NOTIFIER INTERFACE * *********************************************************************/ +#ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); +#else +static inline int cpufreq_register_notifier(struct notifier_block *nb, + unsigned int list) +{ + return 0; +} +#endif int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list); #define CPUFREQ_TRANSITION_NOTIFIER (0) @@ -261,17 +269,22 @@ int cpufreq_set_policy(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); -/* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */ -unsigned int cpufreq_get(unsigned int cpu); -/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */ +/* + * query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it + */ #ifdef CONFIG_CPU_FREQ unsigned int cpufreq_quick_get(unsigned int cpu); +unsigned int cpufreq_get(unsigned int cpu); #else static inline unsigned int cpufreq_quick_get(unsigned int cpu) { return 0; } +static inline unsigned int cpufreq_get(unsigned int cpu) +{ + return 0; +} #endif -- cgit v1.2.1 From e073ae1b34d5600ffc550407625dcb2d4cf46c6e Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86-64: Set HASHDIST_DEFAULT to 1 for x86_64 NUMA Enable system hashtable memory to be distributed among nodes on x86_64 NUMA Forcing the kernel to use node interleaved vmalloc instead of bootmem for the system hashtable memory (alloc_large_system_hash) reduces the memory imbalance on node 0 by around 40MB on a 8 node x86_64 NUMA box: Before the following patch, on bootup of a 8 node box: Node 0 MemTotal: 3407488 kB Node 0 MemFree: 3206296 kB Node 0 MemUsed: 201192 kB Node 0 Active: 7012 kB Node 0 Inactive: 512 kB Node 0 Dirty: 0 kB Node 0 Writeback: 0 kB Node 0 FilePages: 1912 kB Node 0 Mapped: 420 kB Node 0 AnonPages: 5612 kB Node 0 PageTables: 468 kB Node 0 NFS_Unstable: 0 kB Node 0 Bounce: 0 kB Node 0 Slab: 5408 kB Node 0 SReclaimable: 644 kB Node 0 SUnreclaim: 4764 kB After the patch (or using hashdist=1 on the kernel command line): Node 0 MemTotal: 3407488 kB Node 0 MemFree: 3247608 kB Node 0 MemUsed: 159880 kB Node 0 Active: 3012 kB Node 0 Inactive: 616 kB Node 0 Dirty: 0 kB Node 0 Writeback: 0 kB Node 0 FilePages: 2424 kB Node 0 Mapped: 380 kB Node 0 AnonPages: 1200 kB Node 0 PageTables: 396 kB Node 0 NFS_Unstable: 0 kB Node 0 Bounce: 0 kB Node 0 Slab: 6304 kB Node 0 SReclaimable: 1596 kB Node 0 SUnreclaim: 4708 kB I guess it is a good idea to keep HASHDIST_DEFAULT "on" for x86_64 NUMA since x86_64 has no dearth of vmalloc space? Or maybe enable hash distribution for all 64bit NUMA arches? The following patch does it only for x86_64. I ran a HPC MPI benchmark -- 'Ansys wingsolid', which takes up quite a bit of memory and uses up tlb entries. This was on a 4 way, 2 socket Tyan AMD box (non vsmp), with 8G total memory (4G pernode). The results with and without hash distribution are: 1. Vanilla - runtime of 1188.000s 2. With hashdist=1 runtime of 1154.000s Oprofile output for the duration of run is: 1. Vanilla: PU: AMD64 processors, speed 2411.16 MHz (estimated) Counted L1_AND_L2_DTLB_MISSES events (L1 and L2 DTLB misses) with a unit mask of 0x00 (No unit mask) count 500 samples % app name symbol name 163054 6.5513 libansys1.so MultiFront::decompose(int, int, Elemset *, int *, int, int, int) 162061 6.5114 libansys3.so blockSaxpy6L_fd 162042 6.5107 libansys3.so blockInnerProduct6L_fd 156286 6.2794 libansys3.so maxb33_ 87879 3.5309 libansys1.so elmatrixmultpcg_ 84857 3.4095 libansys4.so saxpy_pcg 58637 2.3560 libansys4.so .st4560 46612 1.8728 libansys4.so .st4282 43043 1.7294 vmlinux-t copy_user_generic_string 41326 1.6604 libansys3.so blockSaxpyBackSolve6L_fd 41288 1.6589 libansys3.so blockInnerProductBackSolve6L_fd 2. With hashdist=1 CPU: AMD64 processors, speed 2411.13 MHz (estimated) Counted L1_AND_L2_DTLB_MISSES events (L1 and L2 DTLB misses) with a unit mask of 0x00 (No unit mask) count 500 samples % app name symbol name 162993 6.9814 libansys1.so MultiFront::decompose(int, int, Elemset *, int *, int, int, int) 160799 6.8874 libansys3.so blockInnerProduct6L_fd 160459 6.8729 libansys3.so blockSaxpy6L_fd 156018 6.6826 libansys3.so maxb33_ 84700 3.6279 libansys4.so saxpy_pcg 83434 3.5737 libansys1.so elmatrixmultpcg_ 58074 2.4875 libansys4.so .st4560 46000 1.9703 libansys4.so .st4282 41166 1.7632 libansys3.so blockSaxpyBackSolve6L_fd 41033 1.7575 libansys3.so blockInnerProductBackSolve6L_fd 35762 1.5318 libansys1.so inner_product_sub 35591 1.5245 libansys1.so inner_product_sub2 28259 1.2104 libansys4.so addVectors Signed-off-by: Pravin B. Shelar Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andi Kleen Acked-by: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/linux/bootmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 81c07cd18643..0365ec9fc0c9 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -122,9 +122,9 @@ extern void *alloc_large_system_hash(const char *tablename, #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ /* Only NUMA needs hash distribution. - * IA64 is known to have sufficient vmalloc space. + * IA64 and x86_64 have sufficient vmalloc space. */ -#if defined(CONFIG_NUMA) && defined(CONFIG_IA64) +#if defined(CONFIG_NUMA) && (defined(CONFIG_IA64) || defined(CONFIG_X86_64)) #define HASHDIST_DEFAULT 1 #else #define HASHDIST_DEFAULT 0 -- cgit v1.2.1 From 79e030114a8d97a1dcd593ab84fb986f8c91c536 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] i386: Allow i386 crash kernels to handle x86_64 dumps The specific case I am encountering is kdump under Xen with a 64 bit hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to the hypervisor but the dump kernel is 32 bit for maximum compatibility. It's possibly less likely to be useful in a purely native scenario but I see no reason to disallow it. [akpm@linux-foundation.org: build fix] Signed-off-by: Ian Campbell Signed-off-by: Andi Kleen Acked-by: Vivek Goyal Cc: Horms Cc: Magnus Damm Cc: "Eric W. Biederman" Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/linux/crash_dump.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 32503657f14f..22c7ac5cd80c 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, extern const struct file_operations proc_vmcore_operations; extern struct proc_dir_entry *proc_vmcore; +/* Architecture code defines this if there are other possible ELF + * machine types, e.g. on bi-arch capable hardware. */ +#ifndef vmcore_elf_check_arch_cross +#define vmcore_elf_check_arch_cross(x) 0 +#endif + +#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) + #endif /* CONFIG_CRASH_DUMP */ #endif /* LINUX_CRASHDUMP_H */ -- cgit v1.2.1 From 6fb14755a676282a4e6caa05a08c92db8e45cfff Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: tighten kernel image page access rights On x86-64, kernel memory freed after init can be entirely unmapped instead of just getting 'poisoned' by overwriting with a debug pattern. On i386 and x86-64 (under CONFIG_DEBUG_RODATA), kernel text and bug table can also be write-protected. Compared to the first version, this one prevents re-creating deleted mappings in the kernel image range on x86-64, if those got removed previously. This, together with the original changes, prevents temporarily having inconsistent mappings when cacheability attributes are being changed on such pages (e.g. from AGP code). While on i386 such duplicate mappings don't exist, the same change is done there, too, both for consistency and because checking pte_present() before using various other pte_XXX functions is a requirement anyway. At once, i386 code gets adjusted to use pte_huge() instead of open coding this. AK: split out cpa() changes Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/linux/poison.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 3e628f990fdf..89580b764959 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -26,9 +26,6 @@ /********** arch/$ARCH/mm/init.c **********/ #define POISON_FREE_INITMEM 0xcc -/********** arch/x86_64/mm/init.c **********/ -#define POISON_FREE_INITDATA 0xba - /********** arch/ia64/hp/common/sba_iommu.c **********/ /* * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a -- cgit v1.2.1 From b00742d399513a4100c24cc2accefdc1bb1e0b15 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86-64: Account for module percpu space separately from kernel percpu Rather than using a single constant PERCPU_ENOUGH_ROOM, compute it as the sum of kernel_percpu + PERCPU_MODULE_RESERVE. This is now common to all architectures; if an architecture wants to set PERCPU_ENOUGH_ROOM to something special, then it may do so (ia64 is the only one which does). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Eric W. Biederman Cc: Andi Kleen --- include/linux/percpu.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 600e3d387ffc..b72be2f79e6a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -11,9 +11,16 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#ifdef CONFIG_MODULES +#define PERCPU_MODULE_RESERVE 8192 +#else +#define PERCPU_MODULE_RESERVE 0 #endif +#define PERCPU_ENOUGH_ROOM \ + (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) +#endif /* PERCPU_ENOUGH_ROOM */ + /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. -- cgit v1.2.1 From d4f7a2c18e59e0304a1c733589ce14fc02fec1bd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO Some versions of libc can't deal with a VDSO which doesn't have its ELF headers matching its mapped address. COMPAT_VDSO maps the VDSO at a specific system-wide fixed address. Previously this was all done at build time, on the grounds that the fixed VDSO address is always at the top of the address space. However, a hypervisor may reserve some of that address space, pushing the fixmap address down. This patch does the adjustment dynamically at runtime, depending on the runtime location of the VDSO fixmap. [ Patch has been through several hands: Jan Beulich wrote the orignal version; Zach reworked it, and Jeremy converted it to relocate phdrs as well as sections. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: "Jan Beulich" Cc: Eric W. Biederman Cc: Andi Kleen Cc: Ingo Molnar Cc: Roland McGrath --- include/linux/elf.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 60713e6ea297..8b17ffe222c4 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -83,6 +83,23 @@ typedef __s64 Elf64_Sxword; #define DT_DEBUG 21 #define DT_TEXTREL 22 #define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff #define DT_LOPROC 0x70000000 #define DT_HIPROC 0x7fffffff -- cgit v1.2.1 From ce6234b5298902aaec831a67d5f8d9bd2ef5a488 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add kmap_atomic_pte for mapping highpte pages Xen and VMI both have special requirements when mapping a highmem pte page into the kernel address space. These can be dealt with by adding a new kmap_atomic_pte() function for mapping highptes, and hooking it into the paravirt_ops infrastructure. Xen specifically wants to map the pte page RO, so this patch exposes a helper function, kmap_atomic_prot, which maps the page with the specified page protections. This also adds a kmap_flush_unused() function to clear out the cached kmap mappings. Xen needs this to clear out any potential stray RW mappings of pages which will become part of a pagetable. [ Zach - vmi.c will need some attention after this patch. It wasn't immediately obvious to me what needs to be done. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- include/linux/highmem.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 645d440807c2..bca8e2dfa355 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -27,6 +27,8 @@ static inline void flush_kernel_dcache_page(struct page *page) unsigned int nr_free_highpages(void); extern unsigned long totalhigh_pages; +void kmap_flush_unused(void); + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -44,9 +46,13 @@ static inline void *kmap(struct page *page) #define kmap_atomic(page, idx) \ ({ pagefault_disable(); page_address(page); }) +#define kmap_atomic_prot(page, idx, prot) kmap_atomic(page, idx) + #define kunmap_atomic(addr, idx) do { pagefault_enable(); } while (0) #define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) + +#define kmap_flush_unused() do {} while(0) #endif #endif /* CONFIG_HIGHMEM */ -- cgit v1.2.1 From 03df4f6ee997589a84d5f9492c6419183724c710 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] i386: Clean up ELF note generation Three cleanups: 1: ELF notes are never mapped, so there's no need to have any access flags in their phdr. 2: When generating them from asm, tell the assembler to use a SHT_NOTE section type. There doesn't seem to be a way to do this from C. 3: Use ANSI rather than traditional cpp behaviour to stringify the macro argument. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Eric W. Biederman --- include/linux/elfnote.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 67396db141e8..9a1e0674e56c 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -39,12 +39,12 @@ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) */ #define ELFNOTE(name, type, desctype, descdata) \ -.pushsection .note.name ; \ +.pushsection .note.name, "",@note ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ .long type ; \ -1:.asciz "name" ; \ +1:.asciz #name ; \ 2:.align 4 ; \ 3:desctype descdata ; \ 4:.align 4 ; \ -- cgit v1.2.1