diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-28 11:05:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-28 11:05:28 -0700 |
commit | 8326e284f8deb75eee3d32b973464dd96e120843 (patch) | |
tree | a2c4e18f4e7984680946cab0303e5369f175d4f5 /arch/x86/kernel | |
parent | 187dd317f0169142e4adf6263852f93c3b6f6a3c (diff) | |
parent | e888d7facd1f1460a638151036d15b6cfb3ccc74 (diff) | |
download | blackbird-op-linux-8326e284f8deb75eee3d32b973464dd96e120843.tar.gz blackbird-op-linux-8326e284f8deb75eee3d32b973464dd96e120843.zip |
Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, delay: tsc based udelay should have rdtsc_barrier
x86, setup: correct include file in <asm/boot.h>
x86, setup: Fix typo "CONFIG_x86_64" in <asm/boot.h>
x86, mce: percpu mcheck_timer should be pinned
x86: Add sysctl to allow panic on IOCK NMI error
x86: Fix uv bau sending buffer initialization
x86, mce: Fix mce resume on 32bit
x86: Move init_gbpages() to setup_arch()
x86: ensure percpu lpage doesn't consume too much vmalloc space
x86: implement percpu_alloc kernel parameter
x86: fix pageattr handling for lpage percpu allocator and re-enable it
x86: reorganize cpa_process_alias()
x86: prepare setup_pcpu_lpage() for pageattr fix
x86: rename remap percpu first chunk allocator to lpage
x86: fix duplicate free in setup_pcpu_remap() failure path
percpu: fix too lazy vunmap cache flushing
x86: Set cpu_llc_id on AMD CPUs
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 16 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 219 | ||||
-rw-r--r-- | arch/x86/kernel/tlb_uv.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 3 |
7 files changed, 189 insertions, 67 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e5b27d8f1b47..28e5f5956042 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT unsigned bits; + int cpu = smp_processor_id(); bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; + /* use socket ID also for last level cache */ + per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; #endif } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 284d1de968bc..af425b83202b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1117,7 +1117,7 @@ static void mcheck_timer(unsigned long data) *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); t->expires = jiffies + *n; - add_timer(t); + add_timer_on(t, smp_processor_id()); } static void mce_do_trigger(struct work_struct *work) @@ -1321,7 +1321,7 @@ static void mce_init_timer(void) return; setup_timer(t, mcheck_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); - add_timer(t); + add_timer_on(t, smp_processor_id()); } /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 95ea5fa7d444..c8405718a4c3 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -22,6 +22,7 @@ #include "dumpstack.h" int panic_on_unrecovered_nmi; +int panic_on_io_nmi; unsigned int code_bytes = 64; int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; static int die_counter; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index be5ae80f897f..de2cab132844 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align) return ret; } +#ifdef CONFIG_X86_64 +static void __init init_gbpages(void) +{ + if (direct_gbpages && cpu_has_gbpages) + printk(KERN_INFO "Using GB pages for direct mapping\n"); + else + direct_gbpages = 0; +} +#else +static inline void init_gbpages(void) +{ +} +#endif + static void __init reserve_brk(void) { if (_brk_end > _brk_start) @@ -871,6 +885,8 @@ void __init setup_arch(char **cmdline_p) reserve_brk(); + init_gbpages(); + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); max_pfn_mapped = max_low_pfn_mapped; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9c3f0823e6aa..29a3eef7cf4a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, } /* - * Remap allocator + * Large page remap allocator * * This allocator uses PMD page as unit. A PMD page is allocated for * each cpu and each is remapped into vmalloc area using PMD mapping. @@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, * better than only using 4k mappings while still being NUMA friendly. */ #ifdef CONFIG_NEED_MULTIPLE_NODES -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; +struct pcpul_ent { + unsigned int cpu; + void *ptr; +}; + +static size_t pcpul_size; +static struct pcpul_ent *pcpul_map; +static struct vm_struct pcpul_vm; -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) +static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) { size_t off = (size_t)pageno << PAGE_SHIFT; - if (off >= pcpur_size) + if (off >= pcpul_size) return NULL; - return virt_to_page(pcpur_ptrs[cpu] + off); + return virt_to_page(pcpul_map[cpu].ptr + off); } -static ssize_t __init setup_pcpu_remap(size_t static_size) +static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { - static struct vm_struct vm; - size_t ptrs_size, dyn_size; + size_t map_size, dyn_size; unsigned int cpu; + int i, j; ssize_t ret; - /* - * If large page isn't supported, there's no benefit in doing - * this. Also, on non-NUMA, embedding is better. - * - * NOTE: disabled for now. - */ - if (true || !cpu_has_pse || !pcpu_need_numa()) + if (!chosen) { + size_t vm_size = VMALLOC_END - VMALLOC_START; + size_t tot_size = num_possible_cpus() * PMD_SIZE; + + /* on non-NUMA, embedding is better */ + if (!pcpu_need_numa()) + return -EINVAL; + + /* don't consume more than 20% of vmalloc area */ + if (tot_size > vm_size / 5) { + pr_info("PERCPU: too large chunk size %zuMB for " + "large page remap\n", tot_size >> 20); + return -EINVAL; + } + } + + /* need PSE */ + if (!cpu_has_pse) { + pr_warning("PERCPU: lpage allocator requires PSE\n"); return -EINVAL; + } /* * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. */ - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE); - if (pcpur_size > PMD_SIZE) { + if (pcpul_size > PMD_SIZE) { pr_warning("PERCPU: static data is larger than large page, " "can't use large page\n"); return -EINVAL; } - dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; + dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); + map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + pcpul_map = alloc_bootmem(map_size); for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); - if (!pcpur_ptrs[cpu]) + pcpul_map[cpu].cpu = cpu; + pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, + PMD_SIZE); + if (!pcpul_map[cpu].ptr) { + pr_warning("PERCPU: failed to allocate large page " + "for cpu%u\n", cpu); goto enomem; + } /* - * Only use pcpur_size bytes and give back the rest. + * Only use pcpul_size bytes and give back the rest. * * Ingo: The 2MB up-rounding bootmem is needed to make * sure the partial 2MB page is still fully RAM - it's * not well-specified to have a PAT-incompatible area * (unmapped RAM, device memory, etc.) in that hole. */ - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PMD_SIZE - pcpur_size); + free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), + PMD_SIZE - pcpul_size); - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); + memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); } /* allocate address and map */ - vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&vm, PMD_SIZE); + pcpul_vm.flags = VM_ALLOC; + pcpul_vm.size = num_possible_cpus() * PMD_SIZE; + vm_area_register_early(&pcpul_vm, PMD_SIZE); for_each_possible_cpu(cpu) { - pmd_t *pmd; + pmd_t *pmd, pmd_v; - pmd = populate_extra_pmd((unsigned long)vm.addr - + cpu * PMD_SIZE); - set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), - PAGE_KERNEL_LARGE)); + pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + + cpu * PMD_SIZE); + pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), + PAGE_KERNEL_LARGE); + set_pmd(pmd, pmd_v); } /* we're ready, commit */ pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", vm.addr, static_size); + "%zu bytes\n", pcpul_vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, + ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, vm.addr, NULL); - goto out_free_ar; + PMD_SIZE, pcpul_vm.addr, NULL); + + /* sort pcpul_map array for pcpu_lpage_remapped() */ + for (i = 0; i < num_possible_cpus() - 1; i++) + for (j = i + 1; j < num_possible_cpus(); j++) + if (pcpul_map[i].ptr > pcpul_map[j].ptr) { + struct pcpul_ent tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + return ret; enomem: for_each_possible_cpu(cpu) - if (pcpur_ptrs[cpu]) - free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpur_ptrs), ptrs_size); - return ret; + if (pcpul_map[cpu].ptr) + free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); + free_bootmem(__pa(pcpul_map), map_size); + return -ENOMEM; +} + +/** + * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area + * @kaddr: the kernel address in question + * + * Determine whether @kaddr falls in the pcpul recycled area. This is + * used by pageattr to detect VM aliases and break up the pcpu PMD + * mapping such that the same physical page is not mapped under + * different attributes. + * + * The recycled area is always at the tail of a partially used PMD + * page. + * + * RETURNS: + * Address of corresponding remapped pcpu address if match is found; + * otherwise, NULL. + */ +void *pcpu_lpage_remapped(void *kaddr) +{ + void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); + unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; + int left = 0, right = num_possible_cpus() - 1; + int pos; + + /* pcpul in use at all? */ + if (!pcpul_map) + return NULL; + + /* okay, perform binary search */ + while (left <= right) { + pos = (left + right) / 2; + + if (pcpul_map[pos].ptr < pmd_addr) + left = pos + 1; + else if (pcpul_map[pos].ptr > pmd_addr) + right = pos - 1; + else { + /* it shouldn't be in the area for the first chunk */ + WARN_ON(offset < pcpul_size); + + return pcpul_vm.addr + + pcpul_map[pos].cpu * PMD_SIZE + offset; + } + } + + return NULL; } #else -static ssize_t __init setup_pcpu_remap(size_t static_size) +static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { return -EINVAL; } @@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * mapping so that it can use PMD mapping without additional TLB * pressure. */ -static ssize_t __init setup_pcpu_embed(size_t static_size) +static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) * this. Also, embedding allocation doesn't play well with * NUMA. */ - if (!cpu_has_pse || pcpu_need_numa()) + if (!chosen && (!cpu_has_pse || pcpu_need_numa())) return -EINVAL; return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, @@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) void *ptr; ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) + if (!ptr) { + pr_warning("PERCPU: failed to allocate " + "4k page for cpu%u\n", cpu); goto enomem; + } memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); pcpu4k_pages[j++] = virt_to_page(ptr); @@ -333,6 +416,16 @@ out_free_ar: return ret; } +/* for explicit first chunk allocator selection */ +static char pcpu_chosen_alloc[16] __initdata; + +static int __init percpu_alloc_setup(char *str) +{ + strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); + return 0; +} +early_param("percpu_alloc", percpu_alloc_setup); + static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu) #endif } -/* - * Great future plan: - * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. - * Always point %gs to its beginning - */ void __init setup_per_cpu_areas(void) { size_t static_size = __per_cpu_end - __per_cpu_start; @@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void) * of large page mappings. Please read comments on top of * each allocator for details. */ - ret = setup_pcpu_remap(static_size); - if (ret < 0) - ret = setup_pcpu_embed(static_size); + ret = -EINVAL; + if (strlen(pcpu_chosen_alloc)) { + if (strcmp(pcpu_chosen_alloc, "4k")) { + if (!strcmp(pcpu_chosen_alloc, "lpage")) + ret = setup_pcpu_lpage(static_size, true); + else if (!strcmp(pcpu_chosen_alloc, "embed")) + ret = setup_pcpu_embed(static_size, true); + else + pr_warning("PERCPU: unknown allocator %s " + "specified\n", pcpu_chosen_alloc); + if (ret < 0) + pr_warning("PERCPU: %s allocator failed (%zd), " + "falling back to 4k\n", + pcpu_chosen_alloc, ret); + } + } else { + ret = setup_pcpu_lpage(static_size, false); + if (ret < 0) + ret = setup_pcpu_embed(static_size, false); + } if (ret < 0) ret = setup_pcpu_4k(static_size); if (ret < 0) diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 124d40c575df..8ccabb8a2f6a 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode) unsigned long pa; unsigned long m; unsigned long n; - unsigned long mmr_image; struct bau_desc *adp; struct bau_desc *ad2; @@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode) n = pa >> uv_nshift; m = pa & uv_mmask; - mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); - if (mmr_image) { - uv_write_global_mmr64(pnode, (unsigned long) - UVH_LB_BAU_SB_DESCRIPTOR_BASE, - (n << UV_DESC_BASE_PNODE_SHIFT | m)); - } + uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, + (n << UV_DESC_BASE_PNODE_SHIFT | m)); /* * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a0f48f5671c0..5204332f475d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); + if (panic_on_io_nmi) + panic("NMI IOCK error: Not continuing"); + /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & 0xf) | 8; outb(reason, 0x61); |