diff options
author | Tony Luck <tony.luck@intel.com> | 2005-10-20 10:41:44 -0700 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-10-20 10:41:44 -0700 |
commit | 9cec58dc138d6fcad9f447a19c8ff69f6540e667 (patch) | |
tree | 4fe1cca94fdba8b705c87615bee06d3346f687ce /arch/x86_64 | |
parent | 17e5ad6c0ce5a970e2830d0de8bdd60a2f077d38 (diff) | |
parent | ac9b9c667c2e1194e22ebe0a441ae1c37aaa9b90 (diff) | |
download | blackbird-op-linux-9cec58dc138d6fcad9f447a19c8ff69f6540e667.tar.gz blackbird-op-linux-9cec58dc138d6fcad9f447a19c8ff69f6540e667.zip |
Update from upstream with manual merge of Yasunori Goto's
changes to swiotlb.c made in commit 281dd25cdc0d6903929b79183816d151ea626341
since this file has been moved from arch/ia64/lib/swiotlb.c to
lib/swiotlb.c
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 7 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_signal.c | 6 | ||||
-rw-r--r-- | arch/x86_64/kernel/e820.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/head.S | 40 | ||||
-rw-r--r-- | arch/x86_64/kernel/kprobes.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce.c | 10 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 22 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup64.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 6 | ||||
-rw-r--r-- | arch/x86_64/kernel/suspend.c | 127 | ||||
-rw-r--r-- | arch/x86_64/kernel/suspend_asm.S | 17 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 3 | ||||
-rw-r--r-- | arch/x86_64/kernel/x8664_ksyms.c | 4 | ||||
-rw-r--r-- | arch/x86_64/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/x86_64/lib/dec_and_lock.c | 40 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 10 | ||||
-rw-r--r-- | arch/x86_64/mm/pageattr.c | 2 |
17 files changed, 204 insertions, 106 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index e63323e03ea9..21afa69a086d 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -277,11 +277,6 @@ source "mm/Kconfig" config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y -config HAVE_DEC_LOCK - bool - depends on SMP - default y - config NR_CPUS int "Maximum number of CPUs (2-256)" range 2 256 @@ -313,7 +308,7 @@ config HPET_TIMER present. The HPET provides a stable time base on SMP systems, unlike the TSC, but it is more expensive to access, as it is off-chip. You can find the HPET spec at - <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. + <http://www.intel.com/hardwaredesign/hpetspec.htm>. config X86_PM_TIMER bool "PM timer" diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index 66e2821533db..0903cc1faef2 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -425,7 +425,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) rsp = (unsigned long) ka->sa.sa_restorer; } - return (void __user *)((rsp - frame_size) & -8UL); + rsp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + rsp = ((rsp + 4) & -16ul) - 4; + return (void __user *) rsp; } int ia32_setup_frame(int sig, struct k_sigaction *ka, diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 4e34b0f9d613..ab3f87aaff70 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -17,6 +17,8 @@ #include <linux/ioport.h> #include <linux/string.h> #include <linux/kexec.h> +#include <linux/module.h> + #include <asm/page.h> #include <asm/e820.h> #include <asm/proto.h> diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 4592bf21fcaf..b92e5f45ed46 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -270,26 +270,26 @@ ENTRY(level3_kernel_pgt) .org 0x4000 ENTRY(level2_ident_pgt) /* 40MB for bootup. */ - .quad 0x0000000000000183 - .quad 0x0000000000200183 - .quad 0x0000000000400183 - .quad 0x0000000000600183 - .quad 0x0000000000800183 - .quad 0x0000000000A00183 - .quad 0x0000000000C00183 - .quad 0x0000000000E00183 - .quad 0x0000000001000183 - .quad 0x0000000001200183 - .quad 0x0000000001400183 - .quad 0x0000000001600183 - .quad 0x0000000001800183 - .quad 0x0000000001A00183 - .quad 0x0000000001C00183 - .quad 0x0000000001E00183 - .quad 0x0000000002000183 - .quad 0x0000000002200183 - .quad 0x0000000002400183 - .quad 0x0000000002600183 + .quad 0x0000000000000083 + .quad 0x0000000000200083 + .quad 0x0000000000400083 + .quad 0x0000000000600083 + .quad 0x0000000000800083 + .quad 0x0000000000A00083 + .quad 0x0000000000C00083 + .quad 0x0000000000E00083 + .quad 0x0000000001000083 + .quad 0x0000000001200083 + .quad 0x0000000001400083 + .quad 0x0000000001600083 + .quad 0x0000000001800083 + .quad 0x0000000001A00083 + .quad 0x0000000001C00083 + .quad 0x0000000001E00083 + .quad 0x0000000002000083 + .quad 0x0000000002200083 + .quad 0x0000000002400083 + .quad 0x0000000002600083 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ .globl temp_boot_pmds temp_boot_pmds: diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index df08c43276a0..76a28b007be9 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -77,9 +77,9 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn) int __kprobes arch_prepare_kprobe(struct kprobe *p) { /* insn: must be on special executable page on x86_64. */ - up(&kprobe_mutex); - p->ainsn.insn = get_insn_slot(); down(&kprobe_mutex); + p->ainsn.insn = get_insn_slot(); + up(&kprobe_mutex); if (!p->ainsn.insn) { return -ENOMEM; } @@ -231,9 +231,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - up(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); down(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + up(&kprobe_mutex); } static inline void save_previous_kprobe(void) diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 08203b07f4bd..69541db5ff2c 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -54,9 +54,12 @@ void mce_log(struct mce *mce) { unsigned next, entry; mce->finished = 0; - smp_wmb(); + wmb(); for (;;) { entry = rcu_dereference(mcelog.next); + /* The rmb forces the compiler to reload next in each + iteration */ + rmb(); for (;;) { /* When the buffer fills up discard new entries. Assume that the earlier errors are the more interesting. */ @@ -69,6 +72,7 @@ void mce_log(struct mce *mce) entry++; continue; } + break; } smp_rmb(); next = entry + 1; @@ -76,9 +80,9 @@ void mce_log(struct mce *mce) break; } memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - smp_wmb(); + wmb(); mcelog.entry[entry].finished = 1; - smp_wmb(); + wmb(); if (!test_and_set_bit(0, &console_logged)) notify_user = 1; diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 351d8d64c2fb..cb28df14ff6f 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -836,6 +836,23 @@ static int __init init_amd(struct cpuinfo_x86 *c) int r; int level; +#ifdef CONFIG_SMP + unsigned long value; + + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } +#endif + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_bit(0*32+31, &c->x86_capability); @@ -950,13 +967,12 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) static void srat_detect_node(void) { #ifdef CONFIG_NUMA - unsigned apicid, node; + unsigned node; int cpu = smp_processor_id(); /* Don't do the funky fallback heuristics the AMD version employs for now. */ - apicid = phys_proc_id[cpu]; - node = apicid_to_node[apicid]; + node = apicid_to_node[hard_smp_processor_id()]; if (node == NUMA_NO_NODE) node = 0; cpu_to_node[cpu] = node; diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index bd33be24a386..79190891fbc5 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -87,6 +87,10 @@ void __init setup_per_cpu_areas(void) int i; unsigned long size; +#ifdef CONFIG_HOTPLUG_CPU + prefill_possible_map(); +#endif + /* Copy section for each CPU (we discard the original) */ size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); #ifdef CONFIG_MODULES diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index e12d7baeb33e..658a81b33f3b 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -892,7 +892,7 @@ static __init void disable_smp(void) * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. * - Ashok Raj */ -static void prefill_possible_map(void) +__init void prefill_possible_map(void) { int i; for (i = 0; i < NR_CPUS; i++) @@ -967,10 +967,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ -#ifdef CONFIG_HOTPLUG_CPU - prefill_possible_map(); -#endif - if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index ebb9abf3ce6d..f066c6ab3618 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -11,6 +11,8 @@ #include <linux/smp.h> #include <linux/suspend.h> #include <asm/proto.h> +#include <asm/page.h> +#include <asm/pgtable.h> struct saved_context saved_context; @@ -140,4 +142,129 @@ void fix_processor_context(void) } +#ifdef CONFIG_SOFTWARE_SUSPEND +/* Defined in arch/x86_64/kernel/suspend_asm.S */ +extern int restore_image(void); +pgd_t *temp_level4_pgt; + +static void **pages; + +static inline void *__add_page(void) +{ + void **c; + + c = (void **)get_usable_page(GFP_ATOMIC); + if (c) { + *c = pages; + pages = c; + } + return c; +} + +static inline void *__next_page(void) +{ + void **c; + + c = pages; + if (c) { + pages = *c; + *c = NULL; + } + return c; +} + +/* + * Try to allocate as many usable pages as needed and daisy chain them. + * If one allocation fails, free the pages allocated so far + */ +static int alloc_usable_pages(unsigned long n) +{ + void *p; + + pages = NULL; + do + if (!__add_page()) + break; + while (--n); + if (n) { + p = __next_page(); + while (p) { + free_page((unsigned long)p); + p = __next_page(); + } + return -ENOMEM; + } + return 0; +} + +static void res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) +{ + long i, j; + + i = pud_index(address); + pud = pud + i; + for (; i < PTRS_PER_PUD; pud++, i++) { + unsigned long paddr; + pmd_t *pmd; + + paddr = address + i*PUD_SIZE; + if (paddr >= end) + break; + + pmd = (pmd_t *)__next_page(); + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { + unsigned long pe; + + if (paddr >= end) + break; + pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr; + pe &= __supported_pte_mask; + set_pmd(pmd, __pmd(pe)); + } + } +} + +static void set_up_temporary_mappings(void) +{ + unsigned long start, end, next; + + temp_level4_pgt = (pgd_t *)__next_page(); + + /* It is safe to reuse the original kernel mapping */ + set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), + init_level4_pgt[pgd_index(__START_KERNEL_map)]); + + /* Set up the direct mapping from scratch */ + start = (unsigned long)pfn_to_kaddr(0); + end = (unsigned long)pfn_to_kaddr(end_pfn); + + for (; start < end; start = next) { + pud_t *pud = (pud_t *)__next_page(); + next = start + PGDIR_SIZE; + if (next > end) + next = end; + res_phys_pud_init(pud, __pa(start), __pa(next)); + set_pgd(temp_level4_pgt + pgd_index(start), + mk_kernel_pgd(__pa(pud))); + } +} + +int swsusp_arch_resume(void) +{ + unsigned long n; + + n = ((end_pfn << PAGE_SHIFT) + PUD_SIZE - 1) >> PUD_SHIFT; + n += (n + PTRS_PER_PUD - 1) / PTRS_PER_PUD + 1; + pr_debug("swsusp_arch_resume(): pages needed = %lu\n", n); + if (alloc_usable_pages(n)) { + free_eaten_memory(); + return -ENOMEM; + } + /* We have got enough memory and from now on we cannot recover */ + set_up_temporary_mappings(); + restore_image(); + return 0; +} +#endif /* CONFIG_SOFTWARE_SUSPEND */ diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index 4d659e97df10..320b6fb00cca 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ b/arch/x86_64/kernel/suspend_asm.S @@ -39,12 +39,13 @@ ENTRY(swsusp_arch_suspend) call swsusp_save ret -ENTRY(swsusp_arch_resume) - /* set up cr3 */ - leaq init_level4_pgt(%rip),%rax - subq $__START_KERNEL_map,%rax - movq %rax,%cr3 - +ENTRY(restore_image) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rdx + movq temp_level4_pgt(%rip), %rax + subq %rdx, %rax + movq %rax, %cr3 + /* Flush TLB */ movq mmu_cr4_features(%rip), %rax movq %rax, %rdx andq $~(1<<7), %rdx # PGE @@ -69,6 +70,10 @@ loop: movq pbe_next(%rdx), %rdx jmp loop done: + /* go back to the original page tables */ + leaq init_level4_pgt(%rip), %rax + subq $__START_KERNEL_map, %rax + movq %rax, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax movq %rax, %rdx diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 2373cb8b8625..703acde2a1a5 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -959,9 +959,6 @@ static __init int unsynchronized_tsc(void) are handled in the OEM check above. */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) return 0; - /* All in a single socket - should be synchronized */ - if (cpus_weight(cpu_core_map[0]) == num_online_cpus()) - return 0; #endif /* Assume multi socket systems are not synchronized */ return num_online_cpus() > 1; diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 68ec03070e5a..fd99ddd009bc 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -178,10 +178,6 @@ EXPORT_SYMBOL(rwsem_down_write_failed_thunk); EXPORT_SYMBOL(empty_zero_page); -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(_atomic_dec_and_lock); -#endif - EXPORT_SYMBOL(die_chain); EXPORT_SYMBOL(register_die_notifier); diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile index 6b26a1c1e9ff..bba5db6cebd6 100644 --- a/arch/x86_64/lib/Makefile +++ b/arch/x86_64/lib/Makefile @@ -10,5 +10,3 @@ lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \ usercopy.o getuser.o putuser.o \ thunk.o clear_page.o copy_page.o bitstr.o bitops.o lib-y += memcpy.o memmove.o memset.o copy_user.o - -lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o diff --git a/arch/x86_64/lib/dec_and_lock.c b/arch/x86_64/lib/dec_and_lock.c deleted file mode 100644 index ab43394dc775..000000000000 --- a/arch/x86_64/lib/dec_and_lock.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * x86 version of "atomic_dec_and_lock()" using - * the atomic "cmpxchg" instruction. - * - * (For CPU's lacking cmpxchg, we use the slow - * generic version, and this one never even gets - * compiled). - */ - -#include <linux/spinlock.h> -#include <asm/atomic.h> - -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) -{ - int counter; - int newcount; - -repeat: - counter = atomic_read(atomic); - newcount = counter-1; - - if (!newcount) - goto slow_path; - - asm volatile("lock; cmpxchgl %1,%2" - :"=a" (newcount) - :"r" (newcount), "m" (atomic->counter), "0" (counter)); - - /* If the above failed, "eax" will have changed */ - if (newcount != counter) - goto repeat; - return 0; - -slow_path: - spin_lock(lock); - if (atomic_dec_and_test(atomic)) - return 1; - spin_unlock(lock); - return 0; -} diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 80a49d9bd8a7..214803821001 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -167,18 +167,16 @@ void __init numa_init_array(void) mapping. To avoid this fill in the mapping for all possible CPUs, as the number of CPUs is not known yet. We round robin the existing nodes. */ - rr = 0; + rr = first_node(node_online_map); for (i = 0; i < NR_CPUS; i++) { if (cpu_to_node[i] != NUMA_NO_NODE) continue; + cpu_to_node[i] = rr; rr = next_node(rr, node_online_map); if (rr == MAX_NUMNODES) rr = first_node(node_online_map); - cpu_to_node[i] = rr; - rr++; } - set_bit(0, &node_to_cpumask[cpu_to_node(0)]); } #ifdef CONFIG_NUMA_EMU @@ -266,9 +264,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) __cpuinit void numa_add_cpu(int cpu) { - /* BP is initialized elsewhere */ - if (cpu) - set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); + set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); } unsigned long __init numa_free_all_bootmem(void) diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 94862e1ec032..b90e8fe9eeb0 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -220,8 +220,6 @@ void global_flush_tlb(void) down_read(&init_mm.mmap_sem); df = xchg(&df_list, NULL); up_read(&init_mm.mmap_sem); - if (!df) - return; flush_map((df && !df->next) ? df->address : 0); for (; df; df = next_df) { next_df = df->next; |