diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 13 | ||||
-rw-r--r-- | arch/x86_64/boot/setup.S | 2 | ||||
-rw-r--r-- | arch/x86_64/boot/tools/build.c | 4 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 2 | ||||
-rw-r--r-- | arch/x86_64/ia32/sys_ia32.c | 26 | ||||
-rw-r--r-- | arch/x86_64/kernel/Makefile | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/e820.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 12 | ||||
-rw-r--r-- | arch/x86_64/kernel/genapic.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/genapic_cluster.c | 6 | ||||
-rw-r--r-- | arch/x86_64/kernel/io_apic.c | 112 | ||||
-rw-r--r-- | arch/x86_64/kernel/kprobes.c | 41 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/process.c | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/semaphore.c | 180 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup64.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 10 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 14 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 1 | ||||
-rw-r--r-- | arch/x86_64/mm/fault.c | 4 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 6 |
23 files changed, 160 insertions, 303 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 40242c61e90a..e63323e03ea9 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -24,6 +24,10 @@ config X86 bool default y +config SEMAPHORE_SLEEPERS + bool + default y + config MMU bool default y @@ -61,6 +65,10 @@ config GENERIC_IOMAP bool default y +config ARCH_MAY_HAVE_PC_FDC + bool + default y + source "init/Kconfig" @@ -436,6 +444,11 @@ config ISA_DMA_API bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + menu "Power management options" source kernel/power/Kconfig diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S index ff58b2832b75..12ea0b6c52e2 100644 --- a/arch/x86_64/boot/setup.S +++ b/arch/x86_64/boot/setup.S @@ -81,7 +81,7 @@ start: # This is the setup header, and it must start at %cs:2 (old 0x9020:2) .ascii "HdrS" # header signature - .word 0x0203 # header version number (>= 0x0105) + .word 0x0204 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) realmode_swtch: .word 0, 0 # default_switch, SETUPSEG start_sys_seg: .word SYSSEG diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c index 18b5bac1c428..c44f5e2ec100 100644 --- a/arch/x86_64/boot/tools/build.c +++ b/arch/x86_64/boot/tools/build.c @@ -178,7 +178,9 @@ int main(int argc, char ** argv) die("Output: seek failed"); buf[0] = (sys_size & 0xff); buf[1] = ((sys_size >> 8) & 0xff); - if (write(1, buf, 2) != 2) + buf[2] = ((sys_size >> 16) & 0xff); + buf[3] = ((sys_size >> 24) & 0xff); + if (write(1, buf, 4) != 4) die("Write of image length failed"); return 0; /* Everything is OK */ diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c45d6a05b984..f174083d5567 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -307,7 +307,7 @@ ia32_sys_call_table: .quad stub32_fork .quad sys_read .quad sys_write - .quad sys32_open /* 5 */ + .quad compat_sys_open /* 5 */ .quad sys_close .quad sys32_waitpid .quad sys_creat diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index be996d1b691e..04d80406ce4f 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -969,32 +969,6 @@ long sys32_kill(int pid, int sig) return sys_kill(pid, sig); } -asmlinkage long sys32_open(const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - /* don't force O_LARGEFILE */ - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = error; - } else { - fsnotify_open(f->f_dentry); - fd_install(fd, f); - } - } - putname(tmp); - } - return fd; -} - extern asmlinkage long sys_timer_create(clockid_t which_clock, struct sigevent __user *timer_event_spec, diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 0296ca6cbfa3..1579bdd0adcd 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -4,10 +4,10 @@ extra-y := head.o head64.o init_task.o vmlinux.lds EXTRA_AFLAGS := -traditional -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ +obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ - setup64.o bootflag.o e820.o reboot.o quirks.o + setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o @@ -45,3 +45,4 @@ swiotlb-$(CONFIG_SWIOTLB) += ../../ia64/lib/swiotlb.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o +i8237-y += ../../i386/kernel/i8237.o diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index b548dea4e5b9..116ac5f53dce 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -85,7 +85,7 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) struct e820entry *ei = &e820.map[i]; if (type && ei->type != type) continue; - if (ei->addr >= end || ei->addr + ei->size < start) + if (ei->addr >= end || ei->addr + ei->size <= start) continue; return 1; } diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 096d470e280f..be51dbe1f75e 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -784,8 +784,9 @@ ENTRY(execve) ret CFI_ENDPROC -ENTRY(page_fault) +KPROBE_ENTRY(page_fault) errorentry do_page_fault + .previous .text ENTRY(coprocessor_error) zeroentry do_coprocessor_error @@ -797,13 +798,14 @@ ENTRY(device_not_available) zeroentry math_state_restore /* runs on exception stack */ -ENTRY(debug) +KPROBE_ENTRY(debug) CFI_STARTPROC pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug jmp paranoid_exit CFI_ENDPROC + .previous .text /* runs on exception stack */ ENTRY(nmi) @@ -854,8 +856,9 @@ paranoid_schedule: jmp paranoid_userspace CFI_ENDPROC -ENTRY(int3) +KPROBE_ENTRY(int3) zeroentry do_int3 + .previous .text ENTRY(overflow) zeroentry do_overflow @@ -892,8 +895,9 @@ ENTRY(stack_segment) jmp paranoid_exit CFI_ENDPROC -ENTRY(general_protection) +KPROBE_ENTRY(general_protection) errorentry do_general_protection + .previous .text ENTRY(alignment_check) errorentry do_alignment_check diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index f031358906f5..b1c144f73149 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -25,7 +25,7 @@ #endif /* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(x86_cpu_to_apicid); u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index 9703da7202e3..f6523dd1bc09 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c @@ -72,10 +72,14 @@ static void cluster_send_IPI_mask(cpumask_t mask, int vector) static void cluster_send_IPI_allbutself(int vector) { cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); + int me = get_cpu(); /* Ensure we are not preempted when we clear */ + + cpu_clear(me, mask); if (!cpus_empty(mask)) cluster_send_IPI_mask(mask, vector); + + put_cpu(); } static void cluster_send_IPI_all(int vector) diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 1013a8fe44d7..40e0aca088fb 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -70,7 +70,7 @@ static struct irq_pin_list { short apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; #define vector_to_irq(vector) (vector) #endif +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#ifdef CONFIG_SMP +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(mask, tmp, CPU_MASK_ALL); + + dest = cpu_mask_to_apicid(mask); + + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = dest, ) + set_irq_info(irq, mask); + spin_unlock_irqrestore(&ioapic_lock, flags); +} +#endif + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ - \ - for (;;) { \ - unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ - break; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - if (!entry->next) \ - break; \ - entry = irq_2_pin + entry->next; \ - } \ - FINAL; \ -} #define DO_ACTION(name,R,ACTION, FINAL) \ \ @@ -655,7 +683,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) */ static void ack_edge_ioapic_irq(unsigned int irq) { + move_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq) static void end_level_ioapic_irq (unsigned int irq) { + move_irq(irq); ack_APIC_irq(); } -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - unsigned long flags; - unsigned int dest; - - dest = cpu_mask_to_apicid(mask); - - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); - - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = dest, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} - #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); end_level_ioapic_irq(irq); } @@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } -#endif +#endif // CONFIG_SMP +#endif // CONFIG_PCI_MSI /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1424,7 +1443,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * races. */ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -1432,10 +1451,12 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -1506,7 +1529,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ @@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; @@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void) } } +#endif diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 5c6dc7051482..df08c43276a0 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -74,7 +74,7 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn) return 0; } -int arch_prepare_kprobe(struct kprobe *p) +int __kprobes arch_prepare_kprobe(struct kprobe *p) { /* insn: must be on special executable page on x86_64. */ up(&kprobe_mutex); @@ -189,7 +189,7 @@ static inline s32 *is_riprel(u8 *insn) return NULL; } -void arch_copy_kprobe(struct kprobe *p) +void __kprobes arch_copy_kprobe(struct kprobe *p) { s32 *ripdisp; memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); @@ -215,21 +215,21 @@ void arch_copy_kprobe(struct kprobe *p) p->opcode = *p->addr; } -void arch_arm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p) { up(&kprobe_mutex); free_insn_slot(p->ainsn.insn); @@ -261,7 +261,7 @@ static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) kprobe_saved_rflags &= ~IF_MASK; } -static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { regs->eflags |= TF_MASK; regs->eflags &= ~IF_MASK; @@ -272,7 +272,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } -void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) { unsigned long *sara = (unsigned long *)regs->rsp; struct kretprobe_instance *ri; @@ -295,7 +296,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. */ -int kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -310,7 +311,8 @@ int kprobe_handler(struct pt_regs *regs) Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS) { + if (kprobe_status == KPROBE_HIT_SS && + *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; regs->eflags |= kprobe_saved_rflags; unlock_kprobes(); @@ -360,7 +362,10 @@ int kprobe_handler(struct pt_regs *regs) * either a probepoint or a debugger breakpoint * at this address. In either case, no further * handling of this interrupt is appropriate. + * Back up over the (now missing) int3 and run + * the original instruction. */ + regs->rip = (unsigned long)addr; ret = 1; } /* Not one of ours: let kernel handle it */ @@ -399,7 +404,7 @@ no_kprobe: /* * Called when we hit the probe point at kretprobe_trampoline */ -int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -478,7 +483,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { unsigned long *tos = (unsigned long *)regs->rsp; unsigned long next_rip = 0; @@ -536,7 +541,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled thoroughout this function. And we hold kprobe lock. */ -int post_kprobe_handler(struct pt_regs *regs) +int __kprobes post_kprobe_handler(struct pt_regs *regs) { if (!kprobe_running()) return 0; @@ -571,7 +576,7 @@ out: } /* Interrupts disabled, kprobe_lock held. */ -int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { if (current_kprobe->fault_handler && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) @@ -590,8 +595,8 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine for handling exceptions. */ -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, - void *data) +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; switch (val) { @@ -619,7 +624,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, return NOTIFY_DONE; } -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -640,7 +645,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -void jprobe_return(void) +void __kprobes jprobe_return(void) { preempt_enable_no_resched(); asm volatile (" xchg %%rbx,%%rsp \n" @@ -651,7 +656,7 @@ void jprobe_return(void) (jprobe_saved_rsp):"memory"); } -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { u8 *addr = (u8 *) (regs->rip - 1); unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 4e44d6e6b7e5..caf164959e19 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -290,7 +290,7 @@ void enable_timer_nmi_watchdog(void) static int nmi_pm_active; /* nmi_active before suspend */ -static int lapic_nmi_suspend(struct sys_device *dev, u32 state) +static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) { nmi_pm_active = nmi_active; disable_lapic_nmi_watchdog(); @@ -463,6 +463,8 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) per_cpu(nmi_touch, i) = 1; + + touch_softlockup_watchdog(); } void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) @@ -522,14 +524,14 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code) nmi_enter(); add_pda(__nmi_count,1); - if (!nmi_callback(regs, cpu)) + if (!rcu_dereference(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); } void set_nmi_callback(nmi_callback_t callback) { - nmi_callback = callback; + rcu_assign_pointer(nmi_callback, callback); } void unset_nmi_callback(void) diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7577f9d7a75d..8661f82ac70b 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -310,6 +310,7 @@ void __show_regs(struct pt_regs * regs) void show_regs(struct pt_regs *regs) { + printk("CPU %d:", smp_processor_id()); __show_regs(regs); show_trace(®s->rsp); } diff --git a/arch/x86_64/kernel/semaphore.c b/arch/x86_64/kernel/semaphore.c deleted file mode 100644 index 48f7c18172b9..000000000000 --- a/arch/x86_64/kernel/semaphore.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * x86_64 semaphore implementation. - * - * (C) Copyright 1999 Linus Torvalds - * - * Portions Copyright 1999 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> - */ -#include <linux/config.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <asm/errno.h> - -#include <asm/semaphore.h> - -/* - * Semaphores are implemented using a two-way counter: - * The "count" variable is decremented for each process - * that tries to acquire the semaphore, while the "sleeping" - * variable is a count of such acquires. - * - * Notably, the inline "up()" and "down()" functions can - * efficiently test if they need to do any extra work (up - * needs to do something only if count was negative before - * the increment operation. - * - * "sleeping" and the contention routine ordering is protected - * by the spinlock in the semaphore's waitqueue head. - * - * Note that these functions are only called when there is - * contention on the lock, and as such all this is the - * "non-critical" part of the whole semaphore business. The - * critical part is the inline stuff in <asm/semaphore.h> - * where we want to avoid any extra jumps and calls. - */ - -/* - * Logic: - * - only on a boundary condition do we need to care. When we go - * from a negative count to a non-negative, we wake people up. - * - when we go from a non-negative count to a negative do we - * (a) synchronize with the "sleeper" count and (b) make sure - * that we're on the wakeup list before we synchronize so that - * we cannot lose wakeup events. - */ - -void __up(struct semaphore *sem) -{ - wake_up(&sem->wait); -} - -void __sched __down(struct semaphore * sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_UNINTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * the wait_queue_head. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_UNINTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - tsk->state = TASK_RUNNING; -} - -int __sched __down_interruptible(struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_INTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * With signals pending, this turns into - * the trylock failure case - we won't be - * sleeping, and we* can't get the lock as - * it has contention. Just correct the count - * and exit. - */ - if (signal_pending(current)) { - retval = -EINTR; - sem->sleepers = 0; - atomic_add(sleepers, &sem->count); - break; - } - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * wait_queue_head. The "-1" is because we're - * still hoping to get the semaphore. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_INTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - - tsk->state = TASK_RUNNING; - return retval; -} - -/* - * Trylock failed - make sure we correct for - * having decremented the count. - * - * We could have done the trylock with a - * single "cmpxchg" without failure cases, - * but then it wouldn't work on a 386. - */ -int __down_trylock(struct semaphore * sem) -{ - int sleepers; - unsigned long flags; - - spin_lock_irqsave(&sem->wait.lock, flags); - sleepers = sem->sleepers + 1; - sem->sleepers = 0; - - /* - * Add "everybody else" and us into it. They aren't - * playing, because we own the spinlock in the - * wait_queue_head. - */ - if (!atomic_add_negative(sleepers, &sem->count)) { - wake_up_locked(&sem->wait); - } - - spin_unlock_irqrestore(&sem->wait.lock, flags); - return 1; -} - - diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index cfcebc8ab7b4..0511d8087910 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -65,7 +65,7 @@ * Machine setup.. */ -struct cpuinfo_x86 boot_cpu_data; +struct cpuinfo_x86 boot_cpu_data __read_mostly; unsigned long mmu_cr4_features; diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 34082c1cc41e..e3ffcacc8c90 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -36,7 +36,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); -unsigned long __supported_pte_mask = ~0UL; +unsigned long __supported_pte_mask __read_mostly = ~0UL; static int do_not_nx __initdata = 0; /* noexec=on|off diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index fa25e39fe54d..90aeccd15190 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -62,13 +62,13 @@ /* Number of siblings per CPU package */ int smp_num_siblings = 1; /* Package ID of each logical CPU */ -u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(phys_proc_id); EXPORT_SYMBOL(cpu_core_id); /* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); @@ -88,8 +88,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; /* Set when the idlers are all forked */ int smp_threads_ready; -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 66bf6ddeb0c3..7b6abe058253 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -176,10 +176,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -471,7 +468,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * off) isn't likely to go away much sooner anyway. */ - if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update && + if (ntp_synced() && xtime.tv_sec > rtc_update && abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) { set_rtc_mmss(xtime.tv_sec); rtc_update = xtime.tv_sec + 660; @@ -1041,6 +1038,7 @@ static int timer_resume(struct sys_device *dev) write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; + touch_softlockup_watchdog(); return 0; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 6ead433a3885..f238d6078a5a 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/nmi.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -419,8 +420,9 @@ void die_nmi(char *str, struct pt_regs *regs) do_exit(SIGSEGV); } -static void do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, siginfo_t *info) +static void __kprobes do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) { conditional_sti(regs); @@ -504,7 +506,8 @@ DO_ERROR(18, SIGSEGV, "reserved", reserved) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR( 8, SIGSEGV, "double fault", double_fault) -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) { conditional_sti(regs); @@ -622,7 +625,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) io_check_error(reason, regs); } -asmlinkage void do_int3(struct pt_regs * regs, long error_code) +asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; @@ -653,7 +656,8 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. */ -asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) +asmlinkage void __kprobes do_debug(struct pt_regs * regs, + unsigned long error_code) { unsigned long condition; struct task_struct *tsk = current; diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 2a94f9b60b2d..d4abb07af52d 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -21,6 +21,7 @@ SECTIONS *(.text) SCHED_TEXT LOCK_TEXT + KPROBES_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index ca914c3bd49c..816732d8858c 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -23,6 +23,7 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/compiler.h> #include <linux/module.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -294,7 +295,8 @@ int exception_trace = 1; * bit 2 == 0 means kernel, 1 means user-mode * bit 3 == 1 means fault was an instruction fetch */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 6a156f5692ae..04f7a33e144c 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -22,14 +22,14 @@ #define Dprintk(x...) #endif -struct pglist_data *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; bootmem_data_t plat_node_bdata[MAX_NUMNODES]; int memnode_shift; u8 memnodemap[NODEMAPSIZE]; -unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES]; +unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; |