diff options
Diffstat (limited to 'arch/x86/kernel')
65 files changed, 870 insertions, 1904 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0f15af41bd80..b1b78ffe01d0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n CFLAGS_irq.o := -I$(src)/../include/asm/trace obj-y := process_$(BITS).o signal.o +obj-$(CONFIG_COMPAT) += signal_compat.o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o +obj-y += time.o ioport.o dumpstack.o nmi.o +obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o @@ -69,8 +71,8 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o -obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o -obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o +obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ @@ -92,7 +94,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o -obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o +obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o @@ -107,8 +109,6 @@ obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o -obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o -obj-$(CONFIG_PMC_ATOM) += pmc_atom.o ### # 64 bit specific files diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e49ee24da85e..ded848c20e05 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -445,6 +445,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here @@ -710,7 +711,7 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) #endif } -static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { int cpu; @@ -726,12 +727,6 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) *pcpu = cpu; return 0; } - -/* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) -{ - return _acpi_map_lsapic(handle, physid, pcpu); -} EXPORT_SYMBOL(acpi_map_cpu); int acpi_unmap_cpu(int cpu) diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index ede92c3364d3..222a57076039 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -263,7 +263,7 @@ static int apbt_clocksource_register(void) /* Verify whether apbt counter works */ t1 = dw_apb_clocksource_read(clocksource_apbt); - rdtscll(start); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -273,7 +273,7 @@ static int apbt_clocksource_register(void) */ do { rep_nop(); - rdtscll(now); + now = rdtsc(); } while ((now - start) < 200000UL); /* APBT is the only always on clocksource, it has to work! */ @@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void) old = dw_apb_clocksource_read(clocksource_apbt); old += loop; - t1 = __native_read_tsc(); + t1 = rdtsc(); do { new = dw_apb_clocksource_read(clocksource_apbt); } while (new < old); - t2 = __native_read_tsc(); + t2 = rdtsc(); shift = 5; if (unlikely(loop >> shift == 0)) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index cde732c1b495..3ca3e46aa405 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -457,45 +457,45 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; - rdtscll(tsc); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; } -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) +static int lapic_timer_shutdown(struct clock_event_device *evt) { - unsigned long flags; unsigned int v; /* Lapic used as dummy for broadcast ? */ if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; + return 0; - local_irq_save(flags); + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + apic_write(APIC_TMICT, 0); + return 0; +} - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(lapic_timer_frequency, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } +static inline int +lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) +{ + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return 0; - local_irq_restore(flags); + __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); + return 0; +} + +static int lapic_timer_set_periodic(struct clock_event_device *evt) +{ + return lapic_timer_set_periodic_oneshot(evt, false); +} + +static int lapic_timer_set_oneshot(struct clock_event_device *evt) +{ + return lapic_timer_set_periodic_oneshot(evt, true); } /* @@ -513,15 +513,18 @@ static void lapic_timer_broadcast(const struct cpumask *mask) * The local apic timer can be used for any function which is CPU local. */ static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP + | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_state_shutdown = lapic_timer_shutdown, + .set_state_periodic = lapic_timer_set_periodic, + .set_state_oneshot = lapic_timer_set_oneshot, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); @@ -592,7 +595,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) unsigned long pm = acpi_pm_read_early(); if (cpu_has_tsc) - rdtscll(tsc); + tsc = rdtsc(); switch (lapic_cal_loops++) { case 0: @@ -778,7 +781,7 @@ static int __init calibrate_APIC_clock(void) * Setup the apic timer manually */ levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); + lapic_timer_set_periodic(levt); lapic_cal_loops = -1; /* Let the interrupts run */ @@ -788,7 +791,8 @@ static int __init calibrate_APIC_clock(void) cpu_relax(); /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); + local_irq_disable(); + lapic_timer_shutdown(levt); /* Jiffies delta */ deltaj = lapic_cal_j2 - lapic_cal_j1; @@ -799,8 +803,8 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); else levt->features |= CLOCK_EVT_FEAT_DUMMY; - } else - local_irq_enable(); + } + local_irq_enable(); if (levt->features & CLOCK_EVT_FEAT_DUMMY) { pr_warning("APIC timer disabled due to verification failure\n"); @@ -878,7 +882,7 @@ static void local_apic_timer_interrupt(void) if (!evt->event_handler) { pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + lapic_timer_shutdown(evt); return; } @@ -1209,7 +1213,7 @@ void setup_local_APIC(void) long long max_loops = cpu_khz ? cpu_khz : 1000000; if (cpu_has_tsc) - rdtscll(tsc); + tsc = rdtsc(); if (disable_apic) { disable_ioapic_support(); @@ -1293,7 +1297,7 @@ void setup_local_APIC(void) } if (queued) { if (cpu_has_tsc && cpu_khz) { - rdtscll(ntsc); + ntsc = rdtsc(); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else max_loops--; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index de918c410eae..f92ab36979a2 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -191,7 +191,6 @@ static struct apic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .wait_for_init_deassert = false, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, @@ -299,7 +298,6 @@ static struct apic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .wait_for_init_deassert = false, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index b205cdbdbe6a..0d96749cfcac 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -152,7 +152,6 @@ struct apic apic_noop = { .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, - .wait_for_init_deassert = false, .inquire_remote_apic = NULL, .read = noop_apic_read, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 017149cded07..b548fd3b764b 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -92,7 +92,6 @@ static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); - atomic_set(&init_deasserted, 1); return 0; } @@ -235,7 +234,6 @@ static const struct apic apic_numachip __refconst = { .send_IPI_self = numachip_send_IPI_self, .wakeup_secondary_cpu = numachip_wakeup_secondary, - .wait_for_init_deassert = false, .inquire_remote_apic = NULL, /* REMRD not supported */ .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index c4a8d63f8220..971cf8875939 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -186,7 +186,6 @@ static struct apic apic_bigsmp = { .send_IPI_all = bigsmp_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wait_for_init_deassert = true, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 6873ab925d00..045e424fb368 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -28,146 +28,21 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) #endif #ifdef arch_trigger_all_cpu_backtrace -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -static cpumask_t printtrace_mask; - -#define NMI_BUF_SIZE 4096 - -struct nmi_seq_buf { - unsigned char buffer[NMI_BUF_SIZE]; - struct seq_buf seq; -}; - -/* Safe printing in NMI context */ -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); - -/* "in progress" flag of arch_trigger_all_cpu_backtrace */ -static unsigned long backtrace_flag; - -static void print_seq_line(struct nmi_seq_buf *s, int start, int end) +static void nmi_raise_cpu_backtrace(cpumask_t *mask) { - const char *buf = s->buffer + start; - - printk("%.*s", (end - start) + 1, buf); + apic->send_IPI_mask(mask, NMI_VECTOR); } void arch_trigger_all_cpu_backtrace(bool include_self) { - struct nmi_seq_buf *s; - int len; - int cpu; - int i; - int this_cpu = get_cpu(); - - if (test_and_set_bit(0, &backtrace_flag)) { - /* - * If there is already a trigger_all_cpu_backtrace() in progress - * (backtrace_flag == 1), don't output double cpu dump infos. - */ - put_cpu(); - return; - } - - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); - if (!include_self) - cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); - - cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); - /* - * Set up per_cpu seq_buf buffers that the NMIs running on the other - * CPUs will write to. - */ - for_each_cpu(cpu, to_cpumask(backtrace_mask)) { - s = &per_cpu(nmi_print_seq, cpu); - seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); - } - - if (!cpumask_empty(to_cpumask(backtrace_mask))) { - pr_info("sending NMI to %s CPUs:\n", - (include_self ? "all" : "other")); - apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR); - } - - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(to_cpumask(backtrace_mask))) - break; - mdelay(1); - touch_softlockup_watchdog(); - } - - /* - * Now that all the NMIs have triggered, we can dump out their - * back traces safely to the console. - */ - for_each_cpu(cpu, &printtrace_mask) { - int last_i = 0; - - s = &per_cpu(nmi_print_seq, cpu); - len = seq_buf_used(&s->seq); - if (!len) - continue; - - /* Print line by line. */ - for (i = 0; i < len; i++) { - if (s->buffer[i] == '\n') { - print_seq_line(s, last_i, i); - last_i = i + 1; - } - } - /* Check if there was a partial line. */ - if (last_i < len) { - print_seq_line(s, last_i, len - 1); - pr_cont("\n"); - } - } - - clear_bit(0, &backtrace_flag); - smp_mb__after_atomic(); - put_cpu(); -} - -/* - * It is not safe to call printk() directly from NMI handlers. - * It may be fine if the NMI detected a lock up and we have no choice - * but to do so, but doing a NMI on all other CPUs to get a back trace - * can be done with a sysrq-l. We don't want that to lock up, which - * can happen if the NMI interrupts a printk in progress. - * - * Instead, we redirect the vprintk() to this nmi_vprintk() that writes - * the content into a per cpu seq_buf buffer. Then when the NMIs are - * all done, we can safely dump the contents of the seq_buf to a printk() - * from a non NMI context. - */ -static int nmi_vprintk(const char *fmt, va_list args) -{ - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - unsigned int len = seq_buf_used(&s->seq); - - seq_buf_vprintf(&s->seq, fmt, args); - return seq_buf_used(&s->seq) - len; + nmi_trigger_all_cpu_backtrace(include_self, nmi_raise_cpu_backtrace); } static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { - int cpu; - - cpu = smp_processor_id(); - - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - printk_func_t printk_func_save = this_cpu_read(printk_func); - - /* Replace printk to write into the NMI seq */ - this_cpu_write(printk_func, nmi_vprintk); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - this_cpu_write(printk_func, printk_func_save); - - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + if (nmi_cpu_backtrace(regs)) return NMI_HANDLED; - } return NMI_DONE; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 206052e55517..38a76f826530 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2541,7 +2541,7 @@ void __init setup_ioapic_dest(void) * Honour affinities which have been set in early boot */ if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) - mask = idata->affinity; + mask = irq_data_get_affinity_mask(idata); else mask = apic->target_cpus(); diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 1a9d735e09c6..5f1feb6854af 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -264,7 +264,7 @@ static inline int hpet_dev_id(struct irq_domain *domain) static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { - hpet_msi_write(data->handler_data, msg); + hpet_msi_write(irq_data_get_irq_handler_data(data), msg); } static struct irq_chip hpet_msi_controller = { diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index bda488680dbc..7694ae6c1199 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -111,7 +111,6 @@ static struct apic apic_default = { .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wait_for_init_deassert = true, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 2683f36e4e0a..1bbd0fe2c806 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -169,8 +169,7 @@ next: goto next; for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) { - if (per_cpu(vector_irq, new_cpu)[vector] > - VECTOR_UNDEFINED) + if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) goto next; } /* Found one! */ @@ -182,7 +181,7 @@ next: cpumask_intersects(d->old_domain, cpu_online_mask); } for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; + per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); err = 0; @@ -224,15 +223,16 @@ static int assign_irq_vector_policy(int irq, int node, static void clear_irq_vector(int irq, struct apic_chip_data *data) { - int cpu, vector; + struct irq_desc *desc; unsigned long flags; + int cpu, vector; raw_spin_lock_irqsave(&vector_lock, flags); BUG_ON(!data->cfg.vector); vector = data->cfg.vector; for_each_cpu_and(cpu, data->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; data->cfg.vector = 0; cpumask_clear(data->domain); @@ -242,12 +242,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) return; } + desc = irq_to_desc(irq); for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - if (per_cpu(vector_irq, cpu)[vector] != irq) + if (per_cpu(vector_irq, cpu)[vector] != desc) continue; - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; break; } } @@ -296,7 +297,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, struct irq_alloc_info *info = arg; struct apic_chip_data *data; struct irq_data *irq_data; - int i, err; + int i, err, node; if (disable_apic) return -ENXIO; @@ -308,12 +309,13 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, for (i = 0; i < nr_irqs; i++) { irq_data = irq_domain_get_irq_data(domain, virq + i); BUG_ON(!irq_data); + node = irq_data_get_node(irq_data); #ifdef CONFIG_X86_IO_APIC if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) data = legacy_irq_data[virq + i]; else #endif - data = alloc_apic_chip_data(irq_data->node); + data = alloc_apic_chip_data(node); if (!data) { err = -ENOMEM; goto error; @@ -322,8 +324,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data->chip = &lapic_controller; irq_data->chip_data = data; irq_data->hwirq = virq + i; - err = assign_irq_vector_policy(virq + i, irq_data->node, data, - info); + err = assign_irq_vector_policy(virq + i, node, data, info); if (err) goto error; } @@ -403,32 +404,32 @@ int __init arch_early_irq_init(void) return arch_early_ioapic_init(); } +/* Initialize vector_irq on a new cpu */ static void __setup_vector_irq(int cpu) { - /* Initialize vector_irq on a new cpu */ - int irq, vector; struct apic_chip_data *data; + struct irq_desc *desc; + int irq, vector; /* Mark the inuse vectors */ - for_each_active_irq(irq) { - data = apic_chip_data(irq_get_irq_data(irq)); - if (!data) - continue; + for_each_irq_desc(irq, desc) { + struct irq_data *idata = irq_desc_get_irq_data(desc); - if (!cpumask_test_cpu(cpu, data->domain)) + data = apic_chip_data(idata); + if (!data || !cpumask_test_cpu(cpu, data->domain)) continue; vector = data->cfg.vector; - per_cpu(vector_irq, cpu)[vector] = irq; + per_cpu(vector_irq, cpu)[vector] = desc; } /* Mark the free vectors */ for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq <= VECTOR_UNDEFINED) + desc = per_cpu(vector_irq, cpu)[vector]; + if (IS_ERR_OR_NULL(desc)) continue; - data = apic_chip_data(irq_get_irq_data(irq)); + data = apic_chip_data(irq_desc_get_irq_data(desc)); if (!cpumask_test_cpu(cpu, data->domain)) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; } } @@ -448,7 +449,7 @@ void setup_vector_irq(int cpu) * legacy vector to irq mapping: */ for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq; + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq); __setup_vector_irq(cpu); } @@ -490,7 +491,8 @@ static int apic_set_affinity(struct irq_data *irq_data, if (err) { struct irq_data *top = irq_get_irq_data(irq); - if (assign_irq_vector(irq, data, top->affinity)) + if (assign_irq_vector(irq, data, + irq_data_get_affinity_mask(top))) pr_err("Failed to recover vector for irq %d\n", irq); return err; } @@ -538,27 +540,30 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) entering_ack_irq(); + /* Prevent vectors vanishing under us */ + raw_spin_lock(&vector_lock); + me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - int irq; - unsigned int irr; - struct irq_desc *desc; struct apic_chip_data *data; + struct irq_desc *desc; + unsigned int irr; - irq = __this_cpu_read(vector_irq[vector]); - - if (irq <= VECTOR_UNDEFINED) + retry: + desc = __this_cpu_read(vector_irq[vector]); + if (IS_ERR_OR_NULL(desc)) continue; - desc = irq_to_desc(irq); - if (!desc) - continue; + if (!raw_spin_trylock(&desc->lock)) { + raw_spin_unlock(&vector_lock); + cpu_relax(); + raw_spin_lock(&vector_lock); + goto retry; + } - data = apic_chip_data(&desc->irq_data); + data = apic_chip_data(irq_desc_get_irq_data(desc)); if (!data) - continue; - - raw_spin_lock(&desc->lock); + goto unlock; /* * Check if the irq migration is in progress. If so, we @@ -583,11 +588,13 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); goto unlock; } - __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); unlock: raw_spin_unlock(&desc->lock); } + raw_spin_unlock(&vector_lock); + exiting_irq(); } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ab3219b3fbda..cc8311c4d298 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -182,7 +182,7 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) return notifier_from_errno(err); } -static struct notifier_block __refdata x2apic_cpu_notifier = { +static struct notifier_block x2apic_cpu_notifier = { .notifier_call = update_clusterinfo, }; @@ -272,7 +272,6 @@ static struct apic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .wait_for_init_deassert = false, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 3ffd925655e0..662e9150ea6f 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -128,7 +128,6 @@ static struct apic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .wait_for_init_deassert = false, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c8d92950bc04..4a139465f1d4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -248,7 +248,6 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) APIC_DM_STARTUP; uv_write_global_mmr64(pnode, UVH_IPI_INT, val); - atomic_set(&init_deasserted, 1); return 0; } @@ -414,7 +413,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .send_IPI_self = uv_send_IPI_self, .wakeup_secondary_cpu = uv_wakeup_secondary, - .wait_for_init_deassert = false, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 58118e207a69..145863d4d343 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -1,4 +1,4 @@ -#include <linux/module.h> +#include <linux/init.h> #include <linux/sched.h> #include <linux/kthread.h> #include <linux/workqueue.h> @@ -163,6 +163,5 @@ static int start_periodic_check_for_corruption(void) schedule_delayed_work(&bios_check_work, 0); return 0; } - -module_init(start_periodic_check_for_corruption); +device_initcall(start_periodic_check_for_corruption); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dd3a4baffe50..4a70fc6d400a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -11,6 +11,7 @@ #include <asm/cpu.h> #include <asm/smp.h> #include <asm/pci-direct.h> +#include <asm/delay.h> #ifdef CONFIG_X86_64 # include <asm/mmconfig.h> @@ -114,7 +115,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); - unsigned long d, d2; + u64 d, d2; printk(KERN_INFO "AMD K6 stepping B detected - "); @@ -125,10 +126,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c) n = K6_BUG_LOOP; f_vide = vide; - rdtscl(d); + d = rdtsc(); while (n--) f_vide(); - rdtscl(d2); + d2 = rdtsc(); d = d2-d; if (d > 20*K6_BUG_LOOP) @@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) /* A random value per boot for bit slice [12:upper_bit) */ va_align.bits = get_random_int() & va_align.mask; } + + if (cpu_has(c, X86_FEATURE_MWAITX)) + use_mwaitx_delay(); } static void early_init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb9e5df42dd2..07ce52c22ec8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,6 +13,7 @@ #include <linux/kgdb.h> #include <linux/smp.h> #include <linux/io.h> +#include <linux/syscore_ops.h> #include <asm/stackprotector.h> #include <asm/perf_event.h> @@ -1185,10 +1186,10 @@ void syscall_init(void) * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. */ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, entry_SYSCALL_64); + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION - wrmsrl(MSR_CSTAR, entry_SYSCALL_compat); + wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); /* * This only works on Intel CPUs. * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. @@ -1199,7 +1200,7 @@ void syscall_init(void) wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else - wrmsrl(MSR_CSTAR, ignore_sysret); + wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); @@ -1488,3 +1489,20 @@ inline bool __static_cpu_has_safe(u16 bit) return boot_cpu_has(bit); } EXPORT_SYMBOL_GPL(__static_cpu_has_safe); + +static void bsp_resume(void) +{ + if (this_cpu->c_bsp_resume) + this_cpu->c_bsp_resume(&boot_cpu_data); +} + +static struct syscore_ops cpu_syscore_ops = { + .resume = bsp_resume, +}; + +static int __init init_cpu_syscore(void) +{ + register_syscore_ops(&cpu_syscore_ops); + return 0; +} +core_initcall(init_cpu_syscore); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index c37dc37e8317..2584265d4745 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -13,6 +13,7 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); void (*c_detect_tlb)(struct cpuinfo_x86 *); + void (*c_bsp_resume)(struct cpuinfo_x86 *); int c_x86_vendor; #ifdef CONFIG_X86_32 /* Optional vendor specific routine to obtain the cache size. */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 50163fa9034f..98a13db5f4be 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -371,6 +371,36 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c) } } +static void init_intel_energy_perf(struct cpuinfo_x86 *c) +{ + u64 epb; + + /* + * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized. + * (x86_energy_perf_policy(8) is available to change it at run-time.) + */ + if (!cpu_has(c, X86_FEATURE_EPB)) + return; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) + return; + + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); + epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; + wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); +} + +static void intel_bsp_resume(struct cpuinfo_x86 *c) +{ + /* + * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume, + * so reinitialize it properly like during bootup: + */ + init_intel_energy_perf(c); +} + static void init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -478,21 +508,7 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_VMX)) detect_vmx_virtcap(c); - /* - * Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not. - * x86_energy_perf_policy(8) is available to change it at run-time - */ - if (cpu_has(c, X86_FEATURE_EPB)) { - u64 epb; - - rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; - wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - } - } + init_intel_energy_perf(c); } #ifdef CONFIG_X86_32 @@ -747,6 +763,7 @@ static const struct cpu_dev intel_cpu_dev = { .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_init = init_intel, + .c_bsp_resume = intel_bsp_resume, .c_x86_vendor = X86_VENDOR_INTEL, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0f8f21c8284a..9d014b82a124 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -127,7 +127,7 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - rdtscll(m->tsc); + m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -974,7 +974,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) { struct mca_config *cfg = &mca_cfg; struct mce m, *final; - enum ctx_state prev_state; int i; int worst = 0; int severity; @@ -1000,7 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) int flags = MF_ACTION_REQUIRED; int lmce = 0; - prev_state = ist_enter(regs); + ist_enter(regs); this_cpu_inc(mce_exception_count); @@ -1166,7 +1165,7 @@ out: local_irq_disable(); ist_end_non_atomic(); done: - ist_exit(regs, prev_state); + ist_exit(regs); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1754,7 +1753,7 @@ static void collect_tscs(void *data) { unsigned long *cpu_tsc = (unsigned long *)data; - rdtscll(cpu_tsc[smp_processor_id()]); + cpu_tsc[smp_processor_id()] = rdtsc(); } static int mce_apei_read_done; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 737b0ad4e61a..12402e10aeff 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; u32 loaddr, hi, lotype; - prev_state = ist_enter(regs); + ist_enter(regs); rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs, prev_state); + ist_exit(regs); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 44f138296fbe..01dd8702880b 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -15,12 +15,12 @@ /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state = ist_enter(regs); + ist_enter(regs); printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs, prev_state); + ist_exit(regs); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 3c986390058a..9e3f3c7dd5d7 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -459,7 +459,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __refdata mc_cpu_notifier = { +static struct notifier_block mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 8187b7247d1c..37ea89c11520 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -390,7 +390,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) } #ifdef DEBUG -static void __ref show_saved_mc(void) +static void show_saved_mc(void) { int i, j; unsigned int sig, pf, rev, total_size, data_size, date; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f794bfa3c138..381c8b9b3a33 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -188,6 +188,7 @@ static void __init ms_hyperv_init_platform(void) machine_ops.shutdown = hv_machine_shutdown; machine_ops.crash_shutdown = hv_machine_crash_shutdown; + mark_tsc_unstable("running on Hyper-V"); } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index e7ed0d8ebacb..f891b4750f04 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -448,7 +448,6 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type, return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } -EXPORT_SYMBOL(mtrr_add); /** * mtrr_del_page - delete a memory type region @@ -537,7 +536,6 @@ int mtrr_del(int reg, unsigned long base, unsigned long size) return -EINVAL; return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } -EXPORT_SYMBOL(mtrr_del); /** * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f56cf074d01a..66dd3fe99b82 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; if (idx > LDT_ENTRIES) @@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment) return 0; desc = &ldt->entries[idx]; +#else + return 0; +#endif } else { if (idx > GDT_ENTRIES) return 0; @@ -2200,7 +2204,7 @@ static unsigned long get_segment_base(unsigned int segment) return get_desc_base(desc); } -#ifdef CONFIG_COMPAT +#ifdef CONFIG_IA32_EMULATION #include <asm/compat.h> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f41e4dc78119..3fefebfbdf4b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -12,7 +12,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/export.h> -#include <linux/watchdog.h> +#include <linux/nmi.h> #include <asm/cpufeature.h> #include <asm/hardirq.h> @@ -3630,7 +3630,10 @@ static __init int fixup_ht_bug(void) return 0; } - watchdog_nmi_disable_all(); + if (lockup_detector_suspend() != 0) { + pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n"); + return 0; + } x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); @@ -3638,7 +3641,7 @@ static __init int fixup_ht_bug(void) x86_pmu.commit_scheduling = NULL; x86_pmu.stop_scheduling = NULL; - watchdog_nmi_enable_all(); + lockup_detector_resume(); get_online_cpus(); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 83741a71558f..bd3507da39f0 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -170,7 +170,7 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, return notifier_from_errno(err); } -static struct notifier_block __refdata cpuid_class_cpu_notifier = +static struct notifier_block cpuid_class_cpu_notifier = { .notifier_call = cpuid_class_cpu_callback, }; diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index ce95676abd60..4d38416e2a7f 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -110,7 +110,7 @@ static void init_espfix_random(void) */ if (!arch_get_random_long(&rand)) { /* The constant is an arbitrary large prime */ - rdtscll(rand); + rand = rdtsc(); rand *= 0xc345c6b72fd16123UL; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 10757d0a3fcf..88b4da373081 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -226,22 +226,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { } */ static unsigned long hpet_freq; -static void hpet_legacy_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt); -static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt); - -/* - * The hpet clock event device - */ -static struct clock_event_device hpet_clockevent = { - .name = "hpet", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = hpet_legacy_set_mode, - .set_next_event = hpet_legacy_next_event, - .irq = 0, - .rating = 50, -}; +static struct clock_event_device hpet_clockevent; static void hpet_stop_counter(void) { @@ -306,64 +291,74 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } -static void hpet_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt, int timer) +static int hpet_set_periodic(struct clock_event_device *evt, int timer) { unsigned int cfg, cmp, now; uint64_t delta; - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - hpet_stop_counter(); - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; - delta >>= evt->shift; - now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned int) delta; - cfg = hpet_readl(HPET_Tn_CFG(timer)); - cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | - HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - hpet_writel(cmp, HPET_Tn_CMP(timer)); - udelay(1); - /* - * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL - * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL - * bit is automatically cleared after the first write. - * (See AMD-8111 HyperTransport I/O Hub Data Sheet, - * Publication # 24674) - */ - hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer)); - hpet_start_counter(); - hpet_print_config(); - break; + hpet_stop_counter(); + delta = ((uint64_t)(NSEC_PER_SEC / HZ)) * evt->mult; + delta >>= evt->shift; + now = hpet_readl(HPET_COUNTER); + cmp = now + (unsigned int)delta; + cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | + HPET_TN_32BIT; + hpet_writel(cfg, HPET_Tn_CFG(timer)); + hpet_writel(cmp, HPET_Tn_CMP(timer)); + udelay(1); + /* + * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL + * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL + * bit is automatically cleared after the first write. + * (See AMD-8111 HyperTransport I/O Hub Data Sheet, + * Publication # 24674) + */ + hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer)); + hpet_start_counter(); + hpet_print_config(); - case CLOCK_EVT_MODE_ONESHOT: - cfg = hpet_readl(HPET_Tn_CFG(timer)); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - break; + return 0; +} - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - cfg = hpet_readl(HPET_Tn_CFG(timer)); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - break; +static int hpet_set_oneshot(struct clock_event_device *evt, int timer) +{ + unsigned int cfg; - case CLOCK_EVT_MODE_RESUME: - if (timer == 0) { - hpet_enable_legacy_int(); - } else { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); - enable_irq(hdev->irq); - } - hpet_print_config(); - break; + cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg &= ~HPET_TN_PERIODIC; + cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; + hpet_writel(cfg, HPET_Tn_CFG(timer)); + + return 0; +} + +static int hpet_shutdown(struct clock_event_device *evt, int timer) +{ + unsigned int cfg; + + cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg &= ~HPET_TN_ENABLE; + hpet_writel(cfg, HPET_Tn_CFG(timer)); + + return 0; +} + +static int hpet_resume(struct clock_event_device *evt, int timer) +{ + if (!timer) { + hpet_enable_legacy_int(); + } else { + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + + irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); + disable_irq(hdev->irq); + irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); + enable_irq(hdev->irq); } + hpet_print_config(); + + return 0; } static int hpet_next_event(unsigned long delta, @@ -403,10 +398,24 @@ static int hpet_next_event(unsigned long delta, return res < HPET_MIN_CYCLES ? -ETIME : 0; } -static void hpet_legacy_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +static int hpet_legacy_shutdown(struct clock_event_device *evt) +{ + return hpet_shutdown(evt, 0); +} + +static int hpet_legacy_set_oneshot(struct clock_event_device *evt) +{ + return hpet_set_oneshot(evt, 0); +} + +static int hpet_legacy_set_periodic(struct clock_event_device *evt) { - hpet_set_mode(mode, evt, 0); + return hpet_set_periodic(evt, 0); +} + +static int hpet_legacy_resume(struct clock_event_device *evt) +{ + return hpet_resume(evt, 0); } static int hpet_legacy_next_event(unsigned long delta, @@ -416,6 +425,22 @@ static int hpet_legacy_next_event(unsigned long delta, } /* + * The hpet clock event device + */ +static struct clock_event_device hpet_clockevent = { + .name = "hpet", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_state_periodic = hpet_legacy_set_periodic, + .set_state_oneshot = hpet_legacy_set_oneshot, + .set_state_shutdown = hpet_legacy_shutdown, + .tick_resume = hpet_legacy_resume, + .set_next_event = hpet_legacy_next_event, + .irq = 0, + .rating = 50, +}; + +/* * HPET MSI Support */ #ifdef CONFIG_PCI_MSI @@ -426,7 +451,7 @@ static struct irq_domain *hpet_domain; void hpet_msi_unmask(struct irq_data *data) { - struct hpet_dev *hdev = data->handler_data; + struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); unsigned int cfg; /* unmask it */ @@ -437,7 +462,7 @@ void hpet_msi_unmask(struct irq_data *data) void hpet_msi_mask(struct irq_data *data) { - struct hpet_dev *hdev = data->handler_data; + struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); unsigned int cfg; /* mask it */ @@ -459,11 +484,32 @@ void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg) msg->address_hi = 0; } -static void hpet_msi_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +static int hpet_msi_shutdown(struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + + return hpet_shutdown(evt, hdev->num); +} + +static int hpet_msi_set_oneshot(struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + + return hpet_set_oneshot(evt, hdev->num); +} + +static int hpet_msi_set_periodic(struct clock_event_device *evt) { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - hpet_set_mode(mode, evt, hdev->num); + + return hpet_set_periodic(evt, hdev->num); +} + +static int hpet_msi_resume(struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + + return hpet_resume(evt, hdev->num); } static int hpet_msi_next_event(unsigned long delta, @@ -523,10 +569,14 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) evt->rating = 110; evt->features = CLOCK_EVT_FEAT_ONESHOT; - if (hdev->flags & HPET_DEV_PERI_CAP) + if (hdev->flags & HPET_DEV_PERI_CAP) { evt->features |= CLOCK_EVT_FEAT_PERIODIC; + evt->set_state_periodic = hpet_msi_set_periodic; + } - evt->set_mode = hpet_msi_set_mode; + evt->set_state_shutdown = hpet_msi_shutdown; + evt->set_state_oneshot = hpet_msi_set_oneshot; + evt->tick_resume = hpet_msi_resume; evt->set_next_event = hpet_msi_next_event; evt->cpumask = cpumask_of(hdev->cpu); @@ -735,7 +785,7 @@ static int hpet_clocksource_register(void) /* Verify whether hpet counter works */ t1 = hpet_readl(HPET_COUNTER); - rdtscll(start); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -745,7 +795,7 @@ static int hpet_clocksource_register(void) */ do { rep_nop(); - rdtscll(now); + now = rdtsc(); } while ((now - start) < 200000UL); if (t1 == hpet_readl(HPET_COUNTER)) { diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index f2b96de3c7c1..efb82f07b29c 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -34,7 +34,7 @@ static int __init init_pit_clocksource(void) * - when local APIC timer is active (PIT is switched off) */ if (num_possible_cpus() > 1 || is_hpet_enabled() || - i8253_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) + !clockevent_state_periodic(&i8253_clockevent)) return 0; return clocksource_i8253_init(); diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c deleted file mode 100644 index 82f8d02f0df2..000000000000 --- a/arch/x86/kernel/iosf_mbi.c +++ /dev/null @@ -1,328 +0,0 @@ -/* - * IOSF-SB MailBox Interface Driver - * Copyright (c) 2013, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * - * The IOSF-SB is a fabric bus available on Atom based SOC's that uses a - * mailbox interface (MBI) to communicate with mutiple devices. This - * driver implements access to this interface for those platforms that can - * enumerate the device using PCI. - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/spinlock.h> -#include <linux/pci.h> -#include <linux/debugfs.h> -#include <linux/capability.h> - -#include <asm/iosf_mbi.h> - -#define PCI_DEVICE_ID_BAYTRAIL 0x0F00 -#define PCI_DEVICE_ID_BRASWELL 0x2280 -#define PCI_DEVICE_ID_QUARK_X1000 0x0958 - -static DEFINE_SPINLOCK(iosf_mbi_lock); - -static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset) -{ - return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE; -} - -static struct pci_dev *mbi_pdev; /* one mbi device */ - -static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr) -{ - int result; - - if (!mbi_pdev) - return -ENODEV; - - if (mcrx) { - result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, - mcrx); - if (result < 0) - goto fail_read; - } - - result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); - if (result < 0) - goto fail_read; - - result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); - if (result < 0) - goto fail_read; - - return 0; - -fail_read: - dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); - return result; -} - -static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr) -{ - int result; - - if (!mbi_pdev) - return -ENODEV; - - result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); - if (result < 0) - goto fail_write; - - if (mcrx) { - result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, - mcrx); - if (result < 0) - goto fail_write; - } - - result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); - if (result < 0) - goto fail_write; - - return 0; - -fail_write: - dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); - return result; -} - -int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr) -{ - u32 mcr, mcrx; - unsigned long flags; - int ret; - - /*Access to the GFX unit is handled by GPU code */ - if (port == BT_MBI_UNIT_GFX) { - WARN_ON(1); - return -EPERM; - } - - mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); - mcrx = offset & MBI_MASK_HI; - - spin_lock_irqsave(&iosf_mbi_lock, flags); - ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr); - spin_unlock_irqrestore(&iosf_mbi_lock, flags); - - return ret; -} -EXPORT_SYMBOL(iosf_mbi_read); - -int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr) -{ - u32 mcr, mcrx; - unsigned long flags; - int ret; - - /*Access to the GFX unit is handled by GPU code */ - if (port == BT_MBI_UNIT_GFX) { - WARN_ON(1); - return -EPERM; - } - - mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); - mcrx = offset & MBI_MASK_HI; - - spin_lock_irqsave(&iosf_mbi_lock, flags); - ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr); - spin_unlock_irqrestore(&iosf_mbi_lock, flags); - - return ret; -} -EXPORT_SYMBOL(iosf_mbi_write); - -int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) -{ - u32 mcr, mcrx; - u32 value; - unsigned long flags; - int ret; - - /*Access to the GFX unit is handled by GPU code */ - if (port == BT_MBI_UNIT_GFX) { - WARN_ON(1); - return -EPERM; - } - - mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); - mcrx = offset & MBI_MASK_HI; - - spin_lock_irqsave(&iosf_mbi_lock, flags); - - /* Read current mdr value */ - ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value); - if (ret < 0) { - spin_unlock_irqrestore(&iosf_mbi_lock, flags); - return ret; - } - - /* Apply mask */ - value &= ~mask; - mdr &= mask; - value |= mdr; - - /* Write back */ - ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value); - - spin_unlock_irqrestore(&iosf_mbi_lock, flags); - - return ret; -} -EXPORT_SYMBOL(iosf_mbi_modify); - -bool iosf_mbi_available(void) -{ - /* Mbi isn't hot-pluggable. No remove routine is provided */ - return mbi_pdev; -} -EXPORT_SYMBOL(iosf_mbi_available); - -#ifdef CONFIG_IOSF_MBI_DEBUG -static u32 dbg_mdr; -static u32 dbg_mcr; -static u32 dbg_mcrx; - -static int mcr_get(void *data, u64 *val) -{ - *val = *(u32 *)data; - return 0; -} - -static int mcr_set(void *data, u64 val) -{ - u8 command = ((u32)val & 0xFF000000) >> 24, - port = ((u32)val & 0x00FF0000) >> 16, - offset = ((u32)val & 0x0000FF00) >> 8; - int err; - - *(u32 *)data = val; - - if (!capable(CAP_SYS_RAWIO)) - return -EACCES; - - if (command & 1u) - err = iosf_mbi_write(port, - command, - dbg_mcrx | offset, - dbg_mdr); - else - err = iosf_mbi_read(port, - command, - dbg_mcrx | offset, - &dbg_mdr); - - return err; -} -DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n"); - -static struct dentry *iosf_dbg; - -static void iosf_sideband_debug_init(void) -{ - struct dentry *d; - - iosf_dbg = debugfs_create_dir("iosf_sb", NULL); - if (IS_ERR_OR_NULL(iosf_dbg)) - return; - - /* mdr */ - d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr); - if (IS_ERR_OR_NULL(d)) - goto cleanup; - - /* mcrx */ - debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx); - if (IS_ERR_OR_NULL(d)) - goto cleanup; - - /* mcr - initiates mailbox tranaction */ - debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops); - if (IS_ERR_OR_NULL(d)) - goto cleanup; - - return; - -cleanup: - debugfs_remove_recursive(d); -} - -static void iosf_debugfs_init(void) -{ - iosf_sideband_debug_init(); -} - -static void iosf_debugfs_remove(void) -{ - debugfs_remove_recursive(iosf_dbg); -} -#else -static inline void iosf_debugfs_init(void) { } -static inline void iosf_debugfs_remove(void) { } -#endif /* CONFIG_IOSF_MBI_DEBUG */ - -static int iosf_mbi_probe(struct pci_dev *pdev, - const struct pci_device_id *unused) -{ - int ret; - - ret = pci_enable_device(pdev); - if (ret < 0) { - dev_err(&pdev->dev, "error: could not enable device\n"); - return ret; - } - - mbi_pdev = pci_dev_get(pdev); - return 0; -} - -static const struct pci_device_id iosf_mbi_pci_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BRASWELL) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) }, - { 0, }, -}; -MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids); - -static struct pci_driver iosf_mbi_pci_driver = { - .name = "iosf_mbi_pci", - .probe = iosf_mbi_probe, - .id_table = iosf_mbi_pci_ids, -}; - -static int __init iosf_mbi_init(void) -{ - iosf_debugfs_init(); - - return pci_register_driver(&iosf_mbi_pci_driver); -} - -static void __exit iosf_mbi_exit(void) -{ - iosf_debugfs_remove(); - - pci_unregister_driver(&iosf_mbi_pci_driver); - if (mbi_pdev) { - pci_dev_put(mbi_pdev); - mbi_pdev = NULL; - } -} - -module_init(iosf_mbi_init); -module_exit(iosf_mbi_exit); - -MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>"); -MODULE_DESCRIPTION("IOSF Mailbox Interface accessor"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c7dfe1be784e..f8062aaf5df9 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -139,10 +139,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - seq_printf(p, "%*s: ", prec, "HYP"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); - seq_puts(p, " Hypervisor callback interrupts\n"); + if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) { + seq_printf(p, "%*s: ", prec, "HYP"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->irq_hv_callback_count); + seq_puts(p, " Hypervisor callback interrupts\n"); + } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) @@ -211,24 +214,38 @@ u64 arch_irq_stat(void) __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - + struct irq_desc * desc; /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; - unsigned irq; + + /* + * NB: Unlike exception entries, IRQ entries do not reliably + * handle context tracking in the low-level entry code. This is + * because syscall entries execute briefly with IRQs on before + * updating context tracking state, so we can take an IRQ from + * kernel mode with CONTEXT_USER. The low-level entry code only + * updates the context if we came from user mode, so we won't + * switch to CONTEXT_KERNEL. We'll fix that once the syscall + * code is cleaned up enough that we can cleanly defer enabling + * IRQs. + */ entering_irq(); - irq = __this_cpu_read(vector_irq[vector]); + /* entering_irq() tells RCU that we're not quiescent. Check it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); + + desc = __this_cpu_read(vector_irq[vector]); - if (!handle_irq(irq, regs)) { + if (!handle_irq(desc, regs)) { ack_APIC_irq(); - if (irq != VECTOR_RETRIGGERED) { - pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n", + if (desc != VECTOR_RETRIGGERED) { + pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), - vector, irq); + vector); } else { - __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); } } @@ -330,10 +347,10 @@ static struct cpumask affinity_new, online_new; */ int check_irq_vectors_for_cpu_disable(void) { - int irq, cpu; unsigned int this_cpu, vector, this_count, count; struct irq_desc *desc; struct irq_data *data; + int cpu; this_cpu = smp_processor_id(); cpumask_copy(&online_new, cpu_online_mask); @@ -341,47 +358,43 @@ int check_irq_vectors_for_cpu_disable(void) this_count = 0; for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - irq = __this_cpu_read(vector_irq[vector]); - if (irq >= 0) { - desc = irq_to_desc(irq); - if (!desc) - continue; - - /* - * Protect against concurrent action removal, - * affinity changes etc. - */ - raw_spin_lock(&desc->lock); - data = irq_desc_get_irq_data(desc); - cpumask_copy(&affinity_new, data->affinity); - cpumask_clear_cpu(this_cpu, &affinity_new); - - /* Do not count inactive or per-cpu irqs. */ - if (!irq_has_action(irq) || irqd_is_per_cpu(data)) { - raw_spin_unlock(&desc->lock); - continue; - } + desc = __this_cpu_read(vector_irq[vector]); + if (IS_ERR_OR_NULL(desc)) + continue; + /* + * Protect against concurrent action removal, affinity + * changes etc. + */ + raw_spin_lock(&desc->lock); + data = irq_desc_get_irq_data(desc); + cpumask_copy(&affinity_new, + irq_data_get_affinity_mask(data)); + cpumask_clear_cpu(this_cpu, &affinity_new); + /* Do not count inactive or per-cpu irqs. */ + if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) { raw_spin_unlock(&desc->lock); - /* - * A single irq may be mapped to multiple - * cpu's vector_irq[] (for example IOAPIC cluster - * mode). In this case we have two - * possibilities: - * - * 1) the resulting affinity mask is empty; that is - * this the down'd cpu is the last cpu in the irq's - * affinity mask, or - * - * 2) the resulting affinity mask is no longer - * a subset of the online cpus but the affinity - * mask is not zero; that is the down'd cpu is the - * last online cpu in a user set affinity mask. - */ - if (cpumask_empty(&affinity_new) || - !cpumask_subset(&affinity_new, &online_new)) - this_count++; + continue; } + + raw_spin_unlock(&desc->lock); + /* + * A single irq may be mapped to multiple cpu's + * vector_irq[] (for example IOAPIC cluster mode). In + * this case we have two possibilities: + * + * 1) the resulting affinity mask is empty; that is + * this the down'd cpu is the last cpu in the irq's + * affinity mask, or + * + * 2) the resulting affinity mask is no longer a + * subset of the online cpus but the affinity mask is + * not zero; that is the down'd cpu is the last online + * cpu in a user set affinity mask. + */ + if (cpumask_empty(&affinity_new) || + !cpumask_subset(&affinity_new, &online_new)) + this_count++; } count = 0; @@ -400,8 +413,8 @@ int check_irq_vectors_for_cpu_disable(void) for (vector = FIRST_EXTERNAL_VECTOR; vector < first_system_vector; vector++) { if (!test_bit(vector, used_vectors) && - per_cpu(vector_irq, cpu)[vector] < 0) - count++; + IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) + count++; } } @@ -437,7 +450,7 @@ void fixup_irqs(void) raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); - affinity = data->affinity; + affinity = irq_data_get_affinity_mask(data); if (!irq_has_action(irq) || irqd_is_per_cpu(data) || cpumask_subset(affinity, cpu_online_mask)) { raw_spin_unlock(&desc->lock); @@ -505,14 +518,13 @@ void fixup_irqs(void) for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irr; - if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED) + if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); if (irr & (1 << (vector % 32))) { - irq = __this_cpu_read(vector_irq[vector]); + desc = __this_cpu_read(vector_irq[vector]); - desc = irq_to_desc(irq); raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); @@ -523,7 +535,7 @@ void fixup_irqs(void) raw_spin_unlock(&desc->lock); } if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) - __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); } } #endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index cd74f5978ab9..c80cf6699678 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -148,21 +148,21 @@ void do_softirq_own_stack(void) call_on_stack(__do_softirq, isp); } -bool handle_irq(unsigned irq, struct pt_regs *regs) +bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - struct irq_desc *desc; + unsigned int irq; int overflow; overflow = check_stack_overflow(); - desc = irq_to_desc(irq); - if (unlikely(!desc)) + if (IS_ERR_OR_NULL(desc)) return false; + irq = irq_desc_get_irq(desc); if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) { if (unlikely(overflow)) print_stack_overflow(); - desc->handle_irq(irq, desc); + generic_handle_irq_desc(irq, desc); } return true; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index bc4604e500a3..ff16ccb918f2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -68,16 +68,13 @@ static inline void stack_overflow_check(struct pt_regs *regs) #endif } -bool handle_irq(unsigned irq, struct pt_regs *regs) +bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - struct irq_desc *desc; - stack_overflow_check(regs); - desc = irq_to_desc(irq); - if (unlikely(!desc)) + if (unlikely(IS_ERR_OR_NULL(desc))) return false; - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(irq_desc_get_irq(desc), desc); return true; } diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a3a5e158ed69..1423ab1b0312 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -52,7 +52,7 @@ static struct irqaction irq2 = { }; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED, + [0 ... NR_VECTORS - 1] = VECTOR_UNUSED, }; int vector_used_by_percpu_irq(unsigned int vector) @@ -60,7 +60,7 @@ int vector_used_by_percpu_irq(unsigned int vector) int cpu; for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED) + if (!IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) return 1; } @@ -94,7 +94,7 @@ void __init init_IRQ(void) * irq's migrate etc. */ for (i = 0; i < nr_legacy_irqs(); i++) - per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i; + per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i); x86_init.irqs.intr_init(); } diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 26d5a55a2736..e565e0e4d216 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry, const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { if (init) { /* * Jump label is enabled for the first time. diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index ca83f7ac388b..0f8a6bbaaa44 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -223,9 +223,6 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, memset(¶ms->hd0_info, 0, sizeof(params->hd0_info)); memset(¶ms->hd1_info, 0, sizeof(params->hd1_info)); - /* Default sysdesc table */ - params->sys_desc_table.length = 0; - if (image->type == KEXEC_TYPE_CRASH) { ret = crash_setup_memmap_entries(image, params); if (ret) @@ -536,7 +533,9 @@ static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) int ret; ret = verify_pefile_signature(kernel, kernel_len, - system_trusted_keyring, &trusted); + system_trusted_keyring, + VERIFYING_KEXEC_PE_SIGNATURE, + &trusted); if (ret < 0) return ret; if (!trusted) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 49487b488061..2c7aafa70702 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -200,7 +200,7 @@ static void kvm_setup_secondary_clock(void) * kind of shutdown from our side, we unregister the clock by writting anything * that does not have the 'enable' bit set in the msr */ -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void kvm_crash_shutdown(struct pt_regs *regs) { native_write_msr(msr_kvm_system_time, 0, 0); @@ -259,7 +259,7 @@ void __init kvmclock_init(void) x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; machine_ops.shutdown = kvm_shutdown; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE machine_ops.crash_shutdown = kvm_crash_shutdown; #endif kvm_get_preset_lpj(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d05bd2e2ee91..697f90db0e37 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) a->handler, whole_msecs, decimal_msecs); } -static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +static int nmi_handle(unsigned int type, struct pt_regs *regs) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; @@ -213,7 +213,7 @@ static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { /* check to see if anyone registered against these types of errors */ - if (nmi_handle(NMI_SERR, regs, false)) + if (nmi_handle(NMI_SERR, regs)) return; pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", @@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) unsigned long i; /* check to see if anyone registered against these types of errors */ - if (nmi_handle(NMI_IO_CHECK, regs, false)) + if (nmi_handle(NMI_IO_CHECK, regs)) return; pr_emerg( @@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) * as only the first one is ever run (unless it can actually determine * if it caused the NMI) */ - handled = nmi_handle(NMI_UNKNOWN, regs, false); + handled = nmi_handle(NMI_UNKNOWN, regs); if (handled) { __this_cpu_add(nmi_stats.unknown, handled); return; @@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); - handled = nmi_handle(NMI_LOCAL, regs, b2b); + handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); if (handled) { /* diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 58bcfb67c01f..f68e48f5f6c2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, .write_msr = native_write_msr_safe, - .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .read_tscp = native_read_tscp, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index e1b013696dde..c89f50a76e97 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); -DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); @@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); - PATCH_SITE(pv_cpu_ops, read_tsc); #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): if (pv_is_native_spin_unlock()) { diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 353972c1946c..84b8ef82a159 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -58,17 +58,6 @@ EXPORT_SYMBOL(x86_dma_fallback_dev); /* Number of entries preallocated for DMA-API debugging */ #define PREALLOC_DMA_DEBUG_ENTRIES 65536 -int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - - *dev->dma_mask = mask; - - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; @@ -140,50 +129,19 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } -void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, struct dma_attrs *attrs) +bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { - struct dma_map_ops *ops = get_dma_ops(dev); - void *memory; - - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) - dev = &x86_dma_fallback_dev; - - if (!is_device_dma_capable(dev)) - return NULL; - - if (!ops->alloc) - return NULL; - - memory = ops->alloc(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp), attrs); - debug_dma_alloc_coherent(dev, size, *dma_handle, memory); - - return memory; -} -EXPORT_SYMBOL(dma_alloc_attrs); - -void dma_free_attrs(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - WARN_ON(irqs_disabled()); /* for portability */ + *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); + *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - if (dma_release_from_coherent(dev, get_order(size), vaddr)) - return; + if (!*dev) + *dev = &x86_dma_fallback_dev; + if (!is_device_dma_capable(*dev)) + return false; + return true; - debug_dma_free_coherent(dev, size, vaddr, bus); - if (ops->free) - ops->free(dev, size, vaddr, bus, attrs); } -EXPORT_SYMBOL(dma_free_attrs); +EXPORT_SYMBOL(arch_dma_alloc_attrs); /* * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c deleted file mode 100644 index d66a4fe6caee..000000000000 --- a/arch/x86/kernel/pmc_atom.c +++ /dev/null @@ -1,371 +0,0 @@ -/* - * Intel Atom SOC Power Management Controller Driver - * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/device.h> -#include <linux/debugfs.h> -#include <linux/seq_file.h> -#include <linux/io.h> - -#include <asm/pmc_atom.h> - -struct pmc_dev { - u32 base_addr; - void __iomem *regmap; -#ifdef CONFIG_DEBUG_FS - struct dentry *dbgfs_dir; -#endif /* CONFIG_DEBUG_FS */ -}; - -static struct pmc_dev pmc_device; -static u32 acpi_base_addr; - -struct pmc_bit_map { - const char *name; - u32 bit_mask; -}; - -static const struct pmc_bit_map dev_map[] = { - {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, - {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, - {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, - {"3 - LPSS1_F3_HSUART1", BIT_LPSS1_F3_HSUART1}, - {"4 - LPSS1_F4_HSUART2", BIT_LPSS1_F4_HSUART2}, - {"5 - LPSS1_F5_SPI", BIT_LPSS1_F5_SPI}, - {"6 - LPSS1_F6_Reserved", BIT_LPSS1_F6_XXX}, - {"7 - LPSS1_F7_Reserved", BIT_LPSS1_F7_XXX}, - {"8 - SCC_EMMC", BIT_SCC_EMMC}, - {"9 - SCC_SDIO", BIT_SCC_SDIO}, - {"10 - SCC_SDCARD", BIT_SCC_SDCARD}, - {"11 - SCC_MIPI", BIT_SCC_MIPI}, - {"12 - HDA", BIT_HDA}, - {"13 - LPE", BIT_LPE}, - {"14 - OTG", BIT_OTG}, - {"15 - USH", BIT_USH}, - {"16 - GBE", BIT_GBE}, - {"17 - SATA", BIT_SATA}, - {"18 - USB_EHCI", BIT_USB_EHCI}, - {"19 - SEC", BIT_SEC}, - {"20 - PCIE_PORT0", BIT_PCIE_PORT0}, - {"21 - PCIE_PORT1", BIT_PCIE_PORT1}, - {"22 - PCIE_PORT2", BIT_PCIE_PORT2}, - {"23 - PCIE_PORT3", BIT_PCIE_PORT3}, - {"24 - LPSS2_F0_DMA", BIT_LPSS2_F0_DMA}, - {"25 - LPSS2_F1_I2C1", BIT_LPSS2_F1_I2C1}, - {"26 - LPSS2_F2_I2C2", BIT_LPSS2_F2_I2C2}, - {"27 - LPSS2_F3_I2C3", BIT_LPSS2_F3_I2C3}, - {"28 - LPSS2_F3_I2C4", BIT_LPSS2_F4_I2C4}, - {"29 - LPSS2_F5_I2C5", BIT_LPSS2_F5_I2C5}, - {"30 - LPSS2_F6_I2C6", BIT_LPSS2_F6_I2C6}, - {"31 - LPSS2_F7_I2C7", BIT_LPSS2_F7_I2C7}, - {"32 - SMB", BIT_SMB}, - {"33 - OTG_SS_PHY", BIT_OTG_SS_PHY}, - {"34 - USH_SS_PHY", BIT_USH_SS_PHY}, - {"35 - DFX", BIT_DFX}, -}; - -static const struct pmc_bit_map pss_map[] = { - {"0 - GBE", PMC_PSS_BIT_GBE}, - {"1 - SATA", PMC_PSS_BIT_SATA}, - {"2 - HDA", PMC_PSS_BIT_HDA}, - {"3 - SEC", PMC_PSS_BIT_SEC}, - {"4 - PCIE", PMC_PSS_BIT_PCIE}, - {"5 - LPSS", PMC_PSS_BIT_LPSS}, - {"6 - LPE", PMC_PSS_BIT_LPE}, - {"7 - DFX", PMC_PSS_BIT_DFX}, - {"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL}, - {"9 - USH_SUS", PMC_PSS_BIT_USH_SUS}, - {"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS}, - {"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA}, - {"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL}, - {"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS}, - {"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK}, - {"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA}, - {"16 - USB", PMC_PSS_BIT_USB}, - {"17 - USB_SUS", PMC_PSS_BIT_USB_SUS}, -}; - -static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) -{ - return readl(pmc->regmap + reg_offset); -} - -static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val) -{ - writel(val, pmc->regmap + reg_offset); -} - -static void pmc_power_off(void) -{ - u16 pm1_cnt_port; - u32 pm1_cnt_value; - - pr_info("Preparing to enter system sleep state S5\n"); - - pm1_cnt_port = acpi_base_addr + PM1_CNT; - - pm1_cnt_value = inl(pm1_cnt_port); - pm1_cnt_value &= SLEEP_TYPE_MASK; - pm1_cnt_value |= SLEEP_TYPE_S5; - pm1_cnt_value |= SLEEP_ENABLE; - - outl(pm1_cnt_value, pm1_cnt_port); -} - -static void pmc_hw_reg_setup(struct pmc_dev *pmc) -{ - /* - * Disable PMC S0IX_WAKE_EN events coming from: - * - LPC clock run - * - GPIO_SUS ored dedicated IRQs - * - GPIO_SCORE ored dedicated IRQs - * - GPIO_SUS shared IRQ - * - GPIO_SCORE shared IRQ - */ - pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING); -} - -#ifdef CONFIG_DEBUG_FS -static int pmc_dev_state_show(struct seq_file *s, void *unused) -{ - struct pmc_dev *pmc = s->private; - u32 func_dis, func_dis_2, func_dis_index; - u32 d3_sts_0, d3_sts_1, d3_sts_index; - int dev_num, dev_index, reg_index; - - func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS); - func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2); - d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0); - d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1); - - dev_num = ARRAY_SIZE(dev_map); - - for (dev_index = 0; dev_index < dev_num; dev_index++) { - reg_index = dev_index / PMC_REG_BIT_WIDTH; - if (reg_index) { - func_dis_index = func_dis_2; - d3_sts_index = d3_sts_1; - } else { - func_dis_index = func_dis; - d3_sts_index = d3_sts_0; - } - - seq_printf(s, "Dev: %-32s\tState: %s [%s]\n", - dev_map[dev_index].name, - dev_map[dev_index].bit_mask & func_dis_index ? - "Disabled" : "Enabled ", - dev_map[dev_index].bit_mask & d3_sts_index ? - "D3" : "D0"); - } - return 0; -} - -static int pmc_dev_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmc_dev_state_show, inode->i_private); -} - -static const struct file_operations pmc_dev_state_ops = { - .open = pmc_dev_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int pmc_pss_state_show(struct seq_file *s, void *unused) -{ - struct pmc_dev *pmc = s->private; - u32 pss = pmc_reg_read(pmc, PMC_PSS); - int pss_index; - - for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) { - seq_printf(s, "Island: %-32s\tState: %s\n", - pss_map[pss_index].name, - pss_map[pss_index].bit_mask & pss ? "Off" : "On"); - } - return 0; -} - -static int pmc_pss_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmc_pss_state_show, inode->i_private); -} - -static const struct file_operations pmc_pss_state_ops = { - .open = pmc_pss_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) -{ - struct pmc_dev *pmc = s->private; - u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr; - - s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT; - s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT; - s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT; - s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT; - s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT; - - seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr); - seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr); - seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr); - seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr); - seq_printf(s, "S0 Residency:\t%lldus\n", s0_tmr); - return 0; -} - -static int pmc_sleep_tmr_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmc_sleep_tmr_show, inode->i_private); -} - -static const struct file_operations pmc_sleep_tmr_ops = { - .open = pmc_sleep_tmr_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static void pmc_dbgfs_unregister(struct pmc_dev *pmc) -{ - debugfs_remove_recursive(pmc->dbgfs_dir); -} - -static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) -{ - struct dentry *dir, *f; - - dir = debugfs_create_dir("pmc_atom", NULL); - if (!dir) - return -ENOMEM; - - pmc->dbgfs_dir = dir; - - f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, - dir, pmc, &pmc_dev_state_ops); - if (!f) { - dev_err(&pdev->dev, "dev_state register failed\n"); - goto err; - } - - f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO, - dir, pmc, &pmc_pss_state_ops); - if (!f) { - dev_err(&pdev->dev, "pss_state register failed\n"); - goto err; - } - - f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, - dir, pmc, &pmc_sleep_tmr_ops); - if (!f) { - dev_err(&pdev->dev, "sleep_state register failed\n"); - goto err; - } - - return 0; -err: - pmc_dbgfs_unregister(pmc); - return -ENODEV; -} -#else -static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) -{ - return 0; -} -#endif /* CONFIG_DEBUG_FS */ - -static int pmc_setup_dev(struct pci_dev *pdev) -{ - struct pmc_dev *pmc = &pmc_device; - int ret; - - /* Obtain ACPI base address */ - pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr); - acpi_base_addr &= ACPI_BASE_ADDR_MASK; - - /* Install power off function */ - if (acpi_base_addr != 0 && pm_power_off == NULL) - pm_power_off = pmc_power_off; - - pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr); - pmc->base_addr &= PMC_BASE_ADDR_MASK; - - pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN); - if (!pmc->regmap) { - dev_err(&pdev->dev, "error: ioremap failed\n"); - return -ENOMEM; - } - - /* PMC hardware registers setup */ - pmc_hw_reg_setup(pmc); - - ret = pmc_dbgfs_register(pmc, pdev); - if (ret) { - iounmap(pmc->regmap); - } - - return ret; -} - -/* - * Data for PCI driver interface - * - * This data only exists for exporting the supported - * PCI ids via MODULE_DEVICE_TABLE. We do not actually - * register a pci_driver, because lpc_ich will register - * a driver on the same PCI id. - */ -static const struct pci_device_id pmc_pci_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_VLV_PMC) }, - { 0, }, -}; - -MODULE_DEVICE_TABLE(pci, pmc_pci_ids); - -static int __init pmc_atom_init(void) -{ - struct pci_dev *pdev = NULL; - const struct pci_device_id *ent; - - /* We look for our device - PCU PMC - * we assume that there is max. one device. - * - * We can't use plain pci_driver mechanism, - * as the device is really a multiple function device, - * main driver that binds to the pci_device is lpc_ich - * and have to find & bind to the device this way. - */ - for_each_pci_dev(pdev) { - ent = pci_match_id(pmc_pci_ids, pdev); - if (ent) - return pmc_setup_dev(pdev); - } - /* Device not found. */ - return -ENODEV; -} - -module_init(pmc_atom_init); -/* no module_exit, this driver shouldn't be unloaded */ - -MODULE_AUTHOR("Aubrey Li <aubrey.li@linux.intel.com>"); -MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 64f90f53bb85..4f00b63d7ff3 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -3,80 +3,17 @@ * Copyright (c) 2015, Intel Corporation. */ #include <linux/platform_device.h> -#include <linux/libnvdimm.h> #include <linux/module.h> -#include <asm/e820.h> - -static void e820_pmem_release(struct device *dev) -{ - struct nvdimm_bus *nvdimm_bus = dev->platform_data; - - if (nvdimm_bus) - nvdimm_bus_unregister(nvdimm_bus); -} - -static struct platform_device e820_pmem = { - .name = "e820_pmem", - .id = -1, - .dev = { - .release = e820_pmem_release, - }, -}; - -static const struct attribute_group *e820_pmem_attribute_groups[] = { - &nvdimm_bus_attribute_group, - NULL, -}; - -static const struct attribute_group *e820_pmem_region_attribute_groups[] = { - &nd_region_attribute_group, - &nd_device_attribute_group, - NULL, -}; static __init int register_e820_pmem(void) { - static struct nvdimm_bus_descriptor nd_desc; - struct device *dev = &e820_pmem.dev; - struct nvdimm_bus *nvdimm_bus; - int rc, i; - - rc = platform_device_register(&e820_pmem); - if (rc) - return rc; - - nd_desc.attr_groups = e820_pmem_attribute_groups; - nd_desc.provider_name = "e820"; - nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); - if (!nvdimm_bus) - goto err; - dev->platform_data = nvdimm_bus; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - struct resource res = { - .flags = IORESOURCE_MEM, - .start = ei->addr, - .end = ei->addr + ei->size - 1, - }; - struct nd_region_desc ndr_desc; - - if (ei->type != E820_PRAM) - continue; - - memset(&ndr_desc, 0, sizeof(ndr_desc)); - ndr_desc.res = &res; - ndr_desc.attr_groups = e820_pmem_region_attribute_groups; - ndr_desc.numa_node = NUMA_NO_NODE; - if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) - goto err; - } - - return 0; - - err: - dev_err(dev, "failed to register legacy persistent memory ranges\n"); - platform_device_unregister(&e820_pmem); - return -ENXIO; + struct platform_device *pdev; + + /* + * See drivers/nvdimm/e820.c for the implementation, this is + * simply here to trigger the module to load on demand. + */ + pdev = platform_device_alloc("e820_pmem", -1); + return platform_device_add(pdev); } device_initcall(register_e820_pmem); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d83740ab85b0..6d0e62ae8516 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -30,6 +30,7 @@ #include <asm/nmi.h> #include <asm/tlbflush.h> #include <asm/mce.h> +#include <asm/vm86.h> /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -111,6 +112,8 @@ void exit_thread(void) kfree(bp); } + free_vm86(t); + fpu__drop(fpu); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f73c962fe636..c13df2c735f8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -53,6 +53,7 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> #include <asm/switch_to.h> +#include <asm/vm86.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f6b916387590..3c1bbcf12924 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, @@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task) dead_task->mm->context.ldt->size); BUG(); } +#endif } } @@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) __USER_CS, __USER_DS, 0); } -#ifdef CONFIG_IA32_EMULATION -void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) +#ifdef CONFIG_COMPAT +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) { start_thread_common(regs, new_ip, new_sp, test_thread_flag(TIF_X32) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9be72bc3613f..558f50edebca 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,12 +37,10 @@ #include <asm/proto.h> #include <asm/hw_breakpoint.h> #include <asm/traps.h> +#include <asm/syscall.h> #include "tls.h" -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - enum x86_regset { REGSET_GENERAL, REGSET_FP, @@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target, return ret; } +static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + __u32 val; + + switch (request) { + case PTRACE_PEEKUSR: + ret = getreg32(child, addr, &val); + if (ret == 0) + ret = put_user(val, (__u32 __user *)datap); + break; + + case PTRACE_POKEUSR: + ret = putreg32(child, addr, data); + break; + + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct32), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_GENERAL, 0, + sizeof(struct user_regs_struct32), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_FP, 0, + sizeof(struct user_i387_ia32_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user( + child, &user_x86_32_view, REGSET_FP, + 0, sizeof(struct user_i387_ia32_struct), datap); + + case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); + + case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); + + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return arch_ptrace(child, request, addr, data); + + default: + return compat_ptrace_request(child, request, addr, data); + } + + return ret; +} +#endif /* CONFIG_IA32_EMULATION */ + #ifdef CONFIG_X86_X32_ABI static long x32_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, @@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child, } #endif +#ifdef CONFIG_COMPAT long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { - unsigned long addr = caddr; - unsigned long data = cdata; - void __user *datap = compat_ptr(data); - int ret; - __u32 val; - #ifdef CONFIG_X86_X32_ABI if (!is_ia32_task()) return x32_arch_ptrace(child, request, caddr, cdata); #endif - - switch (request) { - case PTRACE_PEEKUSR: - ret = getreg32(child, addr, &val); - if (ret == 0) - ret = put_user(val, (__u32 __user *)datap); - break; - - case PTRACE_POKEUSR: - ret = putreg32(child, addr, data); - break; - - case PTRACE_GETREGS: /* Get all gp regs from the child. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct32), - datap); - - case PTRACE_SETREGS: /* Set all gp regs in the child. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_GENERAL, 0, - sizeof(struct user_regs_struct32), - datap); - - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_FP, 0, - sizeof(struct user_i387_ia32_struct), - datap); - - case PTRACE_SETFPREGS: /* Set the child FPU state. */ - return copy_regset_from_user( - child, &user_x86_32_view, REGSET_FP, - 0, sizeof(struct user_i387_ia32_struct), datap); - - case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_GET_THREAD_AREA: - case PTRACE_SET_THREAD_AREA: - return arch_ptrace(child, request, addr, data); - - default: - return compat_ptrace_request(child, request, addr, data); - } - - return ret; +#ifdef CONFIG_IA32_EMULATION + return ia32_arch_ptrace(child, request, caddr, cdata); +#else + return 0; +#endif } - -#endif /* CONFIG_IA32_EMULATION */ +#endif /* CONFIG_COMPAT */ #ifdef CONFIG_X86_64 @@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); } - -static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) -{ -#ifdef CONFIG_X86_64 - if (arch == AUDIT_ARCH_X86_64) { - audit_syscall_entry(regs->orig_ax, regs->di, - regs->si, regs->dx, regs->r10); - } else -#endif - { - audit_syscall_entry(regs->orig_ax, regs->bx, - regs->cx, regs->dx, regs->si); - } -} - -/* - * We can return 0 to resume the syscall or anything else to go to phase - * 2. If we resume the syscall, we need to put something appropriate in - * regs->orig_ax. - * - * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax - * are fully functional. - * - * For phase 2's benefit, our return value is: - * 0: resume the syscall - * 1: go to phase 2; no seccomp phase 2 needed - * anything else: go to phase 2; pass return value to seccomp - */ -unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) -{ - unsigned long ret = 0; - u32 work; - - BUG_ON(regs != task_pt_regs(current)); - - work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; - - /* - * If TIF_NOHZ is set, we are required to call user_exit() before - * doing anything that could touch RCU. - */ - if (work & _TIF_NOHZ) { - user_exit(); - work &= ~_TIF_NOHZ; - } - -#ifdef CONFIG_SECCOMP - /* - * Do seccomp first -- it should minimize exposure of other - * code, and keeping seccomp fast is probably more valuable - * than the rest of this. - */ - if (work & _TIF_SECCOMP) { - struct seccomp_data sd; - - sd.arch = arch; - sd.nr = regs->orig_ax; - sd.instruction_pointer = regs->ip; -#ifdef CONFIG_X86_64 - if (arch == AUDIT_ARCH_X86_64) { - sd.args[0] = regs->di; - sd.args[1] = regs->si; - sd.args[2] = regs->dx; - sd.args[3] = regs->r10; - sd.args[4] = regs->r8; - sd.args[5] = regs->r9; - } else -#endif - { - sd.args[0] = regs->bx; - sd.args[1] = regs->cx; - sd.args[2] = regs->dx; - sd.args[3] = regs->si; - sd.args[4] = regs->di; - sd.args[5] = regs->bp; - } - - BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); - BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); - - ret = seccomp_phase1(&sd); - if (ret == SECCOMP_PHASE1_SKIP) { - regs->orig_ax = -1; - ret = 0; - } else if (ret != SECCOMP_PHASE1_OK) { - return ret; /* Go directly to phase 2 */ - } - - work &= ~_TIF_SECCOMP; - } -#endif - - /* Do our best to finish without phase 2. */ - if (work == 0) - return ret; /* seccomp and/or nohz only (ret == 0 here) */ - -#ifdef CONFIG_AUDITSYSCALL - if (work == _TIF_SYSCALL_AUDIT) { - /* - * If there is no more work to be done except auditing, - * then audit in phase 1. Phase 2 always audits, so, if - * we audit here, then we can't go on to phase 2. - */ - do_audit_syscall_entry(regs, arch); - return 0; - } -#endif - - return 1; /* Something is enabled that we can't handle in phase 1 */ -} - -/* Returns the syscall nr to run (which should match regs->orig_ax). */ -long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, - unsigned long phase1_result) -{ - long ret = 0; - u32 work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; - - BUG_ON(regs != task_pt_regs(current)); - - /* - * If we stepped into a sysenter/syscall insn, it trapped in - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. - * If user-mode had set TF itself, then it's still clear from - * do_debug() and we need to set it again to restore the user - * state. If we entered on the slow path, TF was already set. - */ - if (work & _TIF_SINGLESTEP) - regs->flags |= X86_EFLAGS_TF; - -#ifdef CONFIG_SECCOMP - /* - * Call seccomp_phase2 before running the other hooks so that - * they can see any changes made by a seccomp tracer. - */ - if (phase1_result > 1 && seccomp_phase2(phase1_result)) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; - } -#endif - - if (unlikely(work & _TIF_SYSCALL_EMU)) - ret = -1L; - - if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - ret = -1L; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->orig_ax); - - do_audit_syscall_entry(regs, arch); - - return ret ?: regs->orig_ax; -} - -long syscall_trace_enter(struct pt_regs *regs) -{ - u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; - unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); - - if (phase1_result == 0) - return regs->orig_ax; - else - return syscall_trace_enter_phase2(regs, arch, phase1_result); -} - -void syscall_trace_leave(struct pt_regs *regs) -{ - bool step; - - /* - * We may come here right after calling schedule_user() - * or do_notify_resume(), in which case we can be in RCU - * user mode. - */ - user_exit(); - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->ax); - - /* - * If TIF_SYSCALL_EMU is set, we only get here because of - * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). - * We already reported this syscall instruction in - * syscall_trace_enter(). - */ - step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && - !test_thread_flag(TIF_SYSCALL_EMU); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); - - user_enter(); -} diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 86db4bcd7ce5..02693dd9a079 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -673,7 +673,7 @@ struct machine_ops machine_ops = { .emergency_restart = native_machine_emergency_restart, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -703,7 +703,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 80f874bf999e..fdb7f2a2d328 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -317,15 +317,12 @@ static u64 __init get_ramdisk_size(void) return ramdisk_size; } -#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { /* Assume only end is not page aligned */ u64 ramdisk_image = get_ramdisk_image(); u64 ramdisk_size = get_ramdisk_size(); u64 area_size = PAGE_ALIGN(ramdisk_size); - unsigned long slop, clen, mapaddr; - char *p, *q; /* We need to move the initrd down into directly mapped mem */ relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), @@ -343,25 +340,8 @@ static void __init relocate_initrd(void) printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); - q = (char *)initrd_start; - - /* Copy the initrd */ - while (ramdisk_size) { - slop = ramdisk_image & ~PAGE_MASK; - clen = ramdisk_size; - if (clen > MAX_MAP_CHUNK-slop) - clen = MAX_MAP_CHUNK-slop; - mapaddr = ramdisk_image & PAGE_MASK; - p = early_memremap(mapaddr, clen+slop); - memcpy(q, p+slop, clen); - early_memunmap(p, clen+slop); - q += clen; - ramdisk_image += clen; - ramdisk_size -= clen; - } + copy_from_early_mem((void *)initrd_start, ramdisk_image, ramdisk_size); - ramdisk_image = get_ramdisk_image(); - ramdisk_size = get_ramdisk_size(); printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" " [mem %#010llx-%#010llx]\n", ramdisk_image, ramdisk_image + ramdisk_size - 1, @@ -498,7 +478,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) * --------- Crashkernel reservation ------------------------------ */ -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * Keep the crash kernel below this limit. On 32 bits earlier kernels @@ -916,11 +896,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 apm_info.bios = boot_params.apm_bios_info; ist_info = boot_params.ist_info; - if (boot_params.sys_desc_table.length != 0) { - machine_id = boot_params.sys_desc_table.table[0]; - machine_submodel_id = boot_params.sys_desc_table.table[1]; - BIOS_revision = boot_params.sys_desc_table.table[2]; - } #endif saved_video_mode = boot_params.hdr.vid_mode; bootloader_type = boot_params.hdr.type_of_loader; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 71820c42b6ce..da52e6bb5c7f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -31,11 +31,11 @@ #include <asm/vdso.h> #include <asm/mce.h> #include <asm/sighandling.h> +#include <asm/vm86.h> #ifdef CONFIG_X86_64 #include <asm/proto.h> #include <asm/ia32_unistd.h> -#include <asm/sys_ia32.h> #endif /* CONFIG_X86_64 */ #include <asm/syscall.h> @@ -632,6 +632,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) bool stepping, failed; struct fpu *fpu = ¤t->thread.fpu; + if (v8086_mode(regs)) + save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); + /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ @@ -697,7 +700,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -static void do_signal(struct pt_regs *regs) +void do_signal(struct pt_regs *regs) { struct ksignal ksig; @@ -732,32 +735,6 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } -/* - * notification of userspace execution resumption - * - triggered by the TIF_WORK_MASK flags - */ -__visible void -do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) -{ - user_exit(); - - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - - /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } - if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) - fire_user_return_notifiers(); - - user_enter(); -} - void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c new file mode 100644 index 000000000000..dc3c0b1c816f --- /dev/null +++ b/arch/x86/kernel/signal_compat.c @@ -0,0 +1,95 @@ +#include <linux/compat.h> +#include <linux/uaccess.h> + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +{ + int err = 0; + bool ia32 = test_thread_flag(TIF_IA32); + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + put_user_try { + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + put_user_ex(from->si_signo, &to->si_signo); + put_user_ex(from->si_errno, &to->si_errno); + put_user_ex((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + put_user_ex(from->si_pid, &to->si_pid); + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + put_user_ex(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_SYS >> 16: + put_user_ex(from->si_syscall, &to->si_syscall); + put_user_ex(from->si_arch, &to->si_arch); + break; + case __SI_CHLD >> 16: + if (ia32) { + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + } else { + put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); + put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); + } + put_user_ex(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + put_user_ex(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + put_user_ex(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + put_user_ex(from->si_overrun, &to->si_overrun); + put_user_ex(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(from->si_int, &to->si_int); + break; + } + } + } put_user_catch(err); + + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + int err = 0; + u32 ptr32; + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + get_user_try { + get_user_ex(to->si_signo, &from->si_signo); + get_user_ex(to->si_errno, &from->si_errno); + get_user_ex(to->si_code, &from->si_code); + + get_user_ex(to->si_pid, &from->si_pid); + get_user_ex(to->si_uid, &from->si_uid); + get_user_ex(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + } get_user_catch(err); + + return err; +} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b1f3ed9c7a9e..e0c198e5f920 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -97,8 +97,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); -atomic_t init_deasserted; - static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -146,16 +144,11 @@ static void smp_callin(void) /* * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - * - * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI. + * cpu_callout_mask guarantees we don't get here before + * an INIT_deassert IPI reaches our local APIC, so it is + * now safe to touch our local APIC. */ cpuid = smp_processor_id(); - if (apic->wait_for_init_deassert && cpuid) - while (!atomic_read(&init_deasserted)) - cpu_relax(); /* * (This works even if the APIC is not enabled.) @@ -620,7 +613,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) send_status = safe_apic_wait_icr_idle(); mb(); - atomic_set(&init_deasserted, 1); /* * Should we send STARTUP IPIs ? @@ -665,7 +657,8 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* * Give the other CPU some time to accept the IPI. */ - udelay(300); + if (init_udelay) + udelay(300); pr_debug("Startup point 1\n"); @@ -675,7 +668,8 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* * Give the other CPU some time to accept the IPI. */ - udelay(200); + if (init_udelay) + udelay(200); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); @@ -859,8 +853,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) * the targeted processor. */ - atomic_set(&init_deasserted, 0); - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { pr_debug("Setting warm reset code and vector.\n"); @@ -898,7 +890,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) if (!boot_error) { /* - * Wait 10s total for a response from AP + * Wait 10s total for first sign of life from AP */ boot_error = -1; timeout = jiffies + 10*HZ; @@ -911,7 +903,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) boot_error = 0; break; } - udelay(100); schedule(); } } @@ -927,7 +918,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) * for the MTRR work(triggered by the AP coming online) * to be completed in the stop machine context. */ - udelay(100); schedule(); } } @@ -1358,7 +1348,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); } -static void __ref remove_cpu_from_maps(int cpu) +static void remove_cpu_from_maps(int cpu) { set_cpu_online(cpu, false); cpumask_clear_cpu(cpu, cpu_callout_mask); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 0ccb53a9fcd9..c9a073866ca7 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re return addr; } +#ifdef CONFIG_MODIFY_LDT_SYSCALL /* * We'll assume that the code segments in the GDT * are all zero-based. That is largely true: the @@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re } mutex_unlock(&child->mm->context.lock); } +#endif return addr; } diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 649b010da00b..12cbe2b88c0f 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -57,7 +57,7 @@ __setup("cpu0_hotplug", enable_cpu0_hotplug); * * This is only called for debugging CPU offline/online feature. */ -int __ref _debug_hotplug_cpu(int cpu, int action) +int _debug_hotplug_cpu(int cpu, int action) { struct device *dev = get_cpu_device(cpu); int ret; @@ -104,7 +104,7 @@ static int __init debug_hotplug_cpu(void) late_initcall_sync(debug_hotplug_cpu); #endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */ -int __ref arch_register_cpu(int num) +int arch_register_cpu(int num) { struct cpuinfo_x86 *c = &cpu_data(num); diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index 25b993729f9b..80bb24d9b880 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c @@ -12,10 +12,5 @@ */ u64 notrace trace_clock_x86_tsc(void) { - u64 ret; - - rdtsc_barrier(); - rdtscll(ret); - - return ret; + return rdtsc_ordered(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5a5231d1d11..346eec73f7db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -62,6 +62,7 @@ #include <asm/fpu/xstate.h> #include <asm/trace/mpx.h> #include <asm/mpx.h> +#include <asm/vm86.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } -enum ctx_state ist_enter(struct pt_regs *regs) +void ist_enter(struct pt_regs *regs) { - enum ctx_state prev_state; - if (user_mode(regs)) { - /* Other than that, we're just an exception. */ - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); } else { /* * We might have interrupted pretty much anything. In @@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs) * but we need to notify RCU. */ rcu_nmi_enter(); - prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */ } /* - * We are atomic because we're on the IST stack (or we're on x86_32, - * in which case we still shouldn't schedule). - * - * This must be after exception_enter(), because exception_enter() - * won't do anything if in_interrupt() returns true. + * We are atomic because we're on the IST stack; or we're on + * x86_32, in which case we still shouldn't schedule; or we're + * on x86_64 and entered from user mode, in which case we're + * still atomic unless ist_begin_non_atomic is called. */ preempt_count_add(HARDIRQ_OFFSET); /* This code is a bit fragile. Test it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); - - return prev_state; } -void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) +void ist_exit(struct pt_regs *regs) { - /* Must be before exception_exit. */ preempt_count_sub(HARDIRQ_OFFSET); - if (user_mode(regs)) - return exception_exit(prev_state); - else + if (!user_mode(regs)) rcu_nmi_exit(); } @@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) * a double fault, it can be safe to schedule. ist_begin_non_atomic() * begins a non-atomic section within an ist_enter()/ist_exit() region. * Callers are responsible for enabling interrupts themselves inside - * the non-atomic section, and callers must call is_end_non_atomic() + * the non-atomic section, and callers must call ist_end_non_atomic() * before ist_exit(). */ void ist_begin_non_atomic(struct pt_regs *regs) @@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap); static void do_error_trap(struct pt_regs *regs, long error_code, char *str, unsigned long trapnr, int signr) { - enum ctx_state prev_state = exception_enter(); siginfo_t info; + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { conditional_sti(regs); do_trap(trapnr, signr, str, regs, error_code, fill_trap_info(regs, signr, trapnr, &info)); } - - exception_exit(prev_state); } #define DO_ERROR(trapnr, signr, str, name) \ @@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif - ist_enter(regs); /* Discard prev_state because we won't return. */ + ist_enter(regs); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; @@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; const struct bndcsr *bndcsr; siginfo_t *info; - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) - goto exit; + return; conditional_sti(regs); if (!user_mode(regs)) @@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) die("bounds", regs, error_code); } -exit: - exception_exit(prev_state); return; + exit_trap: /* * This path out is for all the cases where we could not @@ -447,35 +435,33 @@ exit_trap: * time.. */ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); - exception_exit(prev_state); } dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; - enum ctx_state prev_state; - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); conditional_sti(regs); if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - goto exit; + return; } tsk = current; if (!user_mode(regs)) { if (fixup_exception(regs)) - goto exit; + return; tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; if (notify_die(DIE_GPF, "general protection fault", regs, error_code, X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) die("general protection fault", regs, error_code); - goto exit; + return; } tsk->thread.error_code = error_code; @@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); -exit: - exception_exit(prev_state); } NOKPROBE_SYMBOL(do_general_protection); /* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - #ifdef CONFIG_DYNAMIC_FTRACE /* * ftrace must be first, everything else may cause a recursive crash. @@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; - prev_state = ist_enter(regs); + ist_enter(regs); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: - ist_exit(regs, prev_state); + ist_exit(regs); } NOKPROBE_SYMBOL(do_int3); @@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret); dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - enum ctx_state prev_state; int user_icebp = 0; unsigned long dr6; int si_code; - prev_state = ist_enter(regs); + ist_enter(regs); get_debugreg(dr6, 6); @@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - ist_exit(regs, prev_state); + ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); @@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); math_error(regs, error_code, X86_TRAP_MF); - exception_exit(prev_state); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); math_error(regs, error_code, X86_TRAP_XF); - exception_exit(prev_state); } dotraplinkage void @@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION @@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); - exception_exit(prev_state); return; } #endif @@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif - exception_exit(prev_state); } NOKPROBE_SYMBOL(do_device_not_available); @@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available); dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; - enum ctx_state prev_state; - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); local_irq_enable(); info.si_signo = SIGILL; @@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, &info); } - exception_exit(prev_state); } #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 88e9a38c71a5..c8d52cb4cb6e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -38,7 +38,7 @@ static int __read_mostly tsc_unstable; erroneous rdtsc usage on !cpu_has_tsc processors */ static int __read_mostly tsc_disabled = -1; -static struct static_key __use_tsc = STATIC_KEY_INIT; +static DEFINE_STATIC_KEY_FALSE(__use_tsc); int tsc_clocksource_reliable; @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) data = cyc2ns_write_begin(cpu); - rdtscll(tsc_now); + tsc_now = rdtsc(); ns_now = cycles_2_ns(tsc_now); /* @@ -274,7 +274,12 @@ done: */ u64 native_sched_clock(void) { - u64 tsc_now; + if (static_branch_likely(&__use_tsc)) { + u64 tsc_now = rdtsc(); + + /* return the value in ns */ + return cycles_2_ns(tsc_now); + } /* * Fall back to jiffies if there's no TSC available: @@ -284,16 +289,9 @@ u64 native_sched_clock(void) * very important for it to be as fast as the platform * can achieve it. ) */ - if (!static_key_false(&__use_tsc)) { - /* No locking but a rare wrong value is not a big deal: */ - return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); - } - - /* read the Time Stamp Counter: */ - rdtscll(tsc_now); - /* return the value in ns */ - return cycles_2_ns(tsc_now); + /* No locking but a rare wrong value is not a big deal: */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); } /* @@ -316,12 +314,6 @@ unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); #endif -unsigned long long native_read_tsc(void) -{ - return __native_read_tsc(); -} -EXPORT_SYMBOL(native_read_tsc); - int check_tsc_unstable(void) { return tsc_unstable; @@ -984,7 +976,7 @@ static struct clocksource clocksource_tsc; */ static cycle_t read_tsc(struct clocksource *cs) { - return (cycle_t)get_cycles(); + return (cycle_t)rdtsc_ordered(); } /* @@ -1218,7 +1210,7 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; - static_key_slow_inc(&__use_tsc); + static_branch_enable(&__use_tsc); if (!no_sched_irq_time) enable_sched_clock_irqtime(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index dd8d0791dfb5..78083bf23ed1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -39,16 +39,15 @@ static cycles_t max_warp; static int nr_warps; /* - * TSC-warp measurement loop running on both CPUs: + * TSC-warp measurement loop running on both CPUs. This is not called + * if there is no TSC. */ static void check_tsc_warp(unsigned int timeout) { cycles_t start, now, prev, end; int i; - rdtsc_barrier(); - start = get_cycles(); - rdtsc_barrier(); + start = rdtsc_ordered(); /* * The measurement runs for 'timeout' msecs: */ @@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout) */ arch_spin_lock(&sync_lock); prev = last_tsc; - rdtsc_barrier(); - now = get_cycles(); - rdtsc_barrier(); + now = rdtsc_ordered(); last_tsc = now; arch_spin_unlock(&sync_lock); @@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu) /* * No need to check if we already know that the TSC is not - * synchronized: + * synchronized or if we have no TSC. */ if (unsynchronized_tsc()) return; @@ -190,6 +187,7 @@ void check_tsc_sync_target(void) { int cpus = 2; + /* Also aborts if there is no TSC. */ if (unsynchronized_tsc() || tsc_clocksource_reliable) return; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index fc9db6ef2a95..abd8b856bd2b 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -44,11 +44,14 @@ #include <linux/ptrace.h> #include <linux/audit.h> #include <linux/stddef.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/tlbflush.h> #include <asm/irq.h> +#include <asm/traps.h> +#include <asm/vm86.h> /* * Known problems: @@ -66,10 +69,6 @@ */ -#define KVM86 ((struct kernel_vm86_struct *)regs) -#define VMPI KVM86->vm86plus - - /* * 8- and 16-bit register defines.. */ @@ -81,8 +80,8 @@ /* * virtual flags (16 and 32-bit versions) */ -#define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) -#define VEFLAGS (current->thread.v86flags) +#define VFLAGS (*(unsigned short *)&(current->thread.vm86->veflags)) +#define VEFLAGS (current->thread.vm86->veflags) #define set_flags(X, new, mask) \ ((X) = ((X) & ~(mask)) | ((new) & (mask))) @@ -90,46 +89,13 @@ #define SAFE_MASK (0xDD5) #define RETURN_MASK (0xDFF) -/* convert kernel_vm86_regs to vm86_regs */ -static int copy_vm86_regs_to_user(struct vm86_regs __user *user, - const struct kernel_vm86_regs *regs) -{ - int ret = 0; - - /* - * kernel_vm86_regs is missing gs, so copy everything up to - * (but not including) orig_eax, and then rest including orig_eax. - */ - ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax)); - - return ret; -} - -/* convert vm86_regs to kernel_vm86_regs */ -static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, - const struct vm86_regs __user *user, - unsigned extra) -{ - int ret = 0; - - /* copy ax-fs inclusive */ - ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - /* copy orig_ax-__gsh+extra */ - ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax) + - extra); - return ret; -} - -struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) +void save_v86_state(struct kernel_vm86_regs *regs, int retval) { struct tss_struct *tss; - struct pt_regs *ret; - unsigned long tmp; + struct task_struct *tsk = current; + struct vm86plus_struct __user *user; + struct vm86 *vm86 = current->thread.vm86; + long err = 0; /* * This gets called from entry.S with interrupts disabled, but @@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) */ local_irq_enable(); - if (!current->thread.vm86_info) { - pr_alert("no vm86_info: BAD\n"); + if (!vm86 || !vm86->user_vm86) { + pr_alert("no user_vm86: BAD\n"); do_exit(SIGSEGV); } - set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); - tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); - tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); - if (tmp) { - pr_alert("could not access userspace vm86_info\n"); + set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask); + user = vm86->user_vm86; + + if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ? + sizeof(struct vm86plus_struct) : + sizeof(struct vm86_struct))) { + pr_alert("could not access userspace vm86 info\n"); + do_exit(SIGSEGV); + } + + put_user_try { + put_user_ex(regs->pt.bx, &user->regs.ebx); + put_user_ex(regs->pt.cx, &user->regs.ecx); + put_user_ex(regs->pt.dx, &user->regs.edx); + put_user_ex(regs->pt.si, &user->regs.esi); + put_user_ex(regs->pt.di, &user->regs.edi); + put_user_ex(regs->pt.bp, &user->regs.ebp); + put_user_ex(regs->pt.ax, &user->regs.eax); + put_user_ex(regs->pt.ip, &user->regs.eip); + put_user_ex(regs->pt.cs, &user->regs.cs); + put_user_ex(regs->pt.flags, &user->regs.eflags); + put_user_ex(regs->pt.sp, &user->regs.esp); + put_user_ex(regs->pt.ss, &user->regs.ss); + put_user_ex(regs->es, &user->regs.es); + put_user_ex(regs->ds, &user->regs.ds); + put_user_ex(regs->fs, &user->regs.fs); + put_user_ex(regs->gs, &user->regs.gs); + + put_user_ex(vm86->screen_bitmap, &user->screen_bitmap); + } put_user_catch(err); + if (err) { + pr_alert("could not access userspace vm86 info\n"); do_exit(SIGSEGV); } tss = &per_cpu(cpu_tss, get_cpu()); - current->thread.sp0 = current->thread.saved_sp0; - current->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tss, ¤t->thread); - current->thread.saved_sp0 = 0; + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, &tsk->thread); + vm86->saved_sp0 = 0; put_cpu(); - ret = KVM86->regs32; + memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); - ret->fs = current->thread.saved_fs; - set_user_gs(ret, current->thread.saved_gs); + lazy_load_gs(vm86->regs32.gs); - return ret; + regs->pt.ax = retval; } static void mark_screen_rdonly(struct mm_struct *mm) @@ -200,45 +192,16 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); +static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus); -SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86) +SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86) { - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk = current; - int tmp; - - if (tsk->thread.saved_sp0) - return -EPERM; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, vm86plus) - - sizeof(info.regs)); - if (tmp) - return -EFAULT; - memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = current_pt_regs(); - tsk->thread.vm86_info = v86; - do_sys_vm86(&info, tsk); - return 0; /* we never return here */ + return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false); } SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) { - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk; - int tmp; - struct vm86plus_struct __user *v86; - - tsk = current; switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: @@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) } /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ - if (tsk->thread.saved_sp0) - return -EPERM; - v86 = (struct vm86plus_struct __user *)arg; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, regs32) - - sizeof(info.regs)); - if (tmp) - return -EFAULT; - info.regs32 = current_pt_regs(); - info.vm86plus.is_vm86pus = 1; - tsk->thread.vm86_info = (struct vm86_struct __user *)v86; - do_sys_vm86(&info, tsk); - return 0; /* we never return here */ + return do_sys_vm86((struct vm86plus_struct __user *) arg, true); } -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) +static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) { struct tss_struct *tss; -/* - * make sure the vm86() system call doesn't try to do anything silly - */ - info->regs.pt.ds = 0; - info->regs.pt.es = 0; - info->regs.pt.fs = 0; -#ifndef CONFIG_X86_32_LAZY_GS - info->regs.pt.gs = 0; -#endif + struct task_struct *tsk = current; + struct vm86 *vm86 = tsk->thread.vm86; + struct kernel_vm86_regs vm86regs; + struct pt_regs *regs = current_pt_regs(); + unsigned long err = 0; + + if (!vm86) { + if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) + return -ENOMEM; + tsk->thread.vm86 = vm86; + } + if (vm86->saved_sp0) + return -EPERM; + + if (!access_ok(VERIFY_READ, user_vm86, plus ? + sizeof(struct vm86_struct) : + sizeof(struct vm86plus_struct))) + return -EFAULT; + + memset(&vm86regs, 0, sizeof(vm86regs)); + get_user_try { + unsigned short seg; + get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx); + get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx); + get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx); + get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi); + get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi); + get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp); + get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax); + get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip); + get_user_ex(seg, &user_vm86->regs.cs); + vm86regs.pt.cs = seg; + get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags); + get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp); + get_user_ex(seg, &user_vm86->regs.ss); + vm86regs.pt.ss = seg; + get_user_ex(vm86regs.es, &user_vm86->regs.es); + get_user_ex(vm86regs.ds, &user_vm86->regs.ds); + get_user_ex(vm86regs.fs, &user_vm86->regs.fs); + get_user_ex(vm86regs.gs, &user_vm86->regs.gs); + + get_user_ex(vm86->flags, &user_vm86->flags); + get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap); + get_user_ex(vm86->cpu_type, &user_vm86->cpu_type); + } get_user_catch(err); + if (err) + return err; + + if (copy_from_user(&vm86->int_revectored, + &user_vm86->int_revectored, + sizeof(struct revectored_struct))) + return -EFAULT; + if (copy_from_user(&vm86->int21_revectored, + &user_vm86->int21_revectored, + sizeof(struct revectored_struct))) + return -EFAULT; + if (plus) { + if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus, + sizeof(struct vm86plus_info_struct))) + return -EFAULT; + vm86->vm86plus.is_vm86pus = 1; + } else + memset(&vm86->vm86plus, 0, + sizeof(struct vm86plus_info_struct)); + + memcpy(&vm86->regs32, regs, sizeof(struct pt_regs)); + vm86->user_vm86 = user_vm86; /* * The flags register is also special: we cannot trust that the user * has set it up safely, so this makes sure interrupt etc flags are * inherited from protected mode. */ - VEFLAGS = info->regs.pt.flags; - info->regs.pt.flags &= SAFE_MASK; - info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK; - info->regs.pt.flags |= X86_VM_MASK; + VEFLAGS = vm86regs.pt.flags; + vm86regs.pt.flags &= SAFE_MASK; + vm86regs.pt.flags |= regs->flags & ~SAFE_MASK; + vm86regs.pt.flags |= X86_VM_MASK; + + vm86regs.pt.orig_ax = regs->orig_ax; - switch (info->cpu_type) { + switch (vm86->cpu_type) { case CPU_286: - tsk->thread.v86mask = 0; + vm86->veflags_mask = 0; break; case CPU_386: - tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; case CPU_486: - tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; default: - tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; } /* - * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) + * Save old state */ - info->regs32->ax = VM86_SIGNAL; - tsk->thread.saved_sp0 = tsk->thread.sp0; - tsk->thread.saved_fs = info->regs32->fs; - tsk->thread.saved_gs = get_user_gs(info->regs32); + vm86->saved_sp0 = tsk->thread.sp0; + lazy_save_gs(vm86->regs32.gs); tss = &per_cpu(cpu_tss, get_cpu()); - tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; + /* make room for real-mode segments */ + tsk->thread.sp0 += 16; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; load_sp0(tss, &tsk->thread); put_cpu(); - tsk->thread.screen_bitmap = info->screen_bitmap; - if (info->flags & VM86_SCREEN_BITMAP) + if (vm86->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); - /*call __audit_syscall_exit since we do not exit via the normal paths */ -#ifdef CONFIG_AUDITSYSCALL - if (unlikely(current->audit_context)) - __audit_syscall_exit(1, 0); -#endif - - __asm__ __volatile__( - "movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" -#ifdef CONFIG_X86_32_LAZY_GS - "mov %2, %%gs\n\t" -#endif - "jmp resume_userspace" - : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); - /* we never return here */ -} - -static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) -{ - struct pt_regs *regs32; - - regs32 = save_v86_state(regs16); - regs32->ax = retval; - __asm__ __volatile__("movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" - "jmp resume_userspace" - : : "r" (regs32), "r" (current_thread_info())); + memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); + force_iret(); + return regs->ax; } static inline void set_IF(struct kernel_vm86_regs *regs) { VEFLAGS |= X86_EFLAGS_VIF; - if (VEFLAGS & X86_EFLAGS_VIP) - return_to_32bit(regs, VM86_STI); } static inline void clear_IF(struct kernel_vm86_regs *regs) @@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs) static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs) { - set_flags(VEFLAGS, flags, current->thread.v86mask); + set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask); set_flags(regs->pt.flags, flags, SAFE_MASK); if (flags & X86_EFLAGS_IF) set_IF(regs); @@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs) { - set_flags(VFLAGS, flags, current->thread.v86mask); + set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask); set_flags(regs->pt.flags, flags, SAFE_MASK); if (flags & X86_EFLAGS_IF) set_IF(regs); @@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) if (VEFLAGS & X86_EFLAGS_VIF) flags |= X86_EFLAGS_IF; flags |= X86_EFLAGS_IOPL; - return flags | (VEFLAGS & current->thread.v86mask); + return flags | (VEFLAGS & current->thread.vm86->veflags_mask); } static inline int is_revectored(int nr, struct revectored_struct *bitmap) @@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i, { unsigned long __user *intr_ptr; unsigned long segoffs; + struct vm86 *vm86 = current->thread.vm86; if (regs->pt.cs == BIOSSEG) goto cannot_handle; - if (is_revectored(i, &KVM86->int_revectored)) + if (is_revectored(i, &vm86->int_revectored)) goto cannot_handle; - if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) + if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored)) goto cannot_handle; intr_ptr = (unsigned long __user *) (i << 2); if (get_user(segoffs, intr_ptr)) @@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i, return; cannot_handle: - return_to_32bit(regs, VM86_INTx + (i << 8)); + save_v86_state(regs, VM86_INTx + (i << 8)); } int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) { - if (VMPI.is_vm86pus) { + struct vm86 *vm86 = current->thread.vm86; + + if (vm86->vm86plus.is_vm86pus) { if ((trapno == 3) || (trapno == 1)) { - KVM86->regs32->ax = VM86_TRAP + (trapno << 8); - /* setting this flag forces the code in entry_32.S to - the path where we call save_v86_state() and change - the stack pointer to KVM86->regs32 */ - set_thread_flag(TIF_NOTIFY_RESUME); + save_v86_state(regs, VM86_TRAP + (trapno << 8)); return 0; } do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); @@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) unsigned char __user *ssp; unsigned short ip, sp, orig_flags; int data32, pref_done; + struct vm86plus_info_struct *vmpi = ¤t->thread.vm86->vm86plus; #define CHECK_IF_IN_TRAP \ - if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ + if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \ newflags |= X86_EFLAGS_TF -#define VM86_FAULT_RETURN do { \ - if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \ - return_to_32bit(regs, VM86_PICRETURN); \ - if (orig_flags & X86_EFLAGS_TF) \ - handle_vm86_trap(regs, 0, 1); \ - return; } while (0) orig_flags = *(unsigned short *)®s->pt.flags; @@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) SP(regs) -= 2; } IP(regs) = ip; - VM86_FAULT_RETURN; + goto vm86_fault_return; /* popf */ case 0x9d: @@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) else set_vflags_short(newflags, regs); - VM86_FAULT_RETURN; + goto check_vip; } /* int xx */ case 0xcd: { int intno = popb(csp, ip, simulate_sigsegv); IP(regs) = ip; - if (VMPI.vm86dbg_active) { - if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3]) - return_to_32bit(regs, VM86_INTx + (intno << 8)); + if (vmpi->vm86dbg_active) { + if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) { + save_v86_state(regs, VM86_INTx + (intno << 8)); + return; + } } do_int(regs, intno, ssp, sp); return; @@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) } else { set_vflags_short(newflags, regs); } - VM86_FAULT_RETURN; + goto check_vip; } /* cli */ case 0xfa: IP(regs) = ip; clear_IF(regs); - VM86_FAULT_RETURN; + goto vm86_fault_return; /* sti */ /* @@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) case 0xfb: IP(regs) = ip; set_IF(regs); - VM86_FAULT_RETURN; + goto check_vip; default: - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } return; +check_vip: + if (VEFLAGS & X86_EFLAGS_VIP) { + save_v86_state(regs, VM86_STI); + return; + } + +vm86_fault_return: + if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) { + save_v86_state(regs, VM86_PICRETURN); + return; + } + if (orig_flags & X86_EFLAGS_TF) + handle_vm86_trap(regs, 0, X86_TRAP_DB); + return; + simulate_sigsegv: /* FIXME: After a long discussion with Stas we finally * agreed, that this is wrong. Here we should @@ -720,7 +713,7 @@ simulate_sigsegv: * should be a mixture of the two, but how do we * get the information? [KD] */ - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } /* ---------------- vm86 special IRQ passing stuff ----------------- */ diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 00bf300fd846..74e4bf11f562 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -364,7 +364,7 @@ INIT_PER_CPU(irq_stack_union); #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #include <asm/kexec.h> . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, |