diff options
Diffstat (limited to 'arch')
787 files changed, 26093 insertions, 23359 deletions
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 42aa078a5e4d..5a621c6d22ab 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs) switch (which) { case IPI_RESCHEDULE: - /* Reschedule callback. Everything to be done - is done by the interrupt return path. */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 433be2a24f31..3d890a98a08b 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -46,6 +46,7 @@ SECTIONS __init_end = .; /* Freed after init ends here */ + _sdata = .; /* Start of rw data section */ _data = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c index 113085a77123..7aa4262ada7a 100644 --- a/arch/arm/common/vic.c +++ b/arch/arm/common/vic.c @@ -22,17 +22,16 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/io.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/device.h> #include <linux/amba/bus.h> #include <asm/mach/irq.h> #include <asm/hardware/vic.h> -#if defined(CONFIG_PM) +#ifdef CONFIG_PM /** * struct vic_device - VIC PM device - * @sysdev: The system device which is registered. * @irq: The IRQ number for the base of the VIC. * @base: The register base for the VIC. * @resume_sources: A bitmask of interrupts for resume. @@ -43,8 +42,6 @@ * @protect: Save for VIC_PROTECT. */ struct vic_device { - struct sys_device sysdev; - void __iomem *base; int irq; u32 resume_sources; @@ -59,11 +56,6 @@ struct vic_device { static struct vic_device vic_devices[CONFIG_ARM_VIC_NR]; static int vic_id; - -static inline struct vic_device *to_vic(struct sys_device *sys) -{ - return container_of(sys, struct vic_device, sysdev); -} #endif /* CONFIG_PM */ /** @@ -85,10 +77,9 @@ static void vic_init2(void __iomem *base) writel(32, base + VIC_PL190_DEF_VECT_ADDR); } -#if defined(CONFIG_PM) -static int vic_class_resume(struct sys_device *dev) +#ifdef CONFIG_PM +static void resume_one_vic(struct vic_device *vic) { - struct vic_device *vic = to_vic(dev); void __iomem *base = vic->base; printk(KERN_DEBUG "%s: resuming vic at %p\n", __func__, base); @@ -107,13 +98,18 @@ static int vic_class_resume(struct sys_device *dev) writel(vic->soft_int, base + VIC_INT_SOFT); writel(~vic->soft_int, base + VIC_INT_SOFT_CLEAR); +} - return 0; +static void vic_resume(void) +{ + int id; + + for (id = vic_id - 1; id >= 0; id--) + resume_one_vic(vic_devices + id); } -static int vic_class_suspend(struct sys_device *dev, pm_message_t state) +static void suspend_one_vic(struct vic_device *vic) { - struct vic_device *vic = to_vic(dev); void __iomem *base = vic->base; printk(KERN_DEBUG "%s: suspending vic at %p\n", __func__, base); @@ -128,14 +124,21 @@ static int vic_class_suspend(struct sys_device *dev, pm_message_t state) writel(vic->resume_irqs, base + VIC_INT_ENABLE); writel(~vic->resume_irqs, base + VIC_INT_ENABLE_CLEAR); +} + +static int vic_suspend(void) +{ + int id; + + for (id = 0; id < vic_id; id++) + suspend_one_vic(vic_devices + id); return 0; } -struct sysdev_class vic_class = { - .name = "vic", - .suspend = vic_class_suspend, - .resume = vic_class_resume, +struct syscore_ops vic_syscore_ops = { + .suspend = vic_suspend, + .resume = vic_resume, }; /** @@ -147,30 +150,8 @@ struct sysdev_class vic_class = { */ static int __init vic_pm_init(void) { - struct vic_device *dev = vic_devices; - int err; - int id; - - if (vic_id == 0) - return 0; - - err = sysdev_class_register(&vic_class); - if (err) { - printk(KERN_ERR "%s: cannot register class\n", __func__); - return err; - } - - for (id = 0; id < vic_id; id++, dev++) { - dev->sysdev.id = id; - dev->sysdev.cls = &vic_class; - - err = sysdev_register(&dev->sysdev); - if (err) { - printk(KERN_ERR "%s: failed to register device\n", - __func__); - return err; - } - } + if (vic_id > 0) + register_syscore_ops(&vic_syscore_ops); return 0; } diff --git a/arch/arm/include/asm/i8253.h b/arch/arm/include/asm/i8253.h new file mode 100644 index 000000000000..70656b69d5ce --- /dev/null +++ b/arch/arm/include/asm/i8253.h @@ -0,0 +1,15 @@ +#ifndef __ASMARM_I8253_H +#define __ASMARM_I8253_H + +/* i8253A PIT registers */ +#define PIT_MODE 0x43 +#define PIT_CH0 0x40 + +#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) + +extern raw_spinlock_t i8253_lock; + +#define outb_pit outb_p +#define inb_pit inb_p + +#endif diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h index 883f6be5117a..d5adaae5ee2c 100644 --- a/arch/arm/include/asm/mach/time.h +++ b/arch/arm/include/asm/mach/time.h @@ -34,7 +34,6 @@ * timer interrupt which may be pending. */ struct sys_timer { - struct sys_device dev; void (*init)(void); void (*suspend)(void); void (*resume)(void); diff --git a/arch/arm/kernel/leds.c b/arch/arm/kernel/leds.c index 31a316c1777b..0f107dcb0347 100644 --- a/arch/arm/kernel/leds.c +++ b/arch/arm/kernel/leds.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <asm/leds.h> @@ -69,36 +70,37 @@ static ssize_t leds_store(struct sys_device *dev, static SYSDEV_ATTR(event, 0200, NULL, leds_store); -static int leds_suspend(struct sys_device *dev, pm_message_t state) +static struct sysdev_class leds_sysclass = { + .name = "leds", +}; + +static struct sys_device leds_device = { + .id = 0, + .cls = &leds_sysclass, +}; + +static int leds_suspend(void) { leds_event(led_stop); return 0; } -static int leds_resume(struct sys_device *dev) +static void leds_resume(void) { leds_event(led_start); - return 0; } -static int leds_shutdown(struct sys_device *dev) +static void leds_shutdown(void) { leds_event(led_halted); - return 0; } -static struct sysdev_class leds_sysclass = { - .name = "leds", +static struct syscore_ops leds_syscore_ops = { .shutdown = leds_shutdown, .suspend = leds_suspend, .resume = leds_resume, }; -static struct sys_device leds_device = { - .id = 0, - .cls = &leds_sysclass, -}; - static int __init leds_init(void) { int ret; @@ -107,6 +109,8 @@ static int __init leds_init(void) ret = sysdev_register(&leds_device); if (ret == 0) ret = sysdev_create_file(&leds_device, &attr_event); + if (ret == 0) + register_syscore_ops(&leds_syscore_ops); return ret; } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index f29b8a29b174..007a0a950e75 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) break; case IPI_RESCHEDULE: - /* - * nothing more to do - eveything is - * done on the interrupt return path - */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 1ff46cabc7ef..cb634c3e28e9 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -21,7 +21,7 @@ #include <linux/timex.h> #include <linux/errno.h> #include <linux/profile.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/timer.h> #include <linux/irq.h> @@ -115,48 +115,37 @@ void timer_tick(void) #endif #if defined(CONFIG_PM) && !defined(CONFIG_GENERIC_CLOCKEVENTS) -static int timer_suspend(struct sys_device *dev, pm_message_t state) +static int timer_suspend(void) { - struct sys_timer *timer = container_of(dev, struct sys_timer, dev); - - if (timer->suspend != NULL) - timer->suspend(); + if (system_timer->suspend) + system_timer->suspend(); return 0; } -static int timer_resume(struct sys_device *dev) +static void timer_resume(void) { - struct sys_timer *timer = container_of(dev, struct sys_timer, dev); - - if (timer->resume != NULL) - timer->resume(); - - return 0; + if (system_timer->resume) + system_timer->resume(); } #else #define timer_suspend NULL #define timer_resume NULL #endif -static struct sysdev_class timer_sysclass = { - .name = "timer", +static struct syscore_ops timer_syscore_ops = { .suspend = timer_suspend, .resume = timer_resume, }; -static int __init timer_init_sysfs(void) +static int __init timer_init_syscore_ops(void) { - int ret = sysdev_class_register(&timer_sysclass); - if (ret == 0) { - system_timer->dev.cls = &timer_sysclass; - ret = sysdev_register(&system_timer->dev); - } + register_syscore_ops(&timer_syscore_ops); - return ret; + return 0; } -device_initcall(timer_init_sysfs); +device_initcall(timer_init_syscore_ops); void __init time_init(void) { diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3b54ad19d489..d52eec268b47 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -234,7 +234,6 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", str, err, ++die_counter); - sysfs_printk_last_file(); /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c index 0a95be1512bb..41669ecc1f91 100644 --- a/arch/arm/mach-davinci/cpufreq.c +++ b/arch/arm/mach-davinci/cpufreq.c @@ -94,9 +94,7 @@ static int davinci_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return ret; - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, - dev_driver_string(cpufreq.dev), - "transition: %u --> %u\n", freqs.old, freqs.new); + dev_dbg(&cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new); ret = cpufreq_frequency_table_target(policy, pdata->freq_table, freqs.new, relation, &idx); diff --git a/arch/arm/mach-exynos4/pm.c b/arch/arm/mach-exynos4/pm.c index 10d917d9e3ad..8755ca8dd48d 100644 --- a/arch/arm/mach-exynos4/pm.c +++ b/arch/arm/mach-exynos4/pm.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/suspend.h> +#include <linux/syscore_ops.h> #include <linux/io.h> #include <asm/cacheflush.h> @@ -372,7 +373,27 @@ void exynos4_scu_enable(void __iomem *scu_base) flush_cache_all(); } -static int exynos4_pm_resume(struct sys_device *dev) +static struct sysdev_driver exynos4_pm_driver = { + .add = exynos4_pm_add, +}; + +static __init int exynos4_pm_drvinit(void) +{ + unsigned int tmp; + + s3c_pm_init(); + + /* All wakeup disable */ + + tmp = __raw_readl(S5P_WAKEUP_MASK); + tmp |= ((0xFF << 8) | (0x1F << 1)); + __raw_writel(tmp, S5P_WAKEUP_MASK); + + return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver); +} +arch_initcall(exynos4_pm_drvinit); + +static void exynos4_pm_resume(void) { /* For release retention */ @@ -394,27 +415,15 @@ static int exynos4_pm_resume(struct sys_device *dev) /* enable L2X0*/ writel_relaxed(1, S5P_VA_L2CC + L2X0_CTRL); #endif - - return 0; } -static struct sysdev_driver exynos4_pm_driver = { - .add = exynos4_pm_add, +static struct syscore_ops exynos4_pm_syscore_ops = { .resume = exynos4_pm_resume, }; -static __init int exynos4_pm_drvinit(void) +static __init int exynos4_pm_syscore_init(void) { - unsigned int tmp; - - s3c_pm_init(); - - /* All wakeup disable */ - - tmp = __raw_readl(S5P_WAKEUP_MASK); - tmp |= ((0xFF << 8) | (0x1F << 1)); - __raw_writel(tmp, S5P_WAKEUP_MASK); - - return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver); + register_syscore_ops(&exynos4_pm_syscore_ops); + return 0; } -arch_initcall(exynos4_pm_drvinit); +arch_initcall(exynos4_pm_syscore_init); diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index bdd257921cfb..46adca068f2c 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -4,6 +4,7 @@ menu "Footbridge Implementations" config ARCH_CATS bool "CATS" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA @@ -59,6 +60,7 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c index 441c6ce0d555..7020f1a3feca 100644 --- a/arch/arm/mach-footbridge/isa-timer.c +++ b/arch/arm/mach-footbridge/isa-timer.c @@ -10,53 +10,16 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/io.h> +#include <linux/spinlock.h> #include <linux/timex.h> #include <asm/irq.h> - +#include <asm/i8253.h> #include <asm/mach/time.h> #include "common.h" -#define PIT_MODE 0x43 -#define PIT_CH0 0x40 - -#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) - -static cycle_t pit_read(struct clocksource *cs) -{ - unsigned long flags; - static int old_count; - static u32 old_jifs; - int count; - u32 jifs; - - raw_local_irq_save(flags); - - jifs = jiffies; - outb_p(0x00, PIT_MODE); /* latch the count */ - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb_p(PIT_CH0) << 8; - - if (count > old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_local_irq_restore(flags); - - count = (PIT_LATCH - 1) - count; - - return (cycle_t)(jifs * PIT_LATCH) + count; -} - -static struct clocksource pit_cs = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), -}; +DEFINE_RAW_SPINLOCK(i8253_lock); static void pit_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) @@ -121,7 +84,7 @@ static void __init isa_timer_init(void) pit_ce.max_delta_ns = clockevent_delta2ns(0x7fff, &pit_ce); pit_ce.min_delta_ns = clockevent_delta2ns(0x000f, &pit_ce); - clocksource_register_hz(&pit_cs, PIT_TICK_RATE); + clocksource_i8253_init(); setup_irq(pit_ce.irq, &pit_timer_irq); clockevents_register_device(&pit_ce); diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 980803ff348c..d3e96451529c 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -24,7 +24,7 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/string.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/amba/bus.h> #include <linux/amba/kmi.h> #include <linux/clocksource.h> @@ -180,13 +180,13 @@ static void __init ap_init_irq(void) #ifdef CONFIG_PM static unsigned long ic_irq_enable; -static int irq_suspend(struct sys_device *dev, pm_message_t state) +static int irq_suspend(void) { ic_irq_enable = readl(VA_IC_BASE + IRQ_ENABLE); return 0; } -static int irq_resume(struct sys_device *dev) +static void irq_resume(void) { /* disable all irq sources */ writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR); @@ -194,33 +194,25 @@ static int irq_resume(struct sys_device *dev) writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR); writel(ic_irq_enable, VA_IC_BASE + IRQ_ENABLE_SET); - return 0; } #else #define irq_suspend NULL #define irq_resume NULL #endif -static struct sysdev_class irq_class = { - .name = "irq", +static struct syscore_ops irq_syscore_ops = { .suspend = irq_suspend, .resume = irq_resume, }; -static struct sys_device irq_device = { - .id = 0, - .cls = &irq_class, -}; - -static int __init irq_init_sysfs(void) +static int __init irq_syscore_init(void) { - int ret = sysdev_class_register(&irq_class); - if (ret == 0) - ret = sysdev_register(&irq_device); - return ret; + register_syscore_ops(&irq_syscore_ops); + + return 0; } -device_initcall(irq_init_sysfs); +device_initcall(irq_syscore_init); /* * Flash handling. diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c index 6588c22b8a64..fe31d933f0ed 100644 --- a/arch/arm/mach-omap1/pm_bus.c +++ b/arch/arm/mach-omap1/pm_bus.c @@ -24,75 +24,50 @@ #ifdef CONFIG_PM_RUNTIME static int omap1_pm_runtime_suspend(struct device *dev) { - struct clk *iclk, *fclk; - int ret = 0; + int ret; dev_dbg(dev, "%s\n", __func__); ret = pm_generic_runtime_suspend(dev); + if (ret) + return ret; - fclk = clk_get(dev, "fck"); - if (!IS_ERR(fclk)) { - clk_disable(fclk); - clk_put(fclk); - } - - iclk = clk_get(dev, "ick"); - if (!IS_ERR(iclk)) { - clk_disable(iclk); - clk_put(iclk); + ret = pm_runtime_clk_suspend(dev); + if (ret) { + pm_generic_runtime_resume(dev); + return ret; } return 0; -}; +} static int omap1_pm_runtime_resume(struct device *dev) { - struct clk *iclk, *fclk; - dev_dbg(dev, "%s\n", __func__); - iclk = clk_get(dev, "ick"); - if (!IS_ERR(iclk)) { - clk_enable(iclk); - clk_put(iclk); - } + pm_runtime_clk_resume(dev); + return pm_generic_runtime_resume(dev); +} - fclk = clk_get(dev, "fck"); - if (!IS_ERR(fclk)) { - clk_enable(fclk); - clk_put(fclk); - } +static struct dev_power_domain default_power_domain = { + .ops = { + .runtime_suspend = omap1_pm_runtime_suspend, + .runtime_resume = omap1_pm_runtime_resume, + USE_PLATFORM_PM_SLEEP_OPS + }, +}; - return pm_generic_runtime_resume(dev); +static struct pm_clk_notifier_block platform_bus_notifier = { + .pwr_domain = &default_power_domain, + .con_ids = { "ick", "fck", NULL, }, }; static int __init omap1_pm_runtime_init(void) { - const struct dev_pm_ops *pm; - struct dev_pm_ops *omap_pm; - if (!cpu_class_is_omap1()) return -ENODEV; - pm = platform_bus_get_pm_ops(); - if (!pm) { - pr_err("%s: unable to get dev_pm_ops from platform_bus\n", - __func__); - return -ENODEV; - } - - omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL); - if (!omap_pm) { - pr_err("%s: unable to alloc memory for new dev_pm_ops\n", - __func__); - return -ENOMEM; - } - - omap_pm->runtime_suspend = omap1_pm_runtime_suspend; - omap_pm->runtime_resume = omap1_pm_runtime_resume; - - platform_bus_set_pm_ops(omap_pm); + pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier); return 0; } diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 512b15204450..66dfbccacd25 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -59,10 +59,10 @@ endif # Power Management ifeq ($(CONFIG_PM),y) obj-$(CONFIG_ARCH_OMAP2) += pm24xx.o -obj-$(CONFIG_ARCH_OMAP2) += sleep24xx.o pm_bus.o +obj-$(CONFIG_ARCH_OMAP2) += sleep24xx.o obj-$(CONFIG_ARCH_OMAP3) += pm34xx.o sleep34xx.o \ - cpuidle34xx.o pm_bus.o -obj-$(CONFIG_ARCH_OMAP4) += pm44xx.o pm_bus.o + cpuidle34xx.o +obj-$(CONFIG_ARCH_OMAP4) += pm44xx.o obj-$(CONFIG_PM_DEBUG) += pm-debug.o obj-$(CONFIG_OMAP_SMARTREFLEX) += sr_device.o smartreflex.o obj-$(CONFIG_OMAP_SMARTREFLEX_CLASS3) += smartreflex-class3.o diff --git a/arch/arm/mach-omap2/pm_bus.c b/arch/arm/mach-omap2/pm_bus.c deleted file mode 100644 index 5acd2ab298b1..000000000000 --- a/arch/arm/mach-omap2/pm_bus.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Runtime PM support code for OMAP - * - * Author: Kevin Hilman, Deep Root Systems, LLC - * - * Copyright (C) 2010 Texas Instruments, Inc. - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/io.h> -#include <linux/pm_runtime.h> -#include <linux/platform_device.h> -#include <linux/mutex.h> - -#include <plat/omap_device.h> -#include <plat/omap-pm.h> - -#ifdef CONFIG_PM_RUNTIME -static int omap_pm_runtime_suspend(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - int r, ret = 0; - - dev_dbg(dev, "%s\n", __func__); - - ret = pm_generic_runtime_suspend(dev); - - if (!ret && dev->parent == &omap_device_parent) { - r = omap_device_idle(pdev); - WARN_ON(r); - } - - return ret; -}; - -static int omap_pm_runtime_resume(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - int r; - - dev_dbg(dev, "%s\n", __func__); - - if (dev->parent == &omap_device_parent) { - r = omap_device_enable(pdev); - WARN_ON(r); - } - - return pm_generic_runtime_resume(dev); -}; -#else -#define omap_pm_runtime_suspend NULL -#define omap_pm_runtime_resume NULL -#endif /* CONFIG_PM_RUNTIME */ - -static int __init omap_pm_runtime_init(void) -{ - const struct dev_pm_ops *pm; - struct dev_pm_ops *omap_pm; - - pm = platform_bus_get_pm_ops(); - if (!pm) { - pr_err("%s: unable to get dev_pm_ops from platform_bus\n", - __func__); - return -ENODEV; - } - - omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL); - if (!omap_pm) { - pr_err("%s: unable to alloc memory for new dev_pm_ops\n", - __func__); - return -ENOMEM; - } - - omap_pm->runtime_suspend = omap_pm_runtime_suspend; - omap_pm->runtime_resume = omap_pm_runtime_resume; - - platform_bus_set_pm_ops(omap_pm); - - return 0; -} -core_initcall(omap_pm_runtime_init); diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index bfbecec6d05f..810a982a66f8 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/bitops.h> diff --git a/arch/arm/mach-pxa/clock-pxa2xx.c b/arch/arm/mach-pxa/clock-pxa2xx.c index 1ce090448493..1d5859d9a0e3 100644 --- a/arch/arm/mach-pxa/clock-pxa2xx.c +++ b/arch/arm/mach-pxa/clock-pxa2xx.c @@ -9,7 +9,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <mach/pxa2xx-regs.h> @@ -33,32 +33,22 @@ const struct clkops clk_pxa2xx_cken_ops = { #ifdef CONFIG_PM static uint32_t saved_cken; -static int pxa2xx_clock_suspend(struct sys_device *d, pm_message_t state) +static int pxa2xx_clock_suspend(void) { saved_cken = CKEN; return 0; } -static int pxa2xx_clock_resume(struct sys_device *d) +static void pxa2xx_clock_resume(void) { CKEN = saved_cken; - return 0; } #else #define pxa2xx_clock_suspend NULL #define pxa2xx_clock_resume NULL #endif -struct sysdev_class pxa2xx_clock_sysclass = { - .name = "pxa2xx-clock", +struct syscore_ops pxa2xx_clock_syscore_ops = { .suspend = pxa2xx_clock_suspend, .resume = pxa2xx_clock_resume, }; - -static int __init pxa2xx_clock_init(void) -{ - if (cpu_is_pxa2xx()) - return sysdev_class_register(&pxa2xx_clock_sysclass); - return 0; -} -postcore_initcall(pxa2xx_clock_init); diff --git a/arch/arm/mach-pxa/clock-pxa3xx.c b/arch/arm/mach-pxa/clock-pxa3xx.c index 3f864cd0bd28..2a37a9a8f621 100644 --- a/arch/arm/mach-pxa/clock-pxa3xx.c +++ b/arch/arm/mach-pxa/clock-pxa3xx.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> +#include <linux/syscore_ops.h> #include <mach/smemc.h> #include <mach/pxa3xx-regs.h> @@ -182,7 +183,7 @@ const struct clkops clk_pxa3xx_pout_ops = { static uint32_t cken[2]; static uint32_t accr; -static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state) +static int pxa3xx_clock_suspend(void) { cken[0] = CKENA; cken[1] = CKENB; @@ -190,28 +191,18 @@ static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state) return 0; } -static int pxa3xx_clock_resume(struct sys_device *d) +static void pxa3xx_clock_resume(void) { ACCR = accr; CKENA = cken[0]; CKENB = cken[1]; - return 0; } #else #define pxa3xx_clock_suspend NULL #define pxa3xx_clock_resume NULL #endif -struct sysdev_class pxa3xx_clock_sysclass = { - .name = "pxa3xx-clock", +struct syscore_ops pxa3xx_clock_syscore_ops = { .suspend = pxa3xx_clock_suspend, .resume = pxa3xx_clock_resume, }; - -static int __init pxa3xx_clock_init(void) -{ - if (cpu_is_pxa3xx() || cpu_is_pxa95x()) - return sysdev_class_register(&pxa3xx_clock_sysclass); - return 0; -} -postcore_initcall(pxa3xx_clock_init); diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h index f9f349a21b54..1f2fb9c43f06 100644 --- a/arch/arm/mach-pxa/clock.h +++ b/arch/arm/mach-pxa/clock.h @@ -1,5 +1,5 @@ #include <linux/clkdev.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> struct clkops { void (*enable)(struct clk *); @@ -54,7 +54,7 @@ extern const struct clkops clk_pxa2xx_cken_ops; void clk_pxa2xx_cken_enable(struct clk *clk); void clk_pxa2xx_cken_disable(struct clk *clk); -extern struct sysdev_class pxa2xx_clock_sysclass; +extern struct syscore_ops pxa2xx_clock_syscore_ops; #if defined(CONFIG_PXA3xx) || defined(CONFIG_PXA95x) #define DEFINE_PXA3_CKEN(_name, _cken, _rate, _delay) \ @@ -74,5 +74,6 @@ extern const struct clkops clk_pxa3xx_smemc_ops; extern void clk_pxa3xx_cken_enable(struct clk *); extern void clk_pxa3xx_cken_disable(struct clk *); -extern struct sysdev_class pxa3xx_clock_sysclass; +extern struct syscore_ops pxa3xx_clock_syscore_ops; + #endif diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c index b88d601a8090..13518a705399 100644 --- a/arch/arm/mach-pxa/cm-x270.c +++ b/arch/arm/mach-pxa/cm-x270.c @@ -10,7 +10,6 @@ */ #include <linux/platform_device.h> -#include <linux/sysdev.h> #include <linux/irq.h> #include <linux/gpio.h> #include <linux/delay.h> diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c index 8225e2e58c6e..a10996782476 100644 --- a/arch/arm/mach-pxa/cm-x2xx.c +++ b/arch/arm/mach-pxa/cm-x2xx.c @@ -10,7 +10,7 @@ */ #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/irq.h> #include <linux/gpio.h> @@ -388,7 +388,7 @@ static inline void cmx2xx_init_display(void) {} #ifdef CONFIG_PM static unsigned long sleep_save_msc[10]; -static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state) +static int cmx2xx_suspend(void) { cmx2xx_pci_suspend(); @@ -412,7 +412,7 @@ static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int cmx2xx_resume(struct sys_device *dev) +static void cmx2xx_resume(void) { cmx2xx_pci_resume(); @@ -420,27 +420,18 @@ static int cmx2xx_resume(struct sys_device *dev) __raw_writel(sleep_save_msc[0], MSC0); __raw_writel(sleep_save_msc[1], MSC1); __raw_writel(sleep_save_msc[2], MSC2); - - return 0; } -static struct sysdev_class cmx2xx_pm_sysclass = { - .name = "pm", +static struct syscore_ops cmx2xx_pm_syscore_ops = { .resume = cmx2xx_resume, .suspend = cmx2xx_suspend, }; -static struct sys_device cmx2xx_pm_device = { - .cls = &cmx2xx_pm_sysclass, -}; - static int __init cmx2xx_pm_init(void) { - int error; - error = sysdev_class_register(&cmx2xx_pm_sysclass); - if (error == 0) - error = sysdev_register(&cmx2xx_pm_device); - return error; + register_syscore_ops(&cmx2xx_pm_syscore_ops); + + return 0; } #else static int __init cmx2xx_pm_init(void) { return 0; } diff --git a/arch/arm/mach-pxa/colibri-evalboard.c b/arch/arm/mach-pxa/colibri-evalboard.c index 81c3c433e2d6..d28e802e2448 100644 --- a/arch/arm/mach-pxa/colibri-evalboard.c +++ b/arch/arm/mach-pxa/colibri-evalboard.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> #include <linux/interrupt.h> #include <linux/gpio.h> #include <asm/mach-types.h> diff --git a/arch/arm/mach-pxa/colibri-pxa270-income.c b/arch/arm/mach-pxa/colibri-pxa270-income.c index 44c1b77ece67..80538b8806ed 100644 --- a/arch/arm/mach-pxa/colibri-pxa270-income.c +++ b/arch/arm/mach-pxa/colibri-pxa270-income.c @@ -22,7 +22,6 @@ #include <linux/platform_device.h> #include <linux/pwm_backlight.h> #include <linux/i2c/pxa-i2c.h> -#include <linux/sysdev.h> #include <asm/irq.h> #include <asm/mach-types.h> diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c index 6fc5d328ba7f..7545a48ed88b 100644 --- a/arch/arm/mach-pxa/colibri-pxa270.c +++ b/arch/arm/mach-pxa/colibri-pxa270.c @@ -17,7 +17,6 @@ #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> #include <linux/ucb1400.h> #include <asm/mach/arch.h> diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h index a079d8baa45a..e6c9344a95ae 100644 --- a/arch/arm/mach-pxa/generic.h +++ b/arch/arm/mach-pxa/generic.h @@ -61,10 +61,10 @@ extern unsigned pxa3xx_get_clk_frequency_khz(int); #define pxa3xx_get_clk_frequency_khz(x) (0) #endif -extern struct sysdev_class pxa_irq_sysclass; -extern struct sysdev_class pxa_gpio_sysclass; -extern struct sysdev_class pxa2xx_mfp_sysclass; -extern struct sysdev_class pxa3xx_mfp_sysclass; +extern struct syscore_ops pxa_irq_syscore_ops; +extern struct syscore_ops pxa_gpio_syscore_ops; +extern struct syscore_ops pxa2xx_mfp_syscore_ops; +extern struct syscore_ops pxa3xx_mfp_syscore_ops; void __init pxa_set_ffuart_info(void *info); void __init pxa_set_btuart_info(void *info); diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 6251e3f5c62c..32ed551bf9c5 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -15,7 +15,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/io.h> #include <linux/irq.h> @@ -183,7 +183,7 @@ void __init pxa_init_irq(int irq_nr, set_wake_t fn) static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32]; static unsigned long saved_ipr[MAX_INTERNAL_IRQS]; -static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state) +static int pxa_irq_suspend(void) { int i; @@ -202,7 +202,7 @@ static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int pxa_irq_resume(struct sys_device *dev) +static void pxa_irq_resume(void) { int i; @@ -218,22 +218,13 @@ static int pxa_irq_resume(struct sys_device *dev) __raw_writel(saved_ipr[i], IRQ_BASE + IPR(i)); __raw_writel(1, IRQ_BASE + ICCR); - return 0; } #else #define pxa_irq_suspend NULL #define pxa_irq_resume NULL #endif -struct sysdev_class pxa_irq_sysclass = { - .name = "irq", +struct syscore_ops pxa_irq_syscore_ops = { .suspend = pxa_irq_suspend, .resume = pxa_irq_resume, }; - -static int __init pxa_irq_init(void) -{ - return sysdev_class_register(&pxa_irq_sysclass); -} - -core_initcall(pxa_irq_init); diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index f5de541725b1..6cf8180bf5bd 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -15,7 +15,7 @@ #include <linux/init.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/bitops.h> @@ -159,30 +159,22 @@ static void __init lpd270_init_irq(void) #ifdef CONFIG_PM -static int lpd270_irq_resume(struct sys_device *dev) +static void lpd270_irq_resume(void) { __raw_writew(lpd270_irq_enabled, LPD270_INT_MASK); - return 0; } -static struct sysdev_class lpd270_irq_sysclass = { - .name = "cpld_irq", +static struct syscore_ops lpd270_irq_syscore_ops = { .resume = lpd270_irq_resume, }; -static struct sys_device lpd270_irq_device = { - .cls = &lpd270_irq_sysclass, -}; - static int __init lpd270_irq_device_init(void) { - int ret = -ENODEV; if (machine_is_logicpd_pxa270()) { - ret = sysdev_class_register(&lpd270_irq_sysclass); - if (ret == 0) - ret = sysdev_register(&lpd270_irq_device); + register_syscore_ops(&lpd270_irq_syscore_ops); + return 0; } - return ret; + return -ENODEV; } device_initcall(lpd270_irq_device_init); diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 3ede978c83d9..e10ddb827147 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -15,7 +15,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/major.h> #include <linux/fb.h> #include <linux/interrupt.h> @@ -176,31 +176,22 @@ static void __init lubbock_init_irq(void) #ifdef CONFIG_PM -static int lubbock_irq_resume(struct sys_device *dev) +static void lubbock_irq_resume(void) { LUB_IRQ_MASK_EN = lubbock_irq_enabled; - return 0; } -static struct sysdev_class lubbock_irq_sysclass = { - .name = "cpld_irq", +static struct syscore_ops lubbock_irq_syscore_ops = { .resume = lubbock_irq_resume, }; -static struct sys_device lubbock_irq_device = { - .cls = &lubbock_irq_sysclass, -}; - static int __init lubbock_irq_device_init(void) { - int ret = -ENODEV; - if (machine_is_lubbock()) { - ret = sysdev_class_register(&lubbock_irq_sysclass); - if (ret == 0) - ret = sysdev_register(&lubbock_irq_device); + register_syscore_ops(&lubbock_irq_syscore_ops); + return 0; } - return ret; + return -ENODEV; } device_initcall(lubbock_irq_device_init); diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 95163baca29e..3479e2b3b511 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -15,7 +15,7 @@ #include <linux/init.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/bitops.h> @@ -185,31 +185,21 @@ static void __init mainstone_init_irq(void) #ifdef CONFIG_PM -static int mainstone_irq_resume(struct sys_device *dev) +static void mainstone_irq_resume(void) { MST_INTMSKENA = mainstone_irq_enabled; - return 0; } -static struct sysdev_class mainstone_irq_sysclass = { - .name = "cpld_irq", +static struct syscore_ops mainstone_irq_syscore_ops = { .resume = mainstone_irq_resume, }; -static struct sys_device mainstone_irq_device = { - .cls = &mainstone_irq_sysclass, -}; - static int __init mainstone_irq_device_init(void) { - int ret = -ENODEV; + if (machine_is_mainstone()) + register_syscore_ops(&mainstone_irq_syscore_ops); - if (machine_is_mainstone()) { - ret = sysdev_class_register(&mainstone_irq_sysclass); - if (ret == 0) - ret = sysdev_register(&mainstone_irq_device); - } - return ret; + return 0; } device_initcall(mainstone_irq_device_init); diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c index 1d1419b73457..87ae3129f4f7 100644 --- a/arch/arm/mach-pxa/mfp-pxa2xx.c +++ b/arch/arm/mach-pxa/mfp-pxa2xx.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <mach/gpio.h> #include <mach/pxa2xx-regs.h> @@ -338,7 +338,7 @@ static unsigned long saved_gafr[2][4]; static unsigned long saved_gpdr[4]; static unsigned long saved_pgsr[4]; -static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state) +static int pxa2xx_mfp_suspend(void) { int i; @@ -365,7 +365,7 @@ static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state) return 0; } -static int pxa2xx_mfp_resume(struct sys_device *d) +static void pxa2xx_mfp_resume(void) { int i; @@ -376,15 +376,13 @@ static int pxa2xx_mfp_resume(struct sys_device *d) PGSR(i) = saved_pgsr[i]; } PSSR = PSSR_RDH | PSSR_PH; - return 0; } #else #define pxa2xx_mfp_suspend NULL #define pxa2xx_mfp_resume NULL #endif -struct sysdev_class pxa2xx_mfp_sysclass = { - .name = "mfp", +struct syscore_ops pxa2xx_mfp_syscore_ops = { .suspend = pxa2xx_mfp_suspend, .resume = pxa2xx_mfp_resume, }; @@ -409,6 +407,6 @@ static int __init pxa2xx_mfp_init(void) for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++) gpdr_lpm[i] = GPDR(i * 32); - return sysdev_class_register(&pxa2xx_mfp_sysclass); + return 0; } postcore_initcall(pxa2xx_mfp_init); diff --git a/arch/arm/mach-pxa/mfp-pxa3xx.c b/arch/arm/mach-pxa/mfp-pxa3xx.c index 7a270eecd480..89863a01ecd7 100644 --- a/arch/arm/mach-pxa/mfp-pxa3xx.c +++ b/arch/arm/mach-pxa/mfp-pxa3xx.c @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <mach/hardware.h> #include <mach/mfp-pxa3xx.h> @@ -31,13 +31,13 @@ * a pull-down mode if they're an active low chip select, and we're * just entering standby. */ -static int pxa3xx_mfp_suspend(struct sys_device *d, pm_message_t state) +static int pxa3xx_mfp_suspend(void) { mfp_config_lpm(); return 0; } -static int pxa3xx_mfp_resume(struct sys_device *d) +static void pxa3xx_mfp_resume(void) { mfp_config_run(); @@ -47,24 +47,13 @@ static int pxa3xx_mfp_resume(struct sys_device *d) * preserve them here in case they will be referenced later */ ASCR &= ~(ASCR_RDH | ASCR_D1S | ASCR_D2S | ASCR_D3S); - return 0; } #else #define pxa3xx_mfp_suspend NULL #define pxa3xx_mfp_resume NULL #endif -struct sysdev_class pxa3xx_mfp_sysclass = { - .name = "mfp", +struct syscore_ops pxa3xx_mfp_syscore_ops = { .suspend = pxa3xx_mfp_suspend, - .resume = pxa3xx_mfp_resume, + .resume = pxa3xx_mfp_resume, }; - -static int __init mfp_init_devicefs(void) -{ - if (cpu_is_pxa3xx()) - return sysdev_class_register(&pxa3xx_mfp_sysclass); - - return 0; -} -postcore_initcall(mfp_init_devicefs); diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c index 23925db8ff74..e3470137c934 100644 --- a/arch/arm/mach-pxa/mioa701.c +++ b/arch/arm/mach-pxa/mioa701.c @@ -22,7 +22,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/input.h> #include <linux/delay.h> #include <linux/gpio_keys.h> @@ -488,7 +488,7 @@ static void install_bootstrap(void) } -static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state) +static int mioa701_sys_suspend(void) { int i = 0, is_bt_on; u32 *mem_resume_vector = phys_to_virt(RESUME_VECTOR_ADDR); @@ -514,7 +514,7 @@ static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state) return 0; } -static int mioa701_sys_resume(struct sys_device *sysdev) +static void mioa701_sys_resume(void) { int i = 0; u32 *mem_resume_vector = phys_to_virt(RESUME_VECTOR_ADDR); @@ -527,43 +527,18 @@ static int mioa701_sys_resume(struct sys_device *sysdev) *mem_resume_enabler = save_buffer[i++]; *mem_resume_bt = save_buffer[i++]; *mem_resume_unknown = save_buffer[i++]; - - return 0; } -static struct sysdev_class mioa701_sysclass = { - .name = "mioa701", -}; - -static struct sys_device sysdev_bootstrap = { - .cls = &mioa701_sysclass, -}; - -static struct sysdev_driver driver_bootstrap = { - .suspend = &mioa701_sys_suspend, - .resume = &mioa701_sys_resume, +static struct syscore_ops mioa701_syscore_ops = { + .suspend = mioa701_sys_suspend, + .resume = mioa701_sys_resume, }; static int __init bootstrap_init(void) { - int rc; int save_size = mioa701_bootstrap_lg + (sizeof(u32) * 3); - rc = sysdev_class_register(&mioa701_sysclass); - if (rc) { - printk(KERN_ERR "Failed registering mioa701 sys class\n"); - return -ENODEV; - } - rc = sysdev_register(&sysdev_bootstrap); - if (rc) { - printk(KERN_ERR "Failed registering mioa701 sys device\n"); - return -ENODEV; - } - rc = sysdev_driver_register(&mioa701_sysclass, &driver_bootstrap); - if (rc) { - printk(KERN_ERR "Failed registering PMU sys driver\n"); - return -ENODEV; - } + register_syscore_ops(&mioa701_syscore_ops); save_buffer = kmalloc(save_size, GFP_KERNEL); if (!save_buffer) @@ -576,9 +551,7 @@ static int __init bootstrap_init(void) static void bootstrap_exit(void) { kfree(save_buffer); - sysdev_driver_unregister(&mioa701_sysclass, &driver_bootstrap); - sysdev_unregister(&sysdev_bootstrap); - sysdev_class_unregister(&mioa701_sysclass); + unregister_syscore_ops(&mioa701_syscore_ops); printk(KERN_CRIT "Unregistering mioa701 suspend will hang next" "resume !!!\n"); diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c index a6f898cbfac9..4061ecddee70 100644 --- a/arch/arm/mach-pxa/palmld.c +++ b/arch/arm/mach-pxa/palmld.c @@ -24,7 +24,6 @@ #include <linux/gpio.h> #include <linux/wm97xx.h> #include <linux/power_supply.h> -#include <linux/sysdev.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c index 8aadad55fbe4..20d1b18b1733 100644 --- a/arch/arm/mach-pxa/palmtreo.c +++ b/arch/arm/mach-pxa/palmtreo.c @@ -25,7 +25,6 @@ #include <linux/pwm_backlight.h> #include <linux/gpio.h> #include <linux/power_supply.h> -#include <linux/sysdev.h> #include <linux/w1-gpio.h> #include <asm/mach-types.h> diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 3b8a4f37dbbe..65f24f0b77e8 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -19,7 +19,7 @@ */ #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/delay.h> #include <linux/irq.h> #include <linux/gpio_keys.h> @@ -233,9 +233,9 @@ static struct palmz72_resume_info palmz72_resume_info = { static unsigned long store_ptr; -/* sys_device for Palm Zire 72 PM */ +/* syscore_ops for Palm Zire 72 PM */ -static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg) +static int palmz72_pm_suspend(void) { /* setup the resume_info struct for the original bootloader */ palmz72_resume_info.resume_addr = (u32) cpu_resume; @@ -249,31 +249,23 @@ static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg) return 0; } -static int palmz72_pm_resume(struct sys_device *dev) +static void palmz72_pm_resume(void) { *PALMZ72_SAVE_DWORD = store_ptr; - return 0; } -static struct sysdev_class palmz72_pm_sysclass = { - .name = "palmz72_pm", +static struct syscore_ops palmz72_pm_syscore_ops = { .suspend = palmz72_pm_suspend, .resume = palmz72_pm_resume, }; -static struct sys_device palmz72_pm_device = { - .cls = &palmz72_pm_sysclass, -}; - static int __init palmz72_pm_init(void) { - int ret = -ENODEV; if (machine_is_palmz72()) { - ret = sysdev_class_register(&palmz72_pm_sysclass); - if (ret == 0) - ret = sysdev_register(&palmz72_pm_device); + register_syscore_ops(&palmz72_pm_syscore_ops); + return 0; } - return ret; + return -ENODEV; } device_initcall(palmz72_pm_init); diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index a4af8c52d7ee..fed363cec9c6 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -21,7 +21,7 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/suspend.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/irq.h> #include <asm/mach/map.h> @@ -350,21 +350,9 @@ static struct platform_device *pxa25x_devices[] __initdata = { &pxa_device_asoc_platform, }; -static struct sys_device pxa25x_sysdev[] = { - { - .cls = &pxa_irq_sysclass, - }, { - .cls = &pxa2xx_mfp_sysclass, - }, { - .cls = &pxa_gpio_sysclass, - }, { - .cls = &pxa2xx_clock_sysclass, - } -}; - static int __init pxa25x_init(void) { - int i, ret = 0; + int ret = 0; if (cpu_is_pxa25x()) { @@ -377,11 +365,10 @@ static int __init pxa25x_init(void) pxa25x_init_pm(); - for (i = 0; i < ARRAY_SIZE(pxa25x_sysdev); i++) { - ret = sysdev_register(&pxa25x_sysdev[i]); - if (ret) - pr_err("failed to register sysdev[%d]\n", i); - } + register_syscore_ops(&pxa_irq_syscore_ops); + register_syscore_ops(&pxa2xx_mfp_syscore_ops); + register_syscore_ops(&pxa_gpio_syscore_ops); + register_syscore_ops(&pxa2xx_clock_syscore_ops); ret = platform_add_devices(pxa25x_devices, ARRAY_SIZE(pxa25x_devices)); diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 909756eaf4b7..2fecbec58d88 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -16,7 +16,7 @@ #include <linux/init.h> #include <linux/suspend.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/io.h> #include <linux/irq.h> #include <linux/i2c/pxa-i2c.h> @@ -428,21 +428,9 @@ static struct platform_device *devices[] __initdata = { &pxa27x_device_pwm1, }; -static struct sys_device pxa27x_sysdev[] = { - { - .cls = &pxa_irq_sysclass, - }, { - .cls = &pxa2xx_mfp_sysclass, - }, { - .cls = &pxa_gpio_sysclass, - }, { - .cls = &pxa2xx_clock_sysclass, - } -}; - static int __init pxa27x_init(void) { - int i, ret = 0; + int ret = 0; if (cpu_is_pxa27x()) { @@ -455,11 +443,10 @@ static int __init pxa27x_init(void) pxa27x_init_pm(); - for (i = 0; i < ARRAY_SIZE(pxa27x_sysdev); i++) { - ret = sysdev_register(&pxa27x_sysdev[i]); - if (ret) - pr_err("failed to register sysdev[%d]\n", i); - } + register_syscore_ops(&pxa_irq_syscore_ops); + register_syscore_ops(&pxa2xx_mfp_syscore_ops); + register_syscore_ops(&pxa_gpio_syscore_ops); + register_syscore_ops(&pxa2xx_clock_syscore_ops); ret = platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 8dd107391157..8521d7d6f1da 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -20,7 +20,7 @@ #include <linux/platform_device.h> #include <linux/irq.h> #include <linux/io.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/i2c/pxa-i2c.h> #include <asm/mach/map.h> @@ -427,21 +427,9 @@ static struct platform_device *devices[] __initdata = { &pxa27x_device_pwm1, }; -static struct sys_device pxa3xx_sysdev[] = { - { - .cls = &pxa_irq_sysclass, - }, { - .cls = &pxa3xx_mfp_sysclass, - }, { - .cls = &pxa_gpio_sysclass, - }, { - .cls = &pxa3xx_clock_sysclass, - } -}; - static int __init pxa3xx_init(void) { - int i, ret = 0; + int ret = 0; if (cpu_is_pxa3xx()) { @@ -462,11 +450,10 @@ static int __init pxa3xx_init(void) pxa3xx_init_pm(); - for (i = 0; i < ARRAY_SIZE(pxa3xx_sysdev); i++) { - ret = sysdev_register(&pxa3xx_sysdev[i]); - if (ret) - pr_err("failed to register sysdev[%d]\n", i); - } + register_syscore_ops(&pxa_irq_syscore_ops); + register_syscore_ops(&pxa3xx_mfp_syscore_ops); + register_syscore_ops(&pxa_gpio_syscore_ops); + register_syscore_ops(&pxa3xx_clock_syscore_ops); ret = platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/arch/arm/mach-pxa/pxa95x.c b/arch/arm/mach-pxa/pxa95x.c index 23b229bd06e9..ecc82a330fad 100644 --- a/arch/arm/mach-pxa/pxa95x.c +++ b/arch/arm/mach-pxa/pxa95x.c @@ -18,7 +18,7 @@ #include <linux/i2c/pxa-i2c.h> #include <linux/irq.h> #include <linux/io.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <mach/hardware.h> #include <mach/gpio.h> @@ -260,16 +260,6 @@ static struct platform_device *devices[] __initdata = { &pxa27x_device_pwm1, }; -static struct sys_device pxa95x_sysdev[] = { - { - .cls = &pxa_irq_sysclass, - }, { - .cls = &pxa_gpio_sysclass, - }, { - .cls = &pxa3xx_clock_sysclass, - } -}; - static int __init pxa95x_init(void) { int ret = 0, i; @@ -293,11 +283,9 @@ static int __init pxa95x_init(void) if ((ret = pxa_init_dma(IRQ_DMA, 32))) return ret; - for (i = 0; i < ARRAY_SIZE(pxa95x_sysdev); i++) { - ret = sysdev_register(&pxa95x_sysdev[i]); - if (ret) - pr_err("failed to register sysdev[%d]\n", i); - } + register_syscore_ops(&pxa_irq_syscore_ops); + register_syscore_ops(&pxa_gpio_syscore_ops); + register_syscore_ops(&pxa3xx_clock_syscore_ops); ret = platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index cd1861351f75..d130f77b6d11 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -18,7 +18,6 @@ #include <linux/init.h> #include <linux/kernel.h> -#include <linux/sysdev.h> #include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/gpio.h> diff --git a/arch/arm/mach-pxa/smemc.c b/arch/arm/mach-pxa/smemc.c index 232b7316ec08..79923058d10f 100644 --- a/arch/arm/mach-pxa/smemc.c +++ b/arch/arm/mach-pxa/smemc.c @@ -6,7 +6,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <mach/hardware.h> #include <mach/smemc.h> @@ -16,7 +16,7 @@ static unsigned long msc[2]; static unsigned long sxcnfg, memclkcfg; static unsigned long csadrcfg[4]; -static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state) +static int pxa3xx_smemc_suspend(void) { msc[0] = __raw_readl(MSC0); msc[1] = __raw_readl(MSC1); @@ -30,7 +30,7 @@ static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int pxa3xx_smemc_resume(struct sys_device *dev) +static void pxa3xx_smemc_resume(void) { __raw_writel(msc[0], MSC0); __raw_writel(msc[1], MSC1); @@ -40,34 +40,19 @@ static int pxa3xx_smemc_resume(struct sys_device *dev) __raw_writel(csadrcfg[1], CSADRCFG1); __raw_writel(csadrcfg[2], CSADRCFG2); __raw_writel(csadrcfg[3], CSADRCFG3); - - return 0; } -static struct sysdev_class smemc_sysclass = { - .name = "smemc", +static struct syscore_ops smemc_syscore_ops = { .suspend = pxa3xx_smemc_suspend, .resume = pxa3xx_smemc_resume, }; -static struct sys_device smemc_sysdev = { - .id = 0, - .cls = &smemc_sysclass, -}; - static int __init smemc_init(void) { - int ret = 0; + if (cpu_is_pxa3xx()) + register_syscore_ops(&smemc_syscore_ops); - if (cpu_is_pxa3xx()) { - ret = sysdev_class_register(&smemc_sysclass); - if (ret) - return ret; - - ret = sysdev_register(&smemc_sysdev); - } - - return ret; + return 0; } subsys_initcall(smemc_init); #endif diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index b9cfbebdfe9c..687417a93698 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/bitops.h> diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c index b523f119e0f0..903218eab56d 100644 --- a/arch/arm/mach-pxa/viper.c +++ b/arch/arm/mach-pxa/viper.c @@ -44,6 +44,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> +#include <linux/syscore_ops.h> #include <mach/pxa25x.h> #include <mach/audio.h> @@ -130,20 +131,19 @@ static u8 viper_hw_version(void) return v1; } -/* CPU sysdev */ -static int viper_cpu_suspend(struct sys_device *sysdev, pm_message_t state) +/* CPU system core operations. */ +static int viper_cpu_suspend(void) { viper_icr_set_bit(VIPER_ICR_R_DIS); return 0; } -static int viper_cpu_resume(struct sys_device *sysdev) +static void viper_cpu_resume(void) { viper_icr_clear_bit(VIPER_ICR_R_DIS); - return 0; } -static struct sysdev_driver viper_cpu_sysdev_driver = { +static struct syscore_ops viper_cpu_syscore_ops = { .suspend = viper_cpu_suspend, .resume = viper_cpu_resume, }; @@ -945,7 +945,7 @@ static void __init viper_init(void) viper_init_vcore_gpios(); viper_init_cpufreq(); - sysdev_driver_register(&cpu_sysdev_class, &viper_cpu_sysdev_driver); + register_syscore_ops(&viper_cpu_syscore_ops); if (version) { pr_info("viper: hardware v%di%d detected. " diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index f71d377c8640..67bd41488bf8 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -16,7 +16,6 @@ #include <linux/gpio_keys.h> #include <linux/input.h> #include <linux/gpio.h> -#include <linux/sysdev.h> #include <linux/usb/gpio_vbus.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> diff --git a/arch/arm/mach-s3c2410/irq.c b/arch/arm/mach-s3c2410/irq.c index 5e2f35332056..2854129f8cc7 100644 --- a/arch/arm/mach-s3c2410/irq.c +++ b/arch/arm/mach-s3c2410/irq.c @@ -23,38 +23,12 @@ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/ioport.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <plat/cpu.h> #include <plat/pm.h> -static int s3c2410_irq_add(struct sys_device *sysdev) -{ - return 0; -} - -static struct sysdev_driver s3c2410_irq_driver = { - .add = s3c2410_irq_add, +struct syscore_ops s3c24xx_irq_syscore_ops = { .suspend = s3c24xx_irq_suspend, .resume = s3c24xx_irq_resume, }; - -static int __init s3c2410_irq_init(void) -{ - return sysdev_driver_register(&s3c2410_sysclass, &s3c2410_irq_driver); -} - -arch_initcall(s3c2410_irq_init); - -static struct sysdev_driver s3c2410a_irq_driver = { - .add = s3c2410_irq_add, - .suspend = s3c24xx_irq_suspend, - .resume = s3c24xx_irq_resume, -}; - -static int __init s3c2410a_irq_init(void) -{ - return sysdev_driver_register(&s3c2410a_sysclass, &s3c2410a_irq_driver); -} - -arch_initcall(s3c2410a_irq_init); diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index 2970ea9f7c2b..1e2d536adda9 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -17,7 +17,7 @@ #include <linux/timer.h> #include <linux/init.h> #include <linux/gpio.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/dm9000.h> @@ -214,17 +214,16 @@ static struct s3c2410_uartcfg bast_uartcfgs[] __initdata = { /* NAND Flash on BAST board */ #ifdef CONFIG_PM -static int bast_pm_suspend(struct sys_device *sd, pm_message_t state) +static int bast_pm_suspend(void) { /* ensure that an nRESET is not generated on resume. */ gpio_direction_output(S3C2410_GPA(21), 1); return 0; } -static int bast_pm_resume(struct sys_device *sd) +static void bast_pm_resume(void) { s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT); - return 0; } #else @@ -232,16 +231,11 @@ static int bast_pm_resume(struct sys_device *sd) #define bast_pm_resume NULL #endif -static struct sysdev_class bast_pm_sysclass = { - .name = "mach-bast", +static struct syscore_ops bast_pm_syscore_ops = { .suspend = bast_pm_suspend, .resume = bast_pm_resume, }; -static struct sys_device bast_pm_sysdev = { - .cls = &bast_pm_sysclass, -}; - static int smartmedia_map[] = { 0 }; static int chip0_map[] = { 1 }; static int chip1_map[] = { 2 }; @@ -642,8 +636,7 @@ static void __init bast_map_io(void) static void __init bast_init(void) { - sysdev_class_register(&bast_pm_sysclass); - sysdev_register(&bast_pm_sysdev); + register_syscore_ops(&bast_pm_syscore_ops); s3c_i2c0_set_platdata(&bast_i2c_info); s3c_nand_set_platdata(&bast_nand_info); diff --git a/arch/arm/mach-s3c2410/pm.c b/arch/arm/mach-s3c2410/pm.c index 725636fc4dc3..4728f9aa7df1 100644 --- a/arch/arm/mach-s3c2410/pm.c +++ b/arch/arm/mach-s3c2410/pm.c @@ -25,6 +25,7 @@ #include <linux/errno.h> #include <linux/time.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/gpio.h> #include <linux/io.h> @@ -92,7 +93,7 @@ static void s3c2410_pm_prepare(void) } } -static int s3c2410_pm_resume(struct sys_device *dev) +static void s3c2410_pm_resume(void) { unsigned long tmp; @@ -104,10 +105,12 @@ static int s3c2410_pm_resume(struct sys_device *dev) if ( machine_is_aml_m5900() ) s3c2410_gpio_setpin(S3C2410_GPF(2), 0); - - return 0; } +struct syscore_ops s3c2410_pm_syscore_ops = { + .resume = s3c2410_pm_resume, +}; + static int s3c2410_pm_add(struct sys_device *dev) { pm_cpu_prep = s3c2410_pm_prepare; @@ -119,7 +122,6 @@ static int s3c2410_pm_add(struct sys_device *dev) #if defined(CONFIG_CPU_S3C2410) static struct sysdev_driver s3c2410_pm_driver = { .add = s3c2410_pm_add, - .resume = s3c2410_pm_resume, }; /* register ourselves */ @@ -133,7 +135,6 @@ arch_initcall(s3c2410_pm_drvinit); static struct sysdev_driver s3c2410a_pm_driver = { .add = s3c2410_pm_add, - .resume = s3c2410_pm_resume, }; static int __init s3c2410a_pm_drvinit(void) @@ -147,7 +148,6 @@ arch_initcall(s3c2410a_pm_drvinit); #if defined(CONFIG_CPU_S3C2440) static struct sysdev_driver s3c2440_pm_driver = { .add = s3c2410_pm_add, - .resume = s3c2410_pm_resume, }; static int __init s3c2440_pm_drvinit(void) @@ -161,7 +161,6 @@ arch_initcall(s3c2440_pm_drvinit); #if defined(CONFIG_CPU_S3C2442) static struct sysdev_driver s3c2442_pm_driver = { .add = s3c2410_pm_add, - .resume = s3c2410_pm_resume, }; static int __init s3c2442_pm_drvinit(void) diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c index adc90a3c5890..f1d3bd8f6f17 100644 --- a/arch/arm/mach-s3c2410/s3c2410.c +++ b/arch/arm/mach-s3c2410/s3c2410.c @@ -19,6 +19,7 @@ #include <linux/gpio.h> #include <linux/clk.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/io.h> @@ -40,6 +41,7 @@ #include <plat/devs.h> #include <plat/clock.h> #include <plat/pll.h> +#include <plat/pm.h> #include <plat/gpio-core.h> #include <plat/gpio-cfg.h> @@ -168,6 +170,9 @@ int __init s3c2410_init(void) { printk("S3C2410: Initialising architecture\n"); + register_syscore_ops(&s3c2410_pm_syscore_ops); + register_syscore_ops(&s3c24xx_irq_syscore_ops); + return sysdev_register(&s3c2410_sysdev); } diff --git a/arch/arm/mach-s3c2412/irq.c b/arch/arm/mach-s3c2412/irq.c index f3355d2ec634..1a1aa220972b 100644 --- a/arch/arm/mach-s3c2412/irq.c +++ b/arch/arm/mach-s3c2412/irq.c @@ -202,8 +202,6 @@ static int s3c2412_irq_add(struct sys_device *sysdev) static struct sysdev_driver s3c2412_irq_driver = { .add = s3c2412_irq_add, - .suspend = s3c24xx_irq_suspend, - .resume = s3c24xx_irq_resume, }; static int s3c2412_irq_init(void) diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c index 923e01bdf017..85dcaeb9e62f 100644 --- a/arch/arm/mach-s3c2412/mach-jive.c +++ b/arch/arm/mach-s3c2412/mach-jive.c @@ -17,7 +17,7 @@ #include <linux/timer.h> #include <linux/init.h> #include <linux/gpio.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/i2c.h> @@ -486,7 +486,7 @@ static struct s3c2410_udc_mach_info jive_udc_cfg __initdata = { /* Jive power management device */ #ifdef CONFIG_PM -static int jive_pm_suspend(struct sys_device *sd, pm_message_t state) +static int jive_pm_suspend(void) { /* Write the magic value u-boot uses to check for resume into * the INFORM0 register, and ensure INFORM1 is set to the @@ -498,10 +498,9 @@ static int jive_pm_suspend(struct sys_device *sd, pm_message_t state) return 0; } -static int jive_pm_resume(struct sys_device *sd) +static void jive_pm_resume(void) { __raw_writel(0x0, S3C2412_INFORM0); - return 0; } #else @@ -509,16 +508,11 @@ static int jive_pm_resume(struct sys_device *sd) #define jive_pm_resume NULL #endif -static struct sysdev_class jive_pm_sysclass = { - .name = "jive-pm", +static struct syscore_ops jive_pm_syscore_ops = { .suspend = jive_pm_suspend, .resume = jive_pm_resume, }; -static struct sys_device jive_pm_sysdev = { - .cls = &jive_pm_sysclass, -}; - static void __init jive_map_io(void) { s3c24xx_init_io(jive_iodesc, ARRAY_SIZE(jive_iodesc)); @@ -536,10 +530,9 @@ static void jive_power_off(void) static void __init jive_machine_init(void) { - /* register system devices for managing low level suspend */ + /* register system core operations for managing low level suspend */ - sysdev_class_register(&jive_pm_sysclass); - sysdev_register(&jive_pm_sysdev); + register_syscore_ops(&jive_pm_syscore_ops); /* write our sleep configurations for the IO. Pull down all unused * IO, ensure that we have turned off all peripherals we do not diff --git a/arch/arm/mach-s3c2412/pm.c b/arch/arm/mach-s3c2412/pm.c index a7417c479ffe..752b13a7b3db 100644 --- a/arch/arm/mach-s3c2412/pm.c +++ b/arch/arm/mach-s3c2412/pm.c @@ -17,6 +17,7 @@ #include <linux/timer.h> #include <linux/init.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/platform_device.h> #include <linux/io.h> @@ -86,13 +87,24 @@ static struct sleep_save s3c2412_sleep[] = { SAVE_ITEM(S3C2413_GPJSLPCON), }; -static int s3c2412_pm_suspend(struct sys_device *dev, pm_message_t state) +static struct sysdev_driver s3c2412_pm_driver = { + .add = s3c2412_pm_add, +}; + +static __init int s3c2412_pm_init(void) +{ + return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver); +} + +arch_initcall(s3c2412_pm_init); + +static int s3c2412_pm_suspend(void) { s3c_pm_do_save(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep)); return 0; } -static int s3c2412_pm_resume(struct sys_device *dev) +static void s3c2412_pm_resume(void) { unsigned long tmp; @@ -102,18 +114,9 @@ static int s3c2412_pm_resume(struct sys_device *dev) __raw_writel(tmp, S3C2412_PWRCFG); s3c_pm_do_restore(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep)); - return 0; } -static struct sysdev_driver s3c2412_pm_driver = { - .add = s3c2412_pm_add, +struct syscore_ops s3c2412_pm_syscore_ops = { .suspend = s3c2412_pm_suspend, .resume = s3c2412_pm_resume, }; - -static __init int s3c2412_pm_init(void) -{ - return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver); -} - -arch_initcall(s3c2412_pm_init); diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c index 4c6df51ddf33..ef0958d3e5c6 100644 --- a/arch/arm/mach-s3c2412/s3c2412.c +++ b/arch/arm/mach-s3c2412/s3c2412.c @@ -19,6 +19,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/io.h> @@ -244,5 +245,8 @@ int __init s3c2412_init(void) { printk("S3C2412: Initialising architecture\n"); + register_syscore_ops(&s3c2412_pm_syscore_ops); + register_syscore_ops(&s3c24xx_irq_syscore_ops); + return sysdev_register(&s3c2412_sysdev); } diff --git a/arch/arm/mach-s3c2416/irq.c b/arch/arm/mach-s3c2416/irq.c index 77b38f2381c1..28ad20d42445 100644 --- a/arch/arm/mach-s3c2416/irq.c +++ b/arch/arm/mach-s3c2416/irq.c @@ -236,8 +236,6 @@ static int __init s3c2416_irq_add(struct sys_device *sysdev) static struct sysdev_driver s3c2416_irq_driver = { .add = s3c2416_irq_add, - .suspend = s3c24xx_irq_suspend, - .resume = s3c24xx_irq_resume, }; static int __init s3c2416_irq_init(void) diff --git a/arch/arm/mach-s3c2416/pm.c b/arch/arm/mach-s3c2416/pm.c index 4a04205b04d5..41db2b21e213 100644 --- a/arch/arm/mach-s3c2416/pm.c +++ b/arch/arm/mach-s3c2416/pm.c @@ -11,6 +11,7 @@ */ #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/io.h> #include <asm/cacheflush.h> @@ -55,30 +56,26 @@ static int s3c2416_pm_add(struct sys_device *sysdev) return 0; } -static int s3c2416_pm_suspend(struct sys_device *dev, pm_message_t state) +static struct sysdev_driver s3c2416_pm_driver = { + .add = s3c2416_pm_add, +}; + +static __init int s3c2416_pm_init(void) { - return 0; + return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver); } -static int s3c2416_pm_resume(struct sys_device *dev) +arch_initcall(s3c2416_pm_init); + + +static void s3c2416_pm_resume(void) { /* unset the return-from-sleep amd inform flags */ __raw_writel(0x0, S3C2443_PWRMODE); __raw_writel(0x0, S3C2412_INFORM0); __raw_writel(0x0, S3C2412_INFORM1); - - return 0; } -static struct sysdev_driver s3c2416_pm_driver = { - .add = s3c2416_pm_add, - .suspend = s3c2416_pm_suspend, +struct syscore_ops s3c2416_pm_syscore_ops = { .resume = s3c2416_pm_resume, }; - -static __init int s3c2416_pm_init(void) -{ - return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver); -} - -arch_initcall(s3c2416_pm_init); diff --git a/arch/arm/mach-s3c2416/s3c2416.c b/arch/arm/mach-s3c2416/s3c2416.c index ba7fd8737434..494ce913dc95 100644 --- a/arch/arm/mach-s3c2416/s3c2416.c +++ b/arch/arm/mach-s3c2416/s3c2416.c @@ -32,6 +32,7 @@ #include <linux/platform_device.h> #include <linux/serial_core.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/clk.h> #include <linux/io.h> @@ -54,6 +55,7 @@ #include <plat/devs.h> #include <plat/cpu.h> #include <plat/sdhci.h> +#include <plat/pm.h> #include <plat/iic-core.h> #include <plat/fb-core.h> @@ -95,6 +97,9 @@ int __init s3c2416_init(void) s3c_fb_setname("s3c2443-fb"); + register_syscore_ops(&s3c2416_pm_syscore_ops); + register_syscore_ops(&s3c24xx_irq_syscore_ops); + return sysdev_register(&s3c2416_sysdev); } diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c index 14dc67897757..d88536393310 100644 --- a/arch/arm/mach-s3c2440/mach-osiris.c +++ b/arch/arm/mach-s3c2440/mach-osiris.c @@ -17,7 +17,7 @@ #include <linux/init.h> #include <linux/gpio.h> #include <linux/device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/serial_core.h> #include <linux/clk.h> #include <linux/i2c.h> @@ -284,7 +284,7 @@ static struct platform_device osiris_pcmcia = { #ifdef CONFIG_PM static unsigned char pm_osiris_ctrl0; -static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state) +static int osiris_pm_suspend(void) { unsigned int tmp; @@ -304,7 +304,7 @@ static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state) return 0; } -static int osiris_pm_resume(struct sys_device *sd) +static void osiris_pm_resume(void) { if (pm_osiris_ctrl0 & OSIRIS_CTRL0_FIX8) __raw_writeb(OSIRIS_CTRL1_FIX8, OSIRIS_VA_CTRL1); @@ -312,8 +312,6 @@ static int osiris_pm_resume(struct sys_device *sd) __raw_writeb(pm_osiris_ctrl0, OSIRIS_VA_CTRL0); s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT); - - return 0; } #else @@ -321,16 +319,11 @@ static int osiris_pm_resume(struct sys_device *sd) #define osiris_pm_resume NULL #endif -static struct sysdev_class osiris_pm_sysclass = { - .name = "mach-osiris", +static struct syscore_ops osiris_pm_syscore_ops = { .suspend = osiris_pm_suspend, .resume = osiris_pm_resume, }; -static struct sys_device osiris_pm_sysdev = { - .cls = &osiris_pm_sysclass, -}; - /* Link for DVS driver to TPS65011 */ static void osiris_tps_release(struct device *dev) @@ -439,8 +432,7 @@ static void __init osiris_map_io(void) static void __init osiris_init(void) { - sysdev_class_register(&osiris_pm_sysclass); - sysdev_register(&osiris_pm_sysdev); + register_syscore_ops(&osiris_pm_syscore_ops); s3c_i2c0_set_platdata(NULL); s3c_nand_set_platdata(&osiris_nand_info); diff --git a/arch/arm/mach-s3c2440/s3c2440.c b/arch/arm/mach-s3c2440/s3c2440.c index f7663f731ea0..ce99ff72838d 100644 --- a/arch/arm/mach-s3c2440/s3c2440.c +++ b/arch/arm/mach-s3c2440/s3c2440.c @@ -19,6 +19,7 @@ #include <linux/platform_device.h> #include <linux/serial_core.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/gpio.h> #include <linux/clk.h> #include <linux/io.h> @@ -33,6 +34,7 @@ #include <plat/devs.h> #include <plat/cpu.h> #include <plat/s3c244x.h> +#include <plat/pm.h> #include <plat/gpio-core.h> #include <plat/gpio-cfg.h> @@ -51,6 +53,12 @@ int __init s3c2440_init(void) s3c_device_wdt.resource[1].start = IRQ_S3C2440_WDT; s3c_device_wdt.resource[1].end = IRQ_S3C2440_WDT; + /* register suspend/resume handlers */ + + register_syscore_ops(&s3c2410_pm_syscore_ops); + register_syscore_ops(&s3c244x_pm_syscore_ops); + register_syscore_ops(&s3c24xx_irq_syscore_ops); + /* register our system device for everything else */ return sysdev_register(&s3c2440_sysdev); diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c index ecf813546554..6224bad4d604 100644 --- a/arch/arm/mach-s3c2440/s3c2442.c +++ b/arch/arm/mach-s3c2440/s3c2442.c @@ -29,6 +29,7 @@ #include <linux/err.h> #include <linux/device.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/mutex.h> @@ -45,6 +46,7 @@ #include <plat/clock.h> #include <plat/cpu.h> #include <plat/s3c244x.h> +#include <plat/pm.h> #include <plat/gpio-core.h> #include <plat/gpio-cfg.h> @@ -167,6 +169,10 @@ int __init s3c2442_init(void) { printk("S3C2442: Initialising architecture\n"); + register_syscore_ops(&s3c2410_pm_syscore_ops); + register_syscore_ops(&s3c244x_pm_syscore_ops); + register_syscore_ops(&s3c24xx_irq_syscore_ops); + return sysdev_register(&s3c2442_sysdev); } diff --git a/arch/arm/mach-s3c2440/s3c244x-irq.c b/arch/arm/mach-s3c2440/s3c244x-irq.c index de07c2feaa32..c63e8f26d901 100644 --- a/arch/arm/mach-s3c2440/s3c244x-irq.c +++ b/arch/arm/mach-s3c2440/s3c244x-irq.c @@ -116,8 +116,6 @@ static int s3c244x_irq_add(struct sys_device *sysdev) static struct sysdev_driver s3c2440_irq_driver = { .add = s3c244x_irq_add, - .suspend = s3c24xx_irq_suspend, - .resume = s3c24xx_irq_resume, }; static int s3c2440_irq_init(void) @@ -129,8 +127,6 @@ arch_initcall(s3c2440_irq_init); static struct sysdev_driver s3c2442_irq_driver = { .add = s3c244x_irq_add, - .suspend = s3c24xx_irq_suspend, - .resume = s3c24xx_irq_resume, }; diff --git a/arch/arm/mach-s3c2440/s3c244x.c b/arch/arm/mach-s3c2440/s3c244x.c index 90c1707b9c95..7e8a23d2098a 100644 --- a/arch/arm/mach-s3c2440/s3c244x.c +++ b/arch/arm/mach-s3c2440/s3c244x.c @@ -19,6 +19,7 @@ #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/clk.h> #include <linux/io.h> @@ -134,45 +135,14 @@ void __init s3c244x_init_clocks(int xtal) s3c2410_baseclk_add(); } -#ifdef CONFIG_PM - -static struct sleep_save s3c244x_sleep[] = { - SAVE_ITEM(S3C2440_DSC0), - SAVE_ITEM(S3C2440_DSC1), - SAVE_ITEM(S3C2440_GPJDAT), - SAVE_ITEM(S3C2440_GPJCON), - SAVE_ITEM(S3C2440_GPJUP) -}; - -static int s3c244x_suspend(struct sys_device *dev, pm_message_t state) -{ - s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep)); - return 0; -} - -static int s3c244x_resume(struct sys_device *dev) -{ - s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep)); - return 0; -} - -#else -#define s3c244x_suspend NULL -#define s3c244x_resume NULL -#endif - /* Since the S3C2442 and S3C2440 share items, put both sysclasses here */ struct sysdev_class s3c2440_sysclass = { .name = "s3c2440-core", - .suspend = s3c244x_suspend, - .resume = s3c244x_resume }; struct sysdev_class s3c2442_sysclass = { .name = "s3c2442-core", - .suspend = s3c244x_suspend, - .resume = s3c244x_resume }; /* need to register class before we actually register the device, and @@ -194,3 +164,33 @@ static int __init s3c2442_core_init(void) } core_initcall(s3c2442_core_init); + + +#ifdef CONFIG_PM +static struct sleep_save s3c244x_sleep[] = { + SAVE_ITEM(S3C2440_DSC0), + SAVE_ITEM(S3C2440_DSC1), + SAVE_ITEM(S3C2440_GPJDAT), + SAVE_ITEM(S3C2440_GPJCON), + SAVE_ITEM(S3C2440_GPJUP) +}; + +static int s3c244x_suspend(void) +{ + s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep)); + return 0; +} + +static void s3c244x_resume(void) +{ + s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep)); +} +#else +#define s3c244x_suspend NULL +#define s3c244x_resume NULL +#endif + +struct syscore_ops s3c244x_pm_syscore_ops = { + .suspend = s3c244x_suspend, + .resume = s3c244x_resume, +}; diff --git a/arch/arm/mach-s3c64xx/irq-pm.c b/arch/arm/mach-s3c64xx/irq-pm.c index da1bec64b9da..8bec61e242c7 100644 --- a/arch/arm/mach-s3c64xx/irq-pm.c +++ b/arch/arm/mach-s3c64xx/irq-pm.c @@ -13,7 +13,7 @@ */ #include <linux/kernel.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/serial_core.h> #include <linux/irq.h> @@ -54,7 +54,7 @@ static struct irq_grp_save { static u32 irq_uart_mask[CONFIG_SERIAL_SAMSUNG_UARTS]; -static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state) +static int s3c64xx_irq_pm_suspend(void) { struct irq_grp_save *grp = eint_grp_save; int i; @@ -75,7 +75,7 @@ static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int s3c64xx_irq_pm_resume(struct sys_device *dev) +static void s3c64xx_irq_pm_resume(void) { struct irq_grp_save *grp = eint_grp_save; int i; @@ -94,18 +94,18 @@ static int s3c64xx_irq_pm_resume(struct sys_device *dev) } S3C_PMDBG("%s: IRQ configuration restored\n", __func__); - return 0; } -static struct sysdev_driver s3c64xx_irq_driver = { +struct syscore_ops s3c64xx_irq_syscore_ops = { .suspend = s3c64xx_irq_pm_suspend, .resume = s3c64xx_irq_pm_resume, }; -static int __init s3c64xx_irq_pm_init(void) +static __init int s3c64xx_syscore_init(void) { - return sysdev_driver_register(&s3c64xx_sysclass, &s3c64xx_irq_driver); -} + register_syscore_ops(&s3c64xx_irq_syscore_ops); -arch_initcall(s3c64xx_irq_pm_init); + return 0; +} +core_initcall(s3c64xx_syscore_init); diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c index 549d7924fd4c..24febae3d4c0 100644 --- a/arch/arm/mach-s5pv210/pm.c +++ b/arch/arm/mach-s5pv210/pm.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/suspend.h> +#include <linux/syscore_ops.h> #include <linux/io.h> #include <plat/cpu.h> @@ -140,7 +141,17 @@ static int s5pv210_pm_add(struct sys_device *sysdev) return 0; } -static int s5pv210_pm_resume(struct sys_device *dev) +static struct sysdev_driver s5pv210_pm_driver = { + .add = s5pv210_pm_add, +}; + +static __init int s5pv210_pm_drvinit(void) +{ + return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver); +} +arch_initcall(s5pv210_pm_drvinit); + +static void s5pv210_pm_resume(void) { u32 tmp; @@ -150,17 +161,15 @@ static int s5pv210_pm_resume(struct sys_device *dev) __raw_writel(tmp , S5P_OTHERS); s3c_pm_do_restore_core(s5pv210_core_save, ARRAY_SIZE(s5pv210_core_save)); - - return 0; } -static struct sysdev_driver s5pv210_pm_driver = { - .add = s5pv210_pm_add, +static struct syscore_ops s5pv210_pm_syscore_ops = { .resume = s5pv210_pm_resume, }; -static __init int s5pv210_pm_drvinit(void) +static __init int s5pv210_pm_syscore_init(void) { - return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver); + register_syscore_ops(&s5pv210_pm_syscore_ops); + return 0; } -arch_initcall(s5pv210_pm_drvinit); +arch_initcall(s5pv210_pm_syscore_init); diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index 423ddb3d65e9..dfbf824a69fa 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -14,7 +14,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/ioport.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <mach/hardware.h> #include <asm/mach/irq.h> @@ -234,7 +234,7 @@ static struct sa1100irq_state { unsigned int iccr; } sa1100irq_state; -static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state) +static int sa1100irq_suspend(void) { struct sa1100irq_state *st = &sa1100irq_state; @@ -264,7 +264,7 @@ static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int sa1100irq_resume(struct sys_device *dev) +static void sa1100irq_resume(void) { struct sa1100irq_state *st = &sa1100irq_state; @@ -277,24 +277,17 @@ static int sa1100irq_resume(struct sys_device *dev) ICMR = st->icmr; } - return 0; } -static struct sysdev_class sa1100irq_sysclass = { - .name = "sa11x0-irq", +static struct syscore_ops sa1100irq_syscore_ops = { .suspend = sa1100irq_suspend, .resume = sa1100irq_resume, }; -static struct sys_device sa1100irq_device = { - .id = 0, - .cls = &sa1100irq_sysclass, -}; - static int __init sa1100irq_init_devicefs(void) { - sysdev_class_register(&sa1100irq_sysclass); - return sysdev_register(&sa1100irq_device); + register_syscore_ops(&sa1100irq_syscore_ops); + return 0; } device_initcall(sa1100irq_init_devicefs); diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c index 94912d3944d3..2d1b67a59e4a 100644 --- a/arch/arm/mach-shmobile/pm_runtime.c +++ b/arch/arm/mach-shmobile/pm_runtime.c @@ -18,152 +18,41 @@ #include <linux/clk.h> #include <linux/sh_clk.h> #include <linux/bitmap.h> +#include <linux/slab.h> #ifdef CONFIG_PM_RUNTIME -#define BIT_ONCE 0 -#define BIT_ACTIVE 1 -#define BIT_CLK_ENABLED 2 -struct pm_runtime_data { - unsigned long flags; - struct clk *clk; -}; - -static void __devres_release(struct device *dev, void *res) -{ - struct pm_runtime_data *prd = res; - - dev_dbg(dev, "__devres_release()\n"); - - if (test_bit(BIT_CLK_ENABLED, &prd->flags)) - clk_disable(prd->clk); - - if (test_bit(BIT_ACTIVE, &prd->flags)) - clk_put(prd->clk); -} - -static struct pm_runtime_data *__to_prd(struct device *dev) -{ - return devres_find(dev, __devres_release, NULL, NULL); -} - -static void platform_pm_runtime_init(struct device *dev, - struct pm_runtime_data *prd) -{ - if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) { - prd->clk = clk_get(dev, NULL); - if (!IS_ERR(prd->clk)) { - set_bit(BIT_ACTIVE, &prd->flags); - dev_info(dev, "clocks managed by runtime pm\n"); - } - } -} - -static void platform_pm_runtime_bug(struct device *dev, - struct pm_runtime_data *prd) -{ - if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) - dev_err(dev, "runtime pm suspend before resume\n"); -} - -int platform_pm_runtime_suspend(struct device *dev) -{ - struct pm_runtime_data *prd = __to_prd(dev); - - dev_dbg(dev, "platform_pm_runtime_suspend()\n"); - - platform_pm_runtime_bug(dev, prd); - - if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { - clk_disable(prd->clk); - clear_bit(BIT_CLK_ENABLED, &prd->flags); - } - - return 0; -} - -int platform_pm_runtime_resume(struct device *dev) -{ - struct pm_runtime_data *prd = __to_prd(dev); - - dev_dbg(dev, "platform_pm_runtime_resume()\n"); - - platform_pm_runtime_init(dev, prd); - - if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { - clk_enable(prd->clk); - set_bit(BIT_CLK_ENABLED, &prd->flags); - } - - return 0; -} - -int platform_pm_runtime_idle(struct device *dev) +static int default_platform_runtime_idle(struct device *dev) { /* suspend synchronously to disable clocks immediately */ return pm_runtime_suspend(dev); } -static int platform_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct pm_runtime_data *prd; - - dev_dbg(dev, "platform_bus_notify() %ld !\n", action); - - if (action == BUS_NOTIFY_BIND_DRIVER) { - prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL); - if (prd) - devres_add(dev, prd); - else - dev_err(dev, "unable to alloc memory for runtime pm\n"); - } - - return 0; -} - -#else /* CONFIG_PM_RUNTIME */ - -static int platform_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct clk *clk; +static struct dev_power_domain default_power_domain = { + .ops = { + .runtime_suspend = pm_runtime_clk_suspend, + .runtime_resume = pm_runtime_clk_resume, + .runtime_idle = default_platform_runtime_idle, + USE_PLATFORM_PM_SLEEP_OPS + }, +}; - dev_dbg(dev, "platform_bus_notify() %ld !\n", action); +#define DEFAULT_PWR_DOMAIN_PTR (&default_power_domain) - switch (action) { - case BUS_NOTIFY_BIND_DRIVER: - clk = clk_get(dev, NULL); - if (!IS_ERR(clk)) { - clk_enable(clk); - clk_put(clk); - dev_info(dev, "runtime pm disabled, clock forced on\n"); - } - break; - case BUS_NOTIFY_UNBOUND_DRIVER: - clk = clk_get(dev, NULL); - if (!IS_ERR(clk)) { - clk_disable(clk); - clk_put(clk); - dev_info(dev, "runtime pm disabled, clock forced off\n"); - } - break; - } +#else - return 0; -} +#define DEFAULT_PWR_DOMAIN_PTR NULL #endif /* CONFIG_PM_RUNTIME */ -static struct notifier_block platform_bus_notifier = { - .notifier_call = platform_bus_notify +static struct pm_clk_notifier_block platform_bus_notifier = { + .pwr_domain = DEFAULT_PWR_DOMAIN_PTR, + .con_ids = { NULL, }, }; static int __init sh_pm_runtime_init(void) { - bus_register_notifier(&platform_bus_type, &platform_bus_notifier); + pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier); return 0; } core_initcall(sh_pm_runtime_init); diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 3cdeffc97b44..5ec1846aa1d0 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -27,12 +27,14 @@ comment "Tegra board type" config MACH_HARMONY bool "Harmony board" + select MACH_HAS_SND_SOC_TEGRA_WM8903 help Support for nVidia Harmony development platform config MACH_KAEN bool "Kaen board" select MACH_SEABOARD + select MACH_HAS_SND_SOC_TEGRA_WM8903 help Support for the Kaen version of Seaboard @@ -43,6 +45,7 @@ config MACH_PAZ00 config MACH_SEABOARD bool "Seaboard board" + select MACH_HAS_SND_SOC_TEGRA_WM8903 help Support for nVidia Seaboard development platform. It will also be included for some of the derivative boards that diff --git a/arch/arm/mach-tegra/board-harmony.c b/arch/arm/mach-tegra/board-harmony.c index 75c918a86a31..30e18bc60647 100644 --- a/arch/arm/mach-tegra/board-harmony.c +++ b/arch/arm/mach-tegra/board-harmony.c @@ -34,7 +34,7 @@ #include <asm/mach/time.h> #include <asm/setup.h> -#include <mach/harmony_audio.h> +#include <mach/tegra_wm8903_pdata.h> #include <mach/iomap.h> #include <mach/irqs.h> #include <mach/sdhci.h> @@ -67,15 +67,16 @@ static struct platform_device debug_uart = { }, }; -static struct harmony_audio_platform_data harmony_audio_pdata = { +static struct tegra_wm8903_platform_data harmony_audio_pdata = { .gpio_spkr_en = TEGRA_GPIO_SPKR_EN, .gpio_hp_det = TEGRA_GPIO_HP_DET, + .gpio_hp_mute = -1, .gpio_int_mic_en = TEGRA_GPIO_INT_MIC_EN, .gpio_ext_mic_en = TEGRA_GPIO_EXT_MIC_EN, }; static struct platform_device harmony_audio_device = { - .name = "tegra-snd-harmony", + .name = "tegra-snd-wm8903", .id = 0, .dev = { .platform_data = &harmony_audio_pdata, diff --git a/arch/arm/mach-tegra/include/mach/harmony_audio.h b/arch/arm/mach-tegra/include/mach/tegra_wm8903_pdata.h index af086500ab7d..9d293344a7ff 100644 --- a/arch/arm/mach-tegra/include/mach/harmony_audio.h +++ b/arch/arm/mach-tegra/include/mach/tegra_wm8903_pdata.h @@ -1,5 +1,5 @@ /* - * arch/arm/mach-tegra/include/mach/harmony_audio.h + * arch/arm/mach-tegra/include/mach/tegra_wm8903_pdata.h * * Copyright 2011 NVIDIA, Inc. * @@ -14,9 +14,10 @@ * */ -struct harmony_audio_platform_data { +struct tegra_wm8903_platform_data { int gpio_spkr_en; int gpio_hp_det; + int gpio_hp_mute; int gpio_int_mic_en; int gpio_ext_mic_en; }; diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index d2adcdda23cf..bd9e32187eab 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -17,7 +17,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> @@ -1372,9 +1372,7 @@ static const struct dev_pm_ops omap_mpuio_dev_pm_ops = { .resume_noirq = omap_mpuio_resume_noirq, }; -/* use platform_driver for this, now that there's no longer any - * point to sys_device (other than not disturbing old code). - */ +/* use platform_driver for this. */ static struct platform_driver omap_mpuio_driver = { .driver = { .name = "mpuio", @@ -1745,7 +1743,7 @@ static int __devinit omap_gpio_probe(struct platform_device *pdev) } #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS) -static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg) +static int omap_gpio_suspend(void) { int i; @@ -1795,12 +1793,12 @@ static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg) return 0; } -static int omap_gpio_resume(struct sys_device *dev) +static void omap_gpio_resume(void) { int i; if (!cpu_class_is_omap2() && !cpu_is_omap16xx()) - return 0; + return; for (i = 0; i < gpio_bank_count; i++) { struct gpio_bank *bank = &gpio_bank[i]; @@ -1836,21 +1834,13 @@ static int omap_gpio_resume(struct sys_device *dev) __raw_writel(bank->saved_wakeup, wake_set); spin_unlock_irqrestore(&bank->lock, flags); } - - return 0; } -static struct sysdev_class omap_gpio_sysclass = { - .name = "gpio", +static struct syscore_ops omap_gpio_syscore_ops = { .suspend = omap_gpio_suspend, .resume = omap_gpio_resume, }; -static struct sys_device omap_gpio_device = { - .id = 0, - .cls = &omap_gpio_sysclass, -}; - #endif #ifdef CONFIG_ARCH_OMAP2PLUS @@ -2108,21 +2098,14 @@ postcore_initcall(omap_gpio_drv_reg); static int __init omap_gpio_sysinit(void) { - int ret = 0; - mpuio_init(); #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS) - if (cpu_is_omap16xx() || cpu_class_is_omap2()) { - if (ret == 0) { - ret = sysdev_class_register(&omap_gpio_sysclass); - if (ret == 0) - ret = sysdev_register(&omap_gpio_device); - } - } + if (cpu_is_omap16xx() || cpu_class_is_omap2()) + register_syscore_ops(&omap_gpio_syscore_ops); #endif - return ret; + return 0; } arch_initcall(omap_gpio_sysinit); diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c index 9bbda9acb73b..a37b8eb65b76 100644 --- a/arch/arm/plat-omap/omap_device.c +++ b/arch/arm/plat-omap/omap_device.c @@ -536,6 +536,28 @@ int omap_early_device_register(struct omap_device *od) return 0; } +static int _od_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return omap_device_idle(pdev); +} + +static int _od_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return omap_device_enable(pdev); +} + +static struct dev_power_domain omap_device_power_domain = { + .ops = { + .runtime_suspend = _od_runtime_suspend, + .runtime_resume = _od_runtime_resume, + USE_PLATFORM_PM_SLEEP_OPS + } +}; + /** * omap_device_register - register an omap_device with one omap_hwmod * @od: struct omap_device * to register @@ -549,6 +571,7 @@ int omap_device_register(struct omap_device *od) pr_debug("omap_device: %s: registering\n", od->pdev.name); od->pdev.dev.parent = &omap_device_parent; + od->pdev.dev.pwr_domain = &omap_device_power_domain; return platform_device_register(&od->pdev); } diff --git a/arch/arm/plat-pxa/gpio.c b/arch/arm/plat-pxa/gpio.c index dce088f45678..48ebb9479b61 100644 --- a/arch/arm/plat-pxa/gpio.c +++ b/arch/arm/plat-pxa/gpio.c @@ -15,7 +15,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/io.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/slab.h> #include <mach/gpio.h> @@ -295,7 +295,7 @@ void __init pxa_init_gpio(int mux_irq, int start, int end, set_wake_t fn) } #ifdef CONFIG_PM -static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state) +static int pxa_gpio_suspend(void) { struct pxa_gpio_chip *c; int gpio; @@ -312,7 +312,7 @@ static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int pxa_gpio_resume(struct sys_device *dev) +static void pxa_gpio_resume(void) { struct pxa_gpio_chip *c; int gpio; @@ -326,22 +326,13 @@ static int pxa_gpio_resume(struct sys_device *dev) __raw_writel(c->saved_gfer, c->regbase + GFER_OFFSET); __raw_writel(c->saved_gpdr, c->regbase + GPDR_OFFSET); } - return 0; } #else #define pxa_gpio_suspend NULL #define pxa_gpio_resume NULL #endif -struct sysdev_class pxa_gpio_sysclass = { - .name = "gpio", +struct syscore_ops pxa_gpio_syscore_ops = { .suspend = pxa_gpio_suspend, .resume = pxa_gpio_resume, }; - -static int __init pxa_gpio_init(void) -{ - return sysdev_class_register(&pxa_gpio_sysclass); -} - -core_initcall(pxa_gpio_init); diff --git a/arch/arm/plat-pxa/mfp.c b/arch/arm/plat-pxa/mfp.c index a9aa5ad3f4eb..be12eadcce20 100644 --- a/arch/arm/plat-pxa/mfp.c +++ b/arch/arm/plat-pxa/mfp.c @@ -17,7 +17,6 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/io.h> -#include <linux/sysdev.h> #include <plat/mfp.h> diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 27ea852e3370..c10d10c56e2e 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -22,7 +22,7 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/interrupt.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/io.h> @@ -1195,19 +1195,12 @@ int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *d EXPORT_SYMBOL(s3c2410_dma_getposition); -static inline struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev) -{ - return container_of(dev, struct s3c2410_dma_chan, dev); -} - -/* system device class */ +/* system core operations */ #ifdef CONFIG_PM -static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state) +static void s3c2410_dma_suspend_chan(s3c2410_dma_chan *cp) { - struct s3c2410_dma_chan *cp = to_dma_chan(dev); - printk(KERN_DEBUG "suspending dma channel %d\n", cp->number); if (dma_rdreg(cp, S3C2410_DMA_DMASKTRIG) & S3C2410_DMASKTRIG_ON) { @@ -1222,13 +1215,21 @@ static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state) s3c2410_dma_dostop(cp); } +} + +static int s3c2410_dma_suspend(void) +{ + struct s3c2410_dma_chan *cp = s3c2410_chans; + int channel; + + for (channel = 0; channel < dma_channels; cp++, channel++) + s3c2410_dma_suspend_chan(cp); return 0; } -static int s3c2410_dma_resume(struct sys_device *dev) +static void s3c2410_dma_resume_chan(struct s3c2410_dma_chan *cp) { - struct s3c2410_dma_chan *cp = to_dma_chan(dev); unsigned int no = cp->number | DMACH_LOW_LEVEL; /* restore channel's hardware configuration */ @@ -1249,13 +1250,21 @@ static int s3c2410_dma_resume(struct sys_device *dev) return 0; } +static void s3c2410_dma_resume(void) +{ + struct s3c2410_dma_chan *cp = s3c2410_chans + dma_channels - 1; + int channel; + + for (channel = dma_channels - 1; channel >= 0; cp++, channel--) + s3c2410_dma_resume_chan(cp); +} + #else #define s3c2410_dma_suspend NULL #define s3c2410_dma_resume NULL #endif /* CONFIG_PM */ -struct sysdev_class dma_sysclass = { - .name = "s3c24xx-dma", +struct syscore_ops dma_syscore_ops = { .suspend = s3c2410_dma_suspend, .resume = s3c2410_dma_resume, }; @@ -1269,39 +1278,14 @@ static void s3c2410_dma_cache_ctor(void *p) /* initialisation code */ -static int __init s3c24xx_dma_sysclass_init(void) +static int __init s3c24xx_dma_syscore_init(void) { - int ret = sysdev_class_register(&dma_sysclass); - - if (ret != 0) - printk(KERN_ERR "dma sysclass registration failed\n"); - - return ret; -} - -core_initcall(s3c24xx_dma_sysclass_init); - -static int __init s3c24xx_dma_sysdev_register(void) -{ - struct s3c2410_dma_chan *cp = s3c2410_chans; - int channel, ret; - - for (channel = 0; channel < dma_channels; cp++, channel++) { - cp->dev.cls = &dma_sysclass; - cp->dev.id = channel; - ret = sysdev_register(&cp->dev); - - if (ret) { - printk(KERN_ERR "error registering dev for dma %d\n", - channel); - return ret; - } - } + register_syscore_ops(&dma_syscore_ops); return 0; } -late_initcall(s3c24xx_dma_sysdev_register); +late_initcall(s3c24xx_dma_syscore_init); int __init s3c24xx_dma_init(unsigned int channels, unsigned int irq, unsigned int stride) diff --git a/arch/arm/plat-s3c24xx/irq-pm.c b/arch/arm/plat-s3c24xx/irq-pm.c index c3624d898630..0efb2e2848c8 100644 --- a/arch/arm/plat-s3c24xx/irq-pm.c +++ b/arch/arm/plat-s3c24xx/irq-pm.c @@ -14,7 +14,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> -#include <linux/sysdev.h> #include <linux/irq.h> #include <plat/cpu.h> @@ -65,7 +64,7 @@ static unsigned long save_extint[3]; static unsigned long save_eintflt[4]; static unsigned long save_eintmask; -int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state) +int s3c24xx_irq_suspend(void) { unsigned int i; @@ -81,7 +80,7 @@ int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state) return 0; } -int s3c24xx_irq_resume(struct sys_device *dev) +void s3c24xx_irq_resume(void) { unsigned int i; @@ -93,6 +92,4 @@ int s3c24xx_irq_resume(struct sys_device *dev) s3c_pm_do_restore(irq_save, ARRAY_SIZE(irq_save)); __raw_writel(save_eintmask, S3C24XX_EINTMASK); - - return 0; } diff --git a/arch/arm/plat-s5p/irq-pm.c b/arch/arm/plat-s5p/irq-pm.c index 5259ad458bc8..327acb3a4464 100644 --- a/arch/arm/plat-s5p/irq-pm.c +++ b/arch/arm/plat-s5p/irq-pm.c @@ -16,7 +16,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> -#include <linux/sysdev.h> #include <plat/cpu.h> #include <plat/irqs.h> @@ -77,17 +76,15 @@ static struct sleep_save eint_save[] = { SAVE_ITEM(S5P_EINT_MASK(3)), }; -int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state) +int s3c24xx_irq_suspend(void) { s3c_pm_do_save(eint_save, ARRAY_SIZE(eint_save)); return 0; } -int s3c24xx_irq_resume(struct sys_device *dev) +void s3c24xx_irq_resume(void) { s3c_pm_do_restore(eint_save, ARRAY_SIZE(eint_save)); - - return 0; } diff --git a/arch/arm/plat-samsung/include/plat/cpu.h b/arch/arm/plat-samsung/include/plat/cpu.h index cedfff51c82b..3aedac0034ba 100644 --- a/arch/arm/plat-samsung/include/plat/cpu.h +++ b/arch/arm/plat-samsung/include/plat/cpu.h @@ -68,6 +68,12 @@ extern void s3c24xx_init_uartdevs(char *name, struct sys_timer; extern struct sys_timer s3c24xx_timer; +extern struct syscore_ops s3c2410_pm_syscore_ops; +extern struct syscore_ops s3c2412_pm_syscore_ops; +extern struct syscore_ops s3c2416_pm_syscore_ops; +extern struct syscore_ops s3c244x_pm_syscore_ops; +extern struct syscore_ops s3c64xx_irq_syscore_ops; + /* system device classes */ extern struct sysdev_class s3c2410_sysclass; diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h index 937cc2ace517..7fb6f6be8c81 100644 --- a/arch/arm/plat-samsung/include/plat/pm.h +++ b/arch/arm/plat-samsung/include/plat/pm.h @@ -103,14 +103,16 @@ extern void s3c_pm_do_restore_core(struct sleep_save *ptr, int count); #ifdef CONFIG_PM extern int s3c_irqext_wake(struct irq_data *data, unsigned int state); -extern int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state); -extern int s3c24xx_irq_resume(struct sys_device *dev); +extern int s3c24xx_irq_suspend(void); +extern void s3c24xx_irq_resume(void); #else #define s3c_irqext_wake NULL #define s3c24xx_irq_suspend NULL #define s3c24xx_irq_resume NULL #endif +extern struct syscore_ops s3c24xx_irq_syscore_ops; + /* PM debug functions */ #ifdef CONFIG_SAMSUNG_PM_DEBUG diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index f74695075e64..f25e7ec89416 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -398,9 +398,9 @@ static void vfp_enable(void *unused) } #ifdef CONFIG_PM -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> -static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state) +static int vfp_pm_suspend(void) { struct thread_info *ti = current_thread_info(); u32 fpexc = fmrx(FPEXC); @@ -420,34 +420,25 @@ static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int vfp_pm_resume(struct sys_device *dev) +static void vfp_pm_resume(void) { /* ensure we have access to the vfp */ vfp_enable(NULL); /* and disable it to ensure the next usage restores the state */ fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); - - return 0; } -static struct sysdev_class vfp_pm_sysclass = { - .name = "vfp", +static struct syscore_ops vfp_pm_syscore_ops = { .suspend = vfp_pm_suspend, .resume = vfp_pm_resume, }; -static struct sys_device vfp_pm_sysdev = { - .cls = &vfp_pm_sysclass, -}; - static void vfp_pm_init(void) { - sysdev_class_register(&vfp_pm_sysclass); - sysdev_register(&vfp_pm_sysdev); + register_syscore_ops(&vfp_pm_syscore_ops); } - #else static inline void vfp_pm_init(void) { } #endif /* CONFIG_PM */ diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c index 21ce35f33aa5..3e3646186c9f 100644 --- a/arch/avr32/mach-at32ap/intc.c +++ b/arch/avr32/mach-at32ap/intc.c @@ -12,7 +12,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/platform_device.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <asm/io.h> @@ -21,7 +21,6 @@ struct intc { void __iomem *regs; struct irq_chip chip; - struct sys_device sysdev; #ifdef CONFIG_PM unsigned long suspend_ipr; unsigned long saved_ipr[64]; @@ -146,9 +145,8 @@ void intc_set_suspend_handler(unsigned long offset) intc0.suspend_ipr = offset; } -static int intc_suspend(struct sys_device *sdev, pm_message_t state) +static int intc_suspend(void) { - struct intc *intc = container_of(sdev, struct intc, sysdev); int i; if (unlikely(!irqs_disabled())) { @@ -156,28 +154,25 @@ static int intc_suspend(struct sys_device *sdev, pm_message_t state) return -EINVAL; } - if (unlikely(!intc->suspend_ipr)) { + if (unlikely(!intc0.suspend_ipr)) { pr_err("intc_suspend: suspend_ipr not initialized\n"); return -EINVAL; } for (i = 0; i < 64; i++) { - intc->saved_ipr[i] = intc_readl(intc, INTPR0 + 4 * i); - intc_writel(intc, INTPR0 + 4 * i, intc->suspend_ipr); + intc0.saved_ipr[i] = intc_readl(&intc0, INTPR0 + 4 * i); + intc_writel(&intc0, INTPR0 + 4 * i, intc0.suspend_ipr); } return 0; } -static int intc_resume(struct sys_device *sdev) +static int intc_resume(void) { - struct intc *intc = container_of(sdev, struct intc, sysdev); int i; - WARN_ON(!irqs_disabled()); - for (i = 0; i < 64; i++) - intc_writel(intc, INTPR0 + 4 * i, intc->saved_ipr[i]); + intc_writel(&intc0, INTPR0 + 4 * i, intc0.saved_ipr[i]); return 0; } @@ -186,27 +181,18 @@ static int intc_resume(struct sys_device *sdev) #define intc_resume NULL #endif -static struct sysdev_class intc_class = { - .name = "intc", +static struct syscore_ops intc_syscore_ops = { .suspend = intc_suspend, .resume = intc_resume, }; -static int __init intc_init_sysdev(void) +static int __init intc_init_syscore(void) { - int ret; - - ret = sysdev_class_register(&intc_class); - if (ret) - return ret; + register_syscore_ops(&intc_syscore_ops); - intc0.sysdev.id = 0; - intc0.sysdev.cls = &intc_class; - ret = sysdev_register(&intc0.sysdev); - - return ret; + return 0; } -device_initcall(intc_init_sysdev); +device_initcall(intc_init_syscore); unsigned long intc_get_pending(unsigned int group) { diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c index 0b5f72f17fd0..401eb1d8e3b4 100644 --- a/arch/blackfin/kernel/nmi.c +++ b/arch/blackfin/kernel/nmi.c @@ -12,7 +12,7 @@ #include <linux/bitops.h> #include <linux/hardirq.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/pm.h> #include <linux/nmi.h> #include <linux/smp.h> @@ -196,43 +196,31 @@ void touch_nmi_watchdog(void) /* Suspend/resume support */ #ifdef CONFIG_PM -static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state) +static int nmi_wdt_suspend(void) { nmi_wdt_stop(); return 0; } -static int nmi_wdt_resume(struct sys_device *dev) +static void nmi_wdt_resume(void) { if (nmi_active) nmi_wdt_start(); - return 0; } -static struct sysdev_class nmi_sysclass = { - .name = DRV_NAME, +static struct syscore_ops nmi_syscore_ops = { .resume = nmi_wdt_resume, .suspend = nmi_wdt_suspend, }; -static struct sys_device device_nmi_wdt = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_nmi_wdt_sysfs(void) +static int __init init_nmi_wdt_syscore(void) { - int error; - - if (!nmi_active) - return 0; + if (nmi_active) + register_syscore_ops(&nmi_syscore_ops); - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_nmi_wdt); - return error; + return 0; } -late_initcall(init_nmi_wdt_sysfs); +late_initcall(init_nmi_wdt_syscore); #endif /* CONFIG_PM */ diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index cdb4beb6bc8f..9e9b60d969dc 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -23,29 +23,6 @@ #include <asm/gptimers.h> #include <asm/nmi.h> -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ - -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ #if defined(CONFIG_CYCLES_CLOCKSOURCE) @@ -63,7 +40,6 @@ static struct clocksource bfin_cs_cycles = { .rating = 400, .read = bfin_read_cycles, .mask = CLOCKSOURCE_MASK(64), - .shift = CYC2NS_SCALE_FACTOR, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -75,10 +51,7 @@ static inline unsigned long long bfin_cs_cycles_sched_clock(void) static int __init bfin_cs_cycles_init(void) { - bfin_cs_cycles.mult = \ - clocksource_hz2mult(get_cclk(), bfin_cs_cycles.shift); - - if (clocksource_register(&bfin_cs_cycles)) + if (clocksource_register_hz(&bfin_cs_cycles, get_cclk())) panic("failed to register clocksource"); return 0; @@ -111,7 +84,6 @@ static struct clocksource bfin_cs_gptimer0 = { .rating = 350, .read = bfin_read_gptimer0, .mask = CLOCKSOURCE_MASK(32), - .shift = CYC2NS_SCALE_FACTOR, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -125,10 +97,7 @@ static int __init bfin_cs_gptimer0_init(void) { setup_gptimer0(); - bfin_cs_gptimer0.mult = \ - clocksource_hz2mult(get_sclk(), bfin_cs_gptimer0.shift); - - if (clocksource_register(&bfin_cs_gptimer0)) + if (clocksource_register_hz(&bfin_cs_gptimer0, get_sclk())) panic("failed to register clocksource"); return 0; diff --git a/arch/blackfin/mach-common/dpmc.c b/arch/blackfin/mach-common/dpmc.c index 382099fd5561..5e4112e518a9 100644 --- a/arch/blackfin/mach-common/dpmc.c +++ b/arch/blackfin/mach-common/dpmc.c @@ -19,9 +19,6 @@ #define DRIVER_NAME "bfin dpmc" -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, DRIVER_NAME, msg) - struct bfin_dpmc_platform_data *pdata; /** diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 8bce5ed031e4..1fbd94c44457 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -177,6 +177,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) while (msg_queue->count) { msg = &msg_queue->ipi_message[msg_queue->head]; switch (msg->type) { + case BFIN_IPI_RESCHEDULE: + scheduler_ipi(); + break; case BFIN_IPI_CALL_FUNC: spin_unlock_irqrestore(&msg_queue->lock, flags); ipi_call_function(cpu, msg); diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 4c9e3e1ba5d1..66cc75657e2f 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id) ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi); + if (ipi.vector & IPI_SCHEDULE) { + scheduler_ipi(); + } if (ipi.vector & IPI_CALL) { - func(info); + func(info); } if (ipi.vector & IPI_FLUSH_TLB) { - if (flush_mm == FLUSH_ALL) - __flush_tlb_all(); - else if (flush_vma == FLUSH_ALL) + if (flush_mm == FLUSH_ALL) + __flush_tlb_all(); + else if (flush_vma == FLUSH_ALL) __flush_tlb_mm(flush_mm); - else + else __flush_tlb_page(flush_vma, flush_addr); } diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 4ce8d1358fee..c04dd576f333 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -37,6 +37,7 @@ #include <linux/crash_dump.h> #include <linux/iommu-helper.h> #include <linux/dma-mapping.h> +#include <linux/prefetch.h> #include <asm/delay.h> /* ia64_get_itc() */ #include <asm/io.h> diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index 22f61526a8e1..f09b174244d5 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -23,8 +23,6 @@ #include <linux/acpi.h> #include <acpi/processor.h> -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) - MODULE_AUTHOR("Venkatesh Pallipadi"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); @@ -47,12 +45,12 @@ processor_set_pstate ( { s64 retval; - dprintk("processor_set_pstate\n"); + pr_debug("processor_set_pstate\n"); retval = ia64_pal_set_pstate((u64)value); if (retval) { - dprintk("Failed to set freq to 0x%x, with error 0x%lx\n", + pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n", value, retval); return -ENODEV; } @@ -67,14 +65,14 @@ processor_get_pstate ( u64 pstate_index = 0; s64 retval; - dprintk("processor_get_pstate\n"); + pr_debug("processor_get_pstate\n"); retval = ia64_pal_get_pstate(&pstate_index, PAL_GET_PSTATE_TYPE_INSTANT); *value = (u32) pstate_index; if (retval) - dprintk("Failed to get current freq with " + pr_debug("Failed to get current freq with " "error 0x%lx, idx 0x%x\n", retval, *value); return (int)retval; @@ -90,7 +88,7 @@ extract_clock ( { unsigned long i; - dprintk("extract_clock\n"); + pr_debug("extract_clock\n"); for (i = 0; i < data->acpi_data.state_count; i++) { if (value == data->acpi_data.states[i].status) @@ -110,7 +108,7 @@ processor_get_freq ( cpumask_t saved_mask; unsigned long clock_freq; - dprintk("processor_get_freq\n"); + pr_debug("processor_get_freq\n"); saved_mask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask_of(cpu)); @@ -148,7 +146,7 @@ processor_set_freq ( cpumask_t saved_mask; int retval; - dprintk("processor_set_freq\n"); + pr_debug("processor_set_freq\n"); saved_mask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask_of(cpu)); @@ -159,16 +157,16 @@ processor_set_freq ( if (state == data->acpi_data.state) { if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", state); + pr_debug("Called after resume, resetting to P%d\n", state); data->resume = 0; } else { - dprintk("Already at target state (P%d)\n", state); + pr_debug("Already at target state (P%d)\n", state); retval = 0; goto migrate_end; } } - dprintk("Transitioning from P%d to P%d\n", + pr_debug("Transitioning from P%d to P%d\n", data->acpi_data.state, state); /* cpufreq frequency struct */ @@ -186,7 +184,7 @@ processor_set_freq ( value = (u32) data->acpi_data.states[state].control; - dprintk("Transitioning to state: 0x%08x\n", value); + pr_debug("Transitioning to state: 0x%08x\n", value); ret = processor_set_pstate(value); if (ret) { @@ -219,7 +217,7 @@ acpi_cpufreq_get ( { struct cpufreq_acpi_io *data = acpi_io_data[cpu]; - dprintk("acpi_cpufreq_get\n"); + pr_debug("acpi_cpufreq_get\n"); return processor_get_freq(data, cpu); } @@ -235,7 +233,7 @@ acpi_cpufreq_target ( unsigned int next_state = 0; unsigned int result = 0; - dprintk("acpi_cpufreq_setpolicy\n"); + pr_debug("acpi_cpufreq_setpolicy\n"); result = cpufreq_frequency_table_target(policy, data->freq_table, target_freq, relation, &next_state); @@ -255,7 +253,7 @@ acpi_cpufreq_verify ( unsigned int result = 0; struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - dprintk("acpi_cpufreq_verify\n"); + pr_debug("acpi_cpufreq_verify\n"); result = cpufreq_frequency_table_verify(policy, data->freq_table); @@ -273,7 +271,7 @@ acpi_cpufreq_cpu_init ( struct cpufreq_acpi_io *data; unsigned int result = 0; - dprintk("acpi_cpufreq_cpu_init\n"); + pr_debug("acpi_cpufreq_cpu_init\n"); data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) @@ -288,7 +286,7 @@ acpi_cpufreq_cpu_init ( /* capability check */ if (data->acpi_data.state_count <= 1) { - dprintk("No P-States\n"); + pr_debug("No P-States\n"); result = -ENODEV; goto err_unreg; } @@ -297,7 +295,7 @@ acpi_cpufreq_cpu_init ( ACPI_ADR_SPACE_FIXED_HARDWARE) || (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Unsupported address space [%d, %d]\n", + pr_debug("Unsupported address space [%d, %d]\n", (u32) (data->acpi_data.control_register.space_id), (u32) (data->acpi_data.status_register.space_id)); result = -ENODEV; @@ -348,7 +346,7 @@ acpi_cpufreq_cpu_init ( "activated.\n", cpu); for (i = 0; i < data->acpi_data.state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", + pr_debug(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", (i == data->acpi_data.state?'*':' '), i, (u32) data->acpi_data.states[i].core_frequency, (u32) data->acpi_data.states[i].power, @@ -383,7 +381,7 @@ acpi_cpufreq_cpu_exit ( { struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - dprintk("acpi_cpufreq_cpu_exit\n"); + pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); @@ -418,7 +416,7 @@ static struct cpufreq_driver acpi_cpufreq_driver = { static int __init acpi_cpufreq_init (void) { - dprintk("acpi_cpufreq_init\n"); + pr_debug("acpi_cpufreq_init\n"); return cpufreq_register_driver(&acpi_cpufreq_driver); } @@ -427,7 +425,7 @@ acpi_cpufreq_init (void) static void __exit acpi_cpufreq_exit (void) { - dprintk("acpi_cpufreq_exit\n"); + pr_debug("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); return; diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index 1b811c61bdc6..f64097b5118a 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -31,8 +31,6 @@ static struct clocksource clocksource_cyclone = { .rating = 300, .read = read_cyclone, .mask = (1LL << 40) - 1, - .mult = 0, /*to be calculated*/ - .shift = 16, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -118,9 +116,7 @@ int __init init_cyclone_clock(void) /* initialize last tick */ cyclone_mc = cyclone_timer; clocksource_cyclone.fsys_mmio = cyclone_timer; - clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ, - clocksource_cyclone.shift); - clocksource_register(&clocksource_cyclone); + clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ); return 0; } diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 5b704740f160..782c3a357f24 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -31,6 +31,7 @@ #include <linux/irq.h> #include <linux/ratelimit.h> #include <linux/acpi.h> +#include <linux/sched.h> #include <asm/delay.h> #include <asm/intrinsics.h> @@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) smp_local_flush_tlb(); kstat_incr_irqs_this_cpu(irq, desc); } else if (unlikely(IS_RESCHEDULE(vector))) { + scheduler_ipi(); kstat_incr_irqs_this_cpu(irq, desc); } else { ia64_setreg(_IA64_REG_CR_TPR, vector); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 156ad803d5b7..04440cc09b40 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -73,8 +73,6 @@ static struct clocksource clocksource_itc = { .rating = 350, .read = itc_get_cycles, .mask = CLOCKSOURCE_MASK(64), - .mult = 0, /*to be calculated*/ - .shift = 16, .flags = CLOCK_SOURCE_IS_CONTINUOUS, #ifdef CONFIG_PARAVIRT .resume = paravirt_clocksource_resume, @@ -365,11 +363,8 @@ ia64_init_itm (void) ia64_cpu_local_tick(); if (!itc_clocksource) { - /* Sort out mult/shift values: */ - clocksource_itc.mult = - clocksource_hz2mult(local_cpu_data->itc_freq, - clocksource_itc.shift); - clocksource_register(&clocksource_itc); + clocksource_register_hz(&clocksource_itc, + local_cpu_data->itc_freq); itc_clocksource = &clocksource_itc; } } diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 787de4a77d82..53c0ba004e9e 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -209,6 +209,7 @@ SECTIONS { data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) { + _sdata = .; INIT_TASK_DATA(PAGE_SIZE) CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) READ_MOSTLY_DATA(SMP_CACHE_BYTES) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h index f6c5617e16af..b214b5b0432d 100644 --- a/arch/ia64/kvm/vti.h +++ b/arch/ia64/kvm/vti.h @@ -83,13 +83,13 @@ union vac { unsigned long value; struct { - int a_int:1; - int a_from_int_cr:1; - int a_to_int_cr:1; - int a_from_psr:1; - int a_from_cpuid:1; - int a_cover:1; - int a_bsw:1; + unsigned int a_int:1; + unsigned int a_from_int_cr:1; + unsigned int a_to_int_cr:1; + unsigned int a_from_psr:1; + unsigned int a_from_cpuid:1; + unsigned int a_cover:1; + unsigned int a_bsw:1; long reserved:57; }; }; @@ -97,12 +97,12 @@ union vac { union vdc { unsigned long value; struct { - int d_vmsw:1; - int d_extint:1; - int d_ibr_dbr:1; - int d_pmc:1; - int d_to_pmd:1; - int d_itm:1; + unsigned int d_vmsw:1; + unsigned int d_extint:1; + unsigned int d_ibr_dbr:1; + unsigned int d_pmc:1; + unsigned int d_to_pmd:1; + unsigned int d_itm:1; long reserved:58; }; }; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 0799fea4c588..20b359376128 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -10,6 +10,7 @@ #include <linux/interrupt.h> #include <linux/kprobes.h> #include <linux/kdebug.h> +#include <linux/prefetch.h> #include <asm/pgtable.h> #include <asm/processor.h> diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c index 5cdd7e4a597c..f7b798993cea 100644 --- a/arch/ia64/oprofile/backtrace.c +++ b/arch/ia64/oprofile/backtrace.c @@ -29,7 +29,7 @@ typedef struct unsigned int depth; struct pt_regs *regs; struct unw_frame_info frame; - u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */ + unsigned long *prev_pfs_loc; /* state for WAR for old spinlock ool code */ } ia64_backtrace_t; /* Returns non-zero if the PC is in the Interrupt Vector Table */ diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index 21d6f09e3447..c34efda122e1 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -33,8 +33,6 @@ static struct clocksource clocksource_sn2 = { .rating = 450, .read = read_sn2, .mask = (1LL << 55) - 1, - .mult = 0, - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -57,9 +55,7 @@ ia64_sn_udelay (unsigned long usecs) void __init sn_timer_init(void) { clocksource_sn2.fsys_mmio = RTC_COUNTER_ADDR; - clocksource_sn2.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, - clocksource_sn2.shift); - clocksource_register(&clocksource_sn2); + clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second); ia64_udelay = &ia64_sn_udelay; } diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c index 108bb858acf2..b279e142c633 100644 --- a/arch/ia64/xen/irq_xen.c +++ b/arch/ia64/xen/irq_xen.c @@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt; static int xen_slab_ready; #ifdef CONFIG_SMP +#include <linux/sched.h> + /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ, * it ends up to issue several memory accesses upon percpu data and * thus adds unnecessary traffic to other paths. @@ -99,7 +101,13 @@ static int xen_slab_ready; static irqreturn_t xen_dummy_handler(int irq, void *dev_id) { + return IRQ_HANDLED; +} +static irqreturn_t +xen_resched_handler(int irq, void *dev_id) +{ + scheduler_ipi(); return IRQ_HANDLED; } @@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = { }; static struct irqaction xen_resched_irqaction = { - .handler = xen_dummy_handler, + .handler = xen_resched_handler, .flags = IRQF_DISABLED, .name = "resched" }; diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 31cef20b2996..fc10b39893d4 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id) * * Description: This routine executes on CPU which received * 'RESCHEDULE_IPI'. - * Rescheduling is processed at the exit of interrupt - * operation. * * Born on Date: 2002.02.05 * @@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id) *==========================================================================*/ void smp_reschedule_interrupt(void) { - /* nothing to do */ + scheduler_ipi(); } /*==========================================================================* diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index c194d64cdbb9..cf95aec77460 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -44,6 +44,7 @@ SECTIONS EXCEPTION_TABLE(16) NOTES + _sdata = .; /* Start of data section */ RODATA RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE) _edata = .; /* End of data section */ diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c index b995513d527f..95022b04b62d 100644 --- a/arch/m68k/atari/atakeyb.c +++ b/arch/m68k/atari/atakeyb.c @@ -36,13 +36,10 @@ /* Hook for MIDI serial driver */ void (*atari_MIDI_interrupt_hook) (void); -/* Hook for mouse driver */ -void (*atari_mouse_interrupt_hook) (char *); /* Hook for keyboard inputdev driver */ void (*atari_input_keyboard_interrupt_hook) (unsigned char, char); /* Hook for mouse inputdev driver */ void (*atari_input_mouse_interrupt_hook) (char *); -EXPORT_SYMBOL(atari_mouse_interrupt_hook); EXPORT_SYMBOL(atari_input_keyboard_interrupt_hook); EXPORT_SYMBOL(atari_input_mouse_interrupt_hook); @@ -263,8 +260,8 @@ repeat: kb_state.buf[kb_state.len++] = scancode; if (kb_state.len == 3) { kb_state.state = KEYBOARD; - if (atari_mouse_interrupt_hook) - atari_mouse_interrupt_hook(kb_state.buf); + if (atari_input_mouse_interrupt_hook) + atari_input_mouse_interrupt_hook(kb_state.buf); } break; @@ -575,7 +572,7 @@ int atari_keyb_init(void) kb_state.len = 0; error = request_irq(IRQ_MFP_ACIA, atari_keyboard_interrupt, - IRQ_TYPE_SLOW, "keyboard/mouse/MIDI", + IRQ_TYPE_SLOW, "keyboard,mouse,MIDI", atari_keyboard_interrupt); if (error) return error; diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c index 604329fafbb8..ddbf43ca8858 100644 --- a/arch/m68k/atari/stdma.c +++ b/arch/m68k/atari/stdma.c @@ -180,7 +180,7 @@ void __init stdma_init(void) { stdma_isr = NULL; if (request_irq(IRQ_MFP_FDC, stdma_int, IRQ_TYPE_SLOW | IRQF_SHARED, - "ST-DMA: floppy/ACSI/IDE/Falcon-SCSI", stdma_int)) + "ST-DMA floppy,ACSI,IDE,Falcon-SCSI", stdma_int)) pr_err("Couldn't register ST-DMA interrupt\n"); } diff --git a/arch/m68k/include/asm/atarikb.h b/arch/m68k/include/asm/atarikb.h index 546e7da5804f..68f3622bf591 100644 --- a/arch/m68k/include/asm/atarikb.h +++ b/arch/m68k/include/asm/atarikb.h @@ -34,8 +34,6 @@ void ikbd_joystick_disable(void); /* Hook for MIDI serial driver */ extern void (*atari_MIDI_interrupt_hook) (void); -/* Hook for mouse driver */ -extern void (*atari_mouse_interrupt_hook) (char *); /* Hook for keyboard inputdev driver */ extern void (*atari_input_keyboard_interrupt_hook) (unsigned char, char); /* Hook for mouse inputdev driver */ diff --git a/arch/m68k/include/asm/bitops_mm.h b/arch/m68k/include/asm/bitops_mm.h index 9d69f6e62365..e9020f88a748 100644 --- a/arch/m68k/include/asm/bitops_mm.h +++ b/arch/m68k/include/asm/bitops_mm.h @@ -181,14 +181,15 @@ static inline int find_first_zero_bit(const unsigned long *vaddr, { const unsigned long *p = vaddr; int res = 32; + unsigned int words; unsigned long num; if (!size) return 0; - size = (size + 31) >> 5; + words = (size + 31) >> 5; while (!(num = ~*p++)) { - if (!--size) + if (!--words) goto out; } @@ -196,7 +197,8 @@ static inline int find_first_zero_bit(const unsigned long *vaddr, : "=d" (res) : "d" (num & -num)); res ^= 31; out: - return ((long)p - (long)vaddr - 4) * 8 + res; + res += ((long)p - (long)vaddr - 4) * 8; + return res < size ? res : size; } static inline int find_next_zero_bit(const unsigned long *vaddr, int size, @@ -215,27 +217,32 @@ static inline int find_next_zero_bit(const unsigned long *vaddr, int size, /* Look for zero in first longword */ __asm__ __volatile__ ("bfffo %1{#0,#0},%0" : "=d" (res) : "d" (num & -num)); - if (res < 32) - return offset + (res ^ 31); + if (res < 32) { + offset += res ^ 31; + return offset < size ? offset : size; + } offset += 32; + + if (offset >= size) + return size; } /* No zero yet, search remaining full bytes for a zero */ - res = find_first_zero_bit(p, size - ((long)p - (long)vaddr) * 8); - return offset + res; + return offset + find_first_zero_bit(p, size - offset); } static inline int find_first_bit(const unsigned long *vaddr, unsigned size) { const unsigned long *p = vaddr; int res = 32; + unsigned int words; unsigned long num; if (!size) return 0; - size = (size + 31) >> 5; + words = (size + 31) >> 5; while (!(num = *p++)) { - if (!--size) + if (!--words) goto out; } @@ -243,7 +250,8 @@ static inline int find_first_bit(const unsigned long *vaddr, unsigned size) : "=d" (res) : "d" (num & -num)); res ^= 31; out: - return ((long)p - (long)vaddr - 4) * 8 + res; + res += ((long)p - (long)vaddr - 4) * 8; + return res < size ? res : size; } static inline int find_next_bit(const unsigned long *vaddr, int size, @@ -262,13 +270,17 @@ static inline int find_next_bit(const unsigned long *vaddr, int size, /* Look for one in first longword */ __asm__ __volatile__ ("bfffo %1{#0,#0},%0" : "=d" (res) : "d" (num & -num)); - if (res < 32) - return offset + (res ^ 31); + if (res < 32) { + offset += res ^ 31; + return offset < size ? offset : size; + } offset += 32; + + if (offset >= size) + return size; } /* No one yet, search remaining full bytes for a one */ - res = find_first_bit(p, size - ((long)p - (long)vaddr) * 8); - return offset + res; + return offset + find_first_bit(p, size - offset); } /* @@ -366,23 +378,25 @@ static inline int test_bit_le(int nr, const void *vaddr) static inline int find_first_zero_bit_le(const void *vaddr, unsigned size) { const unsigned long *p = vaddr, *addr = vaddr; - int res; + int res = 0; + unsigned int words; if (!size) return 0; - size = (size >> 5) + ((size & 31) > 0); - while (*p++ == ~0UL) - { - if (--size == 0) - return (p - addr) << 5; + words = (size >> 5) + ((size & 31) > 0); + while (*p++ == ~0UL) { + if (--words == 0) + goto out; } --p; for (res = 0; res < 32; res++) if (!test_bit_le(res, p)) break; - return (p - addr) * 32 + res; +out: + res += (p - addr) * 32; + return res < size ? res : size; } static inline unsigned long find_next_zero_bit_le(const void *addr, @@ -400,10 +414,15 @@ static inline unsigned long find_next_zero_bit_le(const void *addr, offset -= bit; /* Look for zero in first longword */ for (res = bit; res < 32; res++) - if (!test_bit_le(res, p)) - return offset + res; + if (!test_bit_le(res, p)) { + offset += res; + return offset < size ? offset : size; + } p++; offset += 32; + + if (offset >= size) + return size; } /* No zero yet, search remaining full bytes for a zero */ return offset + find_first_zero_bit_le(p, size - offset); @@ -412,22 +431,25 @@ static inline unsigned long find_next_zero_bit_le(const void *addr, static inline int find_first_bit_le(const void *vaddr, unsigned size) { const unsigned long *p = vaddr, *addr = vaddr; - int res; + int res = 0; + unsigned int words; if (!size) return 0; - size = (size >> 5) + ((size & 31) > 0); + words = (size >> 5) + ((size & 31) > 0); while (*p++ == 0UL) { - if (--size == 0) - return (p - addr) << 5; + if (--words == 0) + goto out; } --p; for (res = 0; res < 32; res++) if (test_bit_le(res, p)) break; - return (p - addr) * 32 + res; +out: + res += (p - addr) * 32; + return res < size ? res : size; } static inline unsigned long find_next_bit_le(const void *addr, @@ -445,10 +467,15 @@ static inline unsigned long find_next_bit_le(const void *addr, offset -= bit; /* Look for one in first longword */ for (res = bit; res < 32; res++) - if (test_bit_le(res, p)) - return offset + res; + if (test_bit_le(res, p)) { + offset += res; + return offset < size ? offset : size; + } p++; offset += 32; + + if (offset >= size) + return size; } /* No set bit yet, search remaining full bytes for a set bit */ return offset + find_first_bit_le(p, size - offset); diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 29e17907d9f2..f3b649de2a1b 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -22,7 +22,7 @@ #define __NR_mknod 14 #define __NR_chmod 15 #define __NR_chown 16 -#define __NR_break 17 +/*#define __NR_break 17*/ #define __NR_oldstat 18 #define __NR_lseek 19 #define __NR_getpid 20 @@ -36,11 +36,11 @@ #define __NR_oldfstat 28 #define __NR_pause 29 #define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 +/*#define __NR_stty 31*/ +/*#define __NR_gtty 32*/ #define __NR_access 33 #define __NR_nice 34 -#define __NR_ftime 35 +/*#define __NR_ftime 35*/ #define __NR_sync 36 #define __NR_kill 37 #define __NR_rename 38 @@ -49,7 +49,7 @@ #define __NR_dup 41 #define __NR_pipe 42 #define __NR_times 43 -#define __NR_prof 44 +/*#define __NR_prof 44*/ #define __NR_brk 45 #define __NR_setgid 46 #define __NR_getgid 47 @@ -58,13 +58,13 @@ #define __NR_getegid 50 #define __NR_acct 51 #define __NR_umount2 52 -#define __NR_lock 53 +/*#define __NR_lock 53*/ #define __NR_ioctl 54 #define __NR_fcntl 55 -#define __NR_mpx 56 +/*#define __NR_mpx 56*/ #define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 +/*#define __NR_ulimit 58*/ +/*#define __NR_oldolduname 59*/ #define __NR_umask 60 #define __NR_chroot 61 #define __NR_ustat 62 @@ -103,10 +103,10 @@ #define __NR_fchown 95 #define __NR_getpriority 96 #define __NR_setpriority 97 -#define __NR_profil 98 +/*#define __NR_profil 98*/ #define __NR_statfs 99 #define __NR_fstatfs 100 -#define __NR_ioperm 101 +/*#define __NR_ioperm 101*/ #define __NR_socketcall 102 #define __NR_syslog 103 #define __NR_setitimer 104 @@ -114,11 +114,11 @@ #define __NR_stat 106 #define __NR_lstat 107 #define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl /* 110 */ not supported +/*#define __NR_olduname 109*/ +/*#define __NR_iopl 110*/ /* not supported */ #define __NR_vhangup 111 -#define __NR_idle /* 112 */ Obsolete -#define __NR_vm86 /* 113 */ not supported +/*#define __NR_idle 112*/ /* Obsolete */ +/*#define __NR_vm86 113*/ /* not supported */ #define __NR_wait4 114 #define __NR_swapoff 115 #define __NR_sysinfo 116 @@ -132,17 +132,17 @@ #define __NR_adjtimex 124 #define __NR_mprotect 125 #define __NR_sigprocmask 126 -#define __NR_create_module 127 +/*#define __NR_create_module 127*/ #define __NR_init_module 128 #define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 +/*#define __NR_get_kernel_syms 130*/ #define __NR_quotactl 131 #define __NR_getpgid 132 #define __NR_fchdir 133 #define __NR_bdflush 134 #define __NR_sysfs 135 #define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +/*#define __NR_afs_syscall 137*/ /* Syscall for Andrew File System */ #define __NR_setfsuid 138 #define __NR_setfsgid 139 #define __NR__llseek 140 @@ -172,7 +172,7 @@ #define __NR_setresuid 164 #define __NR_getresuid 165 #define __NR_getpagesize 166 -#define __NR_query_module 167 +/*#define __NR_query_module 167*/ #define __NR_poll 168 #define __NR_nfsservctl 169 #define __NR_setresgid 170 @@ -193,8 +193,8 @@ #define __NR_capset 185 #define __NR_sigaltstack 186 #define __NR_sendfile 187 -#define __NR_getpmsg 188 /* some people actually want streams */ -#define __NR_putpmsg 189 /* some people actually want streams */ +/*#define __NR_getpmsg 188*/ /* some people actually want streams */ +/*#define __NR_putpmsg 189*/ /* some people actually want streams */ #define __NR_vfork 190 #define __NR_ugetrlimit 191 #define __NR_mmap2 192 @@ -223,6 +223,8 @@ #define __NR_setfsuid32 215 #define __NR_setfsgid32 216 #define __NR_pivot_root 217 +/* 218*/ +/* 219*/ #define __NR_getdents64 220 #define __NR_gettid 221 #define __NR_tkill 222 @@ -281,7 +283,7 @@ #define __NR_mq_notify 275 #define __NR_mq_getsetattr 276 #define __NR_waitid 277 -#define __NR_vserver 278 +/*#define __NR_vserver 278*/ #define __NR_add_key 279 #define __NR_request_key 280 #define __NR_keyctl 281 diff --git a/arch/m68k/kernel/Makefile_mm b/arch/m68k/kernel/Makefile_mm index 55d5d6b680a2..aced67804579 100644 --- a/arch/m68k/kernel/Makefile_mm +++ b/arch/m68k/kernel/Makefile_mm @@ -10,7 +10,7 @@ endif extra-y += vmlinux.lds obj-y := entry.o process.o traps.o ints.o signal.o ptrace.o module.o \ - sys_m68k.o time.o setup.o m68k_ksyms.o devres.o + sys_m68k.o time.o setup.o m68k_ksyms.o devres.o syscalltable.o devres-y = ../../../kernel/irq/devres.o diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S index 1359ee659574..bd0ec05263b2 100644 --- a/arch/m68k/kernel/entry_mm.S +++ b/arch/m68k/kernel/entry_mm.S @@ -407,351 +407,3 @@ resume: rts -.data -ALIGN -sys_call_table: - .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ - .long sys_exit - .long sys_fork - .long sys_read - .long sys_write - .long sys_open /* 5 */ - .long sys_close - .long sys_waitpid - .long sys_creat - .long sys_link - .long sys_unlink /* 10 */ - .long sys_execve - .long sys_chdir - .long sys_time - .long sys_mknod - .long sys_chmod /* 15 */ - .long sys_chown16 - .long sys_ni_syscall /* old break syscall holder */ - .long sys_stat - .long sys_lseek - .long sys_getpid /* 20 */ - .long sys_mount - .long sys_oldumount - .long sys_setuid16 - .long sys_getuid16 - .long sys_stime /* 25 */ - .long sys_ptrace - .long sys_alarm - .long sys_fstat - .long sys_pause - .long sys_utime /* 30 */ - .long sys_ni_syscall /* old stty syscall holder */ - .long sys_ni_syscall /* old gtty syscall holder */ - .long sys_access - .long sys_nice - .long sys_ni_syscall /* 35 */ /* old ftime syscall holder */ - .long sys_sync - .long sys_kill - .long sys_rename - .long sys_mkdir - .long sys_rmdir /* 40 */ - .long sys_dup - .long sys_pipe - .long sys_times - .long sys_ni_syscall /* old prof syscall holder */ - .long sys_brk /* 45 */ - .long sys_setgid16 - .long sys_getgid16 - .long sys_signal - .long sys_geteuid16 - .long sys_getegid16 /* 50 */ - .long sys_acct - .long sys_umount /* recycled never used phys() */ - .long sys_ni_syscall /* old lock syscall holder */ - .long sys_ioctl - .long sys_fcntl /* 55 */ - .long sys_ni_syscall /* old mpx syscall holder */ - .long sys_setpgid - .long sys_ni_syscall /* old ulimit syscall holder */ - .long sys_ni_syscall - .long sys_umask /* 60 */ - .long sys_chroot - .long sys_ustat - .long sys_dup2 - .long sys_getppid - .long sys_getpgrp /* 65 */ - .long sys_setsid - .long sys_sigaction - .long sys_sgetmask - .long sys_ssetmask - .long sys_setreuid16 /* 70 */ - .long sys_setregid16 - .long sys_sigsuspend - .long sys_sigpending - .long sys_sethostname - .long sys_setrlimit /* 75 */ - .long sys_old_getrlimit - .long sys_getrusage - .long sys_gettimeofday - .long sys_settimeofday - .long sys_getgroups16 /* 80 */ - .long sys_setgroups16 - .long sys_old_select - .long sys_symlink - .long sys_lstat - .long sys_readlink /* 85 */ - .long sys_uselib - .long sys_swapon - .long sys_reboot - .long sys_old_readdir - .long sys_old_mmap /* 90 */ - .long sys_munmap - .long sys_truncate - .long sys_ftruncate - .long sys_fchmod - .long sys_fchown16 /* 95 */ - .long sys_getpriority - .long sys_setpriority - .long sys_ni_syscall /* old profil syscall holder */ - .long sys_statfs - .long sys_fstatfs /* 100 */ - .long sys_ni_syscall /* ioperm for i386 */ - .long sys_socketcall - .long sys_syslog - .long sys_setitimer - .long sys_getitimer /* 105 */ - .long sys_newstat - .long sys_newlstat - .long sys_newfstat - .long sys_ni_syscall - .long sys_ni_syscall /* 110 */ /* iopl for i386 */ - .long sys_vhangup - .long sys_ni_syscall /* obsolete idle() syscall */ - .long sys_ni_syscall /* vm86old for i386 */ - .long sys_wait4 - .long sys_swapoff /* 115 */ - .long sys_sysinfo - .long sys_ipc - .long sys_fsync - .long sys_sigreturn - .long sys_clone /* 120 */ - .long sys_setdomainname - .long sys_newuname - .long sys_cacheflush /* modify_ldt for i386 */ - .long sys_adjtimex - .long sys_mprotect /* 125 */ - .long sys_sigprocmask - .long sys_ni_syscall /* old "create_module" */ - .long sys_init_module - .long sys_delete_module - .long sys_ni_syscall /* 130 - old "get_kernel_syms" */ - .long sys_quotactl - .long sys_getpgid - .long sys_fchdir - .long sys_bdflush - .long sys_sysfs /* 135 */ - .long sys_personality - .long sys_ni_syscall /* for afs_syscall */ - .long sys_setfsuid16 - .long sys_setfsgid16 - .long sys_llseek /* 140 */ - .long sys_getdents - .long sys_select - .long sys_flock - .long sys_msync - .long sys_readv /* 145 */ - .long sys_writev - .long sys_getsid - .long sys_fdatasync - .long sys_sysctl - .long sys_mlock /* 150 */ - .long sys_munlock - .long sys_mlockall - .long sys_munlockall - .long sys_sched_setparam - .long sys_sched_getparam /* 155 */ - .long sys_sched_setscheduler - .long sys_sched_getscheduler - .long sys_sched_yield - .long sys_sched_get_priority_max - .long sys_sched_get_priority_min /* 160 */ - .long sys_sched_rr_get_interval - .long sys_nanosleep - .long sys_mremap - .long sys_setresuid16 - .long sys_getresuid16 /* 165 */ - .long sys_getpagesize - .long sys_ni_syscall /* old sys_query_module */ - .long sys_poll - .long sys_nfsservctl - .long sys_setresgid16 /* 170 */ - .long sys_getresgid16 - .long sys_prctl - .long sys_rt_sigreturn - .long sys_rt_sigaction - .long sys_rt_sigprocmask /* 175 */ - .long sys_rt_sigpending - .long sys_rt_sigtimedwait - .long sys_rt_sigqueueinfo - .long sys_rt_sigsuspend - .long sys_pread64 /* 180 */ - .long sys_pwrite64 - .long sys_lchown16; - .long sys_getcwd - .long sys_capget - .long sys_capset /* 185 */ - .long sys_sigaltstack - .long sys_sendfile - .long sys_ni_syscall /* streams1 */ - .long sys_ni_syscall /* streams2 */ - .long sys_vfork /* 190 */ - .long sys_getrlimit - .long sys_mmap2 - .long sys_truncate64 - .long sys_ftruncate64 - .long sys_stat64 /* 195 */ - .long sys_lstat64 - .long sys_fstat64 - .long sys_chown - .long sys_getuid - .long sys_getgid /* 200 */ - .long sys_geteuid - .long sys_getegid - .long sys_setreuid - .long sys_setregid - .long sys_getgroups /* 205 */ - .long sys_setgroups - .long sys_fchown - .long sys_setresuid - .long sys_getresuid - .long sys_setresgid /* 210 */ - .long sys_getresgid - .long sys_lchown - .long sys_setuid - .long sys_setgid - .long sys_setfsuid /* 215 */ - .long sys_setfsgid - .long sys_pivot_root - .long sys_ni_syscall - .long sys_ni_syscall - .long sys_getdents64 /* 220 */ - .long sys_gettid - .long sys_tkill - .long sys_setxattr - .long sys_lsetxattr - .long sys_fsetxattr /* 225 */ - .long sys_getxattr - .long sys_lgetxattr - .long sys_fgetxattr - .long sys_listxattr - .long sys_llistxattr /* 230 */ - .long sys_flistxattr - .long sys_removexattr - .long sys_lremovexattr - .long sys_fremovexattr - .long sys_futex /* 235 */ - .long sys_sendfile64 - .long sys_mincore - .long sys_madvise - .long sys_fcntl64 - .long sys_readahead /* 240 */ - .long sys_io_setup - .long sys_io_destroy - .long sys_io_getevents - .long sys_io_submit - .long sys_io_cancel /* 245 */ - .long sys_fadvise64 - .long sys_exit_group - .long sys_lookup_dcookie - .long sys_epoll_create - .long sys_epoll_ctl /* 250 */ - .long sys_epoll_wait - .long sys_remap_file_pages - .long sys_set_tid_address - .long sys_timer_create - .long sys_timer_settime /* 255 */ - .long sys_timer_gettime - .long sys_timer_getoverrun - .long sys_timer_delete - .long sys_clock_settime - .long sys_clock_gettime /* 260 */ - .long sys_clock_getres - .long sys_clock_nanosleep - .long sys_statfs64 - .long sys_fstatfs64 - .long sys_tgkill /* 265 */ - .long sys_utimes - .long sys_fadvise64_64 - .long sys_mbind - .long sys_get_mempolicy - .long sys_set_mempolicy /* 270 */ - .long sys_mq_open - .long sys_mq_unlink - .long sys_mq_timedsend - .long sys_mq_timedreceive - .long sys_mq_notify /* 275 */ - .long sys_mq_getsetattr - .long sys_waitid - .long sys_ni_syscall /* for sys_vserver */ - .long sys_add_key - .long sys_request_key /* 280 */ - .long sys_keyctl - .long sys_ioprio_set - .long sys_ioprio_get - .long sys_inotify_init - .long sys_inotify_add_watch /* 285 */ - .long sys_inotify_rm_watch - .long sys_migrate_pages - .long sys_openat - .long sys_mkdirat - .long sys_mknodat /* 290 */ - .long sys_fchownat - .long sys_futimesat - .long sys_fstatat64 - .long sys_unlinkat - .long sys_renameat /* 295 */ - .long sys_linkat - .long sys_symlinkat - .long sys_readlinkat - .long sys_fchmodat - .long sys_faccessat /* 300 */ - .long sys_ni_syscall /* Reserved for pselect6 */ - .long sys_ni_syscall /* Reserved for ppoll */ - .long sys_unshare - .long sys_set_robust_list - .long sys_get_robust_list /* 305 */ - .long sys_splice - .long sys_sync_file_range - .long sys_tee - .long sys_vmsplice - .long sys_move_pages /* 310 */ - .long sys_sched_setaffinity - .long sys_sched_getaffinity - .long sys_kexec_load - .long sys_getcpu - .long sys_epoll_pwait /* 315 */ - .long sys_utimensat - .long sys_signalfd - .long sys_timerfd_create - .long sys_eventfd - .long sys_fallocate /* 320 */ - .long sys_timerfd_settime - .long sys_timerfd_gettime - .long sys_signalfd4 - .long sys_eventfd2 - .long sys_epoll_create1 /* 325 */ - .long sys_dup3 - .long sys_pipe2 - .long sys_inotify_init1 - .long sys_preadv - .long sys_pwritev /* 330 */ - .long sys_rt_tgsigqueueinfo - .long sys_perf_event_open - .long sys_get_thread_area - .long sys_set_thread_area - .long sys_atomic_cmpxchg_32 /* 335 */ - .long sys_atomic_barrier - .long sys_fanotify_init - .long sys_fanotify_mark - .long sys_prlimit64 - .long sys_name_to_handle_at /* 340 */ - .long sys_open_by_handle_at - .long sys_clock_adjtime - .long sys_syncfs - diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 9b8393d8adb8..5909e392cb1e 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -1,6 +1,4 @@ /* - * linux/arch/m68knommu/kernel/syscalltable.S - * * Copyright (C) 2002, Greg Ungerer (gerg@snapgear.com) * * Based on older entry.S files, the following copyrights apply: @@ -9,171 +7,176 @@ * Kenneth Albanowski <kjahds@kjahds.com>, * Copyright (C) 2000 Lineo Inc. (www.lineo.com) * Copyright (C) 1991, 1992 Linus Torvalds + * + * Linux/m68k support by Hamish Macdonald */ #include <linux/sys.h> #include <linux/linkage.h> -#include <asm/unistd.h> -.text +#ifndef CONFIG_MMU +#define sys_mmap2 sys_mmap_pgoff +#endif + +.section .rodata ALIGN ENTRY(sys_call_table) - .long sys_restart_syscall /* 0 - old "setup()" system call */ + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit .long sys_fork .long sys_read .long sys_write - .long sys_open /* 5 */ + .long sys_open /* 5 */ .long sys_close .long sys_waitpid .long sys_creat .long sys_link - .long sys_unlink /* 10 */ + .long sys_unlink /* 10 */ .long sys_execve .long sys_chdir .long sys_time .long sys_mknod - .long sys_chmod /* 15 */ + .long sys_chmod /* 15 */ .long sys_chown16 - .long sys_ni_syscall /* old break syscall holder */ + .long sys_ni_syscall /* old break syscall holder */ .long sys_stat .long sys_lseek - .long sys_getpid /* 20 */ + .long sys_getpid /* 20 */ .long sys_mount .long sys_oldumount .long sys_setuid16 .long sys_getuid16 - .long sys_stime /* 25 */ + .long sys_stime /* 25 */ .long sys_ptrace .long sys_alarm .long sys_fstat .long sys_pause - .long sys_utime /* 30 */ - .long sys_ni_syscall /* old stty syscall holder */ - .long sys_ni_syscall /* old gtty syscall holder */ + .long sys_utime /* 30 */ + .long sys_ni_syscall /* old stty syscall holder */ + .long sys_ni_syscall /* old gtty syscall holder */ .long sys_access .long sys_nice - .long sys_ni_syscall /* 35 */ /* old ftime syscall holder */ + .long sys_ni_syscall /* 35 - old ftime syscall holder */ .long sys_sync .long sys_kill .long sys_rename .long sys_mkdir - .long sys_rmdir /* 40 */ + .long sys_rmdir /* 40 */ .long sys_dup .long sys_pipe .long sys_times - .long sys_ni_syscall /* old prof syscall holder */ - .long sys_brk /* 45 */ + .long sys_ni_syscall /* old prof syscall holder */ + .long sys_brk /* 45 */ .long sys_setgid16 .long sys_getgid16 .long sys_signal .long sys_geteuid16 - .long sys_getegid16 /* 50 */ + .long sys_getegid16 /* 50 */ .long sys_acct - .long sys_umount /* recycled never used phys() */ - .long sys_ni_syscall /* old lock syscall holder */ + .long sys_umount /* recycled never used phys() */ + .long sys_ni_syscall /* old lock syscall holder */ .long sys_ioctl - .long sys_fcntl /* 55 */ - .long sys_ni_syscall /* old mpx syscall holder */ + .long sys_fcntl /* 55 */ + .long sys_ni_syscall /* old mpx syscall holder */ .long sys_setpgid - .long sys_ni_syscall /* old ulimit syscall holder */ + .long sys_ni_syscall /* old ulimit syscall holder */ .long sys_ni_syscall - .long sys_umask /* 60 */ + .long sys_umask /* 60 */ .long sys_chroot .long sys_ustat .long sys_dup2 .long sys_getppid - .long sys_getpgrp /* 65 */ + .long sys_getpgrp /* 65 */ .long sys_setsid .long sys_sigaction .long sys_sgetmask .long sys_ssetmask - .long sys_setreuid16 /* 70 */ + .long sys_setreuid16 /* 70 */ .long sys_setregid16 .long sys_sigsuspend .long sys_sigpending .long sys_sethostname - .long sys_setrlimit /* 75 */ + .long sys_setrlimit /* 75 */ .long sys_old_getrlimit .long sys_getrusage .long sys_gettimeofday .long sys_settimeofday - .long sys_getgroups16 /* 80 */ + .long sys_getgroups16 /* 80 */ .long sys_setgroups16 .long sys_old_select .long sys_symlink .long sys_lstat - .long sys_readlink /* 85 */ + .long sys_readlink /* 85 */ .long sys_uselib - .long sys_ni_syscall /* sys_swapon */ + .long sys_swapon .long sys_reboot .long sys_old_readdir - .long sys_old_mmap /* 90 */ + .long sys_old_mmap /* 90 */ .long sys_munmap .long sys_truncate .long sys_ftruncate .long sys_fchmod - .long sys_fchown16 /* 95 */ + .long sys_fchown16 /* 95 */ .long sys_getpriority .long sys_setpriority - .long sys_ni_syscall /* old profil syscall holder */ + .long sys_ni_syscall /* old profil syscall holder */ .long sys_statfs - .long sys_fstatfs /* 100 */ - .long sys_ni_syscall /* ioperm for i386 */ + .long sys_fstatfs /* 100 */ + .long sys_ni_syscall /* ioperm for i386 */ .long sys_socketcall .long sys_syslog .long sys_setitimer - .long sys_getitimer /* 105 */ + .long sys_getitimer /* 105 */ .long sys_newstat .long sys_newlstat .long sys_newfstat .long sys_ni_syscall - .long sys_ni_syscall /* iopl for i386 */ /* 110 */ + .long sys_ni_syscall /* 110 - iopl for i386 */ .long sys_vhangup - .long sys_ni_syscall /* obsolete idle() syscall */ - .long sys_ni_syscall /* vm86old for i386 */ + .long sys_ni_syscall /* obsolete idle() syscall */ + .long sys_ni_syscall /* vm86old for i386 */ .long sys_wait4 - .long sys_ni_syscall /* 115 */ /* sys_swapoff */ + .long sys_swapoff /* 115 */ .long sys_sysinfo .long sys_ipc .long sys_fsync .long sys_sigreturn - .long sys_clone /* 120 */ + .long sys_clone /* 120 */ .long sys_setdomainname .long sys_newuname - .long sys_cacheflush /* modify_ldt for i386 */ + .long sys_cacheflush /* modify_ldt for i386 */ .long sys_adjtimex - .long sys_ni_syscall /* 125 */ /* sys_mprotect */ + .long sys_mprotect /* 125 */ .long sys_sigprocmask - .long sys_ni_syscall /* old "creat_module" */ + .long sys_ni_syscall /* old "create_module" */ .long sys_init_module .long sys_delete_module - .long sys_ni_syscall /* 130: old "get_kernel_syms" */ + .long sys_ni_syscall /* 130 - old "get_kernel_syms" */ .long sys_quotactl .long sys_getpgid .long sys_fchdir .long sys_bdflush - .long sys_sysfs /* 135 */ + .long sys_sysfs /* 135 */ .long sys_personality - .long sys_ni_syscall /* for afs_syscall */ + .long sys_ni_syscall /* for afs_syscall */ .long sys_setfsuid16 .long sys_setfsgid16 - .long sys_llseek /* 140 */ + .long sys_llseek /* 140 */ .long sys_getdents .long sys_select .long sys_flock - .long sys_ni_syscall /* sys_msync */ - .long sys_readv /* 145 */ + .long sys_msync + .long sys_readv /* 145 */ .long sys_writev .long sys_getsid .long sys_fdatasync .long sys_sysctl - .long sys_ni_syscall /* 150 */ /* sys_mlock */ - .long sys_ni_syscall /* sys_munlock */ - .long sys_ni_syscall /* sys_mlockall */ - .long sys_ni_syscall /* sys_munlockall */ + .long sys_mlock /* 150 */ + .long sys_munlock + .long sys_mlockall + .long sys_munlockall .long sys_sched_setparam - .long sys_sched_getparam /* 155 */ + .long sys_sched_getparam /* 155 */ .long sys_sched_setscheduler .long sys_sched_getscheduler .long sys_sched_yield @@ -181,124 +184,124 @@ ENTRY(sys_call_table) .long sys_sched_get_priority_min /* 160 */ .long sys_sched_rr_get_interval .long sys_nanosleep - .long sys_ni_syscall /* sys_mremap */ + .long sys_mremap .long sys_setresuid16 - .long sys_getresuid16 /* 165 */ - .long sys_getpagesize /* sys_getpagesize */ - .long sys_ni_syscall /* old "query_module" */ + .long sys_getresuid16 /* 165 */ + .long sys_getpagesize + .long sys_ni_syscall /* old "query_module" */ .long sys_poll - .long sys_ni_syscall /* sys_nfsservctl */ - .long sys_setresgid16 /* 170 */ + .long sys_nfsservctl + .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl .long sys_rt_sigreturn .long sys_rt_sigaction - .long sys_rt_sigprocmask /* 175 */ + .long sys_rt_sigprocmask /* 175 */ .long sys_rt_sigpending .long sys_rt_sigtimedwait .long sys_rt_sigqueueinfo .long sys_rt_sigsuspend - .long sys_pread64 /* 180 */ + .long sys_pread64 /* 180 */ .long sys_pwrite64 .long sys_lchown16 .long sys_getcwd .long sys_capget - .long sys_capset /* 185 */ + .long sys_capset /* 185 */ .long sys_sigaltstack .long sys_sendfile - .long sys_ni_syscall /* streams1 */ - .long sys_ni_syscall /* streams2 */ - .long sys_vfork /* 190 */ + .long sys_ni_syscall /* streams1 */ + .long sys_ni_syscall /* streams2 */ + .long sys_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap_pgoff + .long sys_mmap2 .long sys_truncate64 .long sys_ftruncate64 - .long sys_stat64 /* 195 */ + .long sys_stat64 /* 195 */ .long sys_lstat64 .long sys_fstat64 .long sys_chown .long sys_getuid - .long sys_getgid /* 200 */ + .long sys_getgid /* 200 */ .long sys_geteuid .long sys_getegid .long sys_setreuid .long sys_setregid - .long sys_getgroups /* 205 */ + .long sys_getgroups /* 205 */ .long sys_setgroups .long sys_fchown .long sys_setresuid .long sys_getresuid - .long sys_setresgid /* 210 */ + .long sys_setresgid /* 210 */ .long sys_getresgid .long sys_lchown .long sys_setuid .long sys_setgid - .long sys_setfsuid /* 215 */ + .long sys_setfsuid /* 215 */ .long sys_setfsgid .long sys_pivot_root .long sys_ni_syscall .long sys_ni_syscall - .long sys_getdents64 /* 220 */ + .long sys_getdents64 /* 220 */ .long sys_gettid .long sys_tkill .long sys_setxattr .long sys_lsetxattr - .long sys_fsetxattr /* 225 */ + .long sys_fsetxattr /* 225 */ .long sys_getxattr .long sys_lgetxattr .long sys_fgetxattr .long sys_listxattr - .long sys_llistxattr /* 230 */ + .long sys_llistxattr /* 230 */ .long sys_flistxattr .long sys_removexattr .long sys_lremovexattr .long sys_fremovexattr - .long sys_futex /* 235 */ + .long sys_futex /* 235 */ .long sys_sendfile64 - .long sys_ni_syscall /* sys_mincore */ - .long sys_ni_syscall /* sys_madvise */ + .long sys_mincore + .long sys_madvise .long sys_fcntl64 - .long sys_readahead /* 240 */ + .long sys_readahead /* 240 */ .long sys_io_setup .long sys_io_destroy .long sys_io_getevents .long sys_io_submit - .long sys_io_cancel /* 245 */ + .long sys_io_cancel /* 245 */ .long sys_fadvise64 .long sys_exit_group .long sys_lookup_dcookie .long sys_epoll_create - .long sys_epoll_ctl /* 250 */ + .long sys_epoll_ctl /* 250 */ .long sys_epoll_wait - .long sys_ni_syscall /* sys_remap_file_pages */ + .long sys_remap_file_pages .long sys_set_tid_address .long sys_timer_create - .long sys_timer_settime /* 255 */ + .long sys_timer_settime /* 255 */ .long sys_timer_gettime .long sys_timer_getoverrun .long sys_timer_delete .long sys_clock_settime - .long sys_clock_gettime /* 260 */ + .long sys_clock_gettime /* 260 */ .long sys_clock_getres .long sys_clock_nanosleep .long sys_statfs64 .long sys_fstatfs64 - .long sys_tgkill /* 265 */ + .long sys_tgkill /* 265 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_mbind + .long sys_mbind .long sys_get_mempolicy - .long sys_set_mempolicy /* 270 */ + .long sys_set_mempolicy /* 270 */ .long sys_mq_open .long sys_mq_unlink .long sys_mq_timedsend .long sys_mq_timedreceive - .long sys_mq_notify /* 275 */ + .long sys_mq_notify /* 275 */ .long sys_mq_getsetattr .long sys_waitid - .long sys_ni_syscall /* for sys_vserver */ + .long sys_ni_syscall /* for sys_vserver */ .long sys_add_key - .long sys_request_key /* 280 */ + .long sys_request_key /* 280 */ .long sys_keyctl .long sys_ioprio_set .long sys_ioprio_get @@ -319,8 +322,8 @@ ENTRY(sys_call_table) .long sys_readlinkat .long sys_fchmodat .long sys_faccessat /* 300 */ - .long sys_ni_syscall /* Reserved for pselect6 */ - .long sys_ni_syscall /* Reserved for ppoll */ + .long sys_pselect6 + .long sys_ppoll .long sys_unshare .long sys_set_robust_list .long sys_get_robust_list /* 305 */ @@ -363,7 +366,3 @@ ENTRY(sys_call_table) .long sys_clock_adjtime .long sys_syncfs - .rept NR_syscalls-(.-sys_call_table)/4 - .long sys_ni_syscall - .endr - diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 878be5f38cad..d0993594f558 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -25,6 +25,8 @@ SECTIONS EXCEPTION_TABLE(16) + _sdata = .; /* Start of data section */ + RODATA RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 1ad6b7ad2c17..8080469ee6c1 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -25,6 +25,7 @@ SECTIONS _etext = .; /* End of text section */ EXCEPTION_TABLE(16) :data + _sdata = .; /* Start of rw data section */ RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data /* End of data goes *here* so that freeing init code works properly. */ _edata = .; diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index d8a214f11ac2..e5550ce4e0eb 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -217,16 +217,12 @@ static struct clocksource clocksource_microblaze = { .rating = 300, .read = microblaze_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 8, /* I can shift it */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static int __init microblaze_clocksource_init(void) { - clocksource_microblaze.mult = - clocksource_hz2mult(timer_clock_freq, - clocksource_microblaze.shift); - if (clocksource_register(&clocksource_microblaze)) + if (clocksource_register_hz(&clocksource_microblaze, timer_clock_freq)) panic("failed to register clocksource"); /* stop timer1 */ diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 7ff9b5492041..aef6c917b45a 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -11,6 +11,7 @@ platforms += dec platforms += emma platforms += jazz platforms += jz4740 +platforms += lantiq platforms += lasat platforms += loongson platforms += mipssim diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 351c80fbba7e..cef1a854487d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -212,6 +212,24 @@ config MACH_JZ4740 select HAVE_PWM select HAVE_CLK +config LANTIQ + bool "Lantiq based platforms" + select DMA_NONCOHERENT + select IRQ_CPU + select CEVT_R4K + select CSRC_R4K + select SYS_HAS_CPU_MIPS32_R1 + select SYS_HAS_CPU_MIPS32_R2 + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_MULTITHREADING + select SYS_HAS_EARLY_PRINTK + select ARCH_REQUIRE_GPIOLIB + select SWAP_IO_SPACE + select BOOT_RAW + select HAVE_CLK + select MIPS_MACHINE + config LASAT bool "LASAT Networks platforms" select CEVT_R4K @@ -736,6 +754,33 @@ config CAVIUM_OCTEON_REFERENCE_BOARD Hikari Say Y here for most Octeon reference boards. +config NLM_XLR_BOARD + bool "Netlogic XLR/XLS based systems" + depends on EXPERIMENTAL + select BOOT_ELF32 + select NLM_COMMON + select NLM_XLR + select SYS_HAS_CPU_XLR + select SYS_SUPPORTS_SMP + select HW_HAS_PCI + select SWAP_IO_SPACE + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_64BIT_KERNEL + select 64BIT_PHYS_ADDR + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_SUPPORTS_HIGHMEM + select DMA_COHERENT + select NR_CPUS_DEFAULT_32 + select CEVT_R4K + select CSRC_R4K + select IRQ_CPU + select ZONE_DMA if 64BIT + select SYNC_R4K + select SYS_HAS_EARLY_PRINTK + help + Support for systems based on Netlogic XLR and XLS processors. + Say Y here if you have a XLR or XLS based board. + endchoice source "arch/mips/alchemy/Kconfig" @@ -743,6 +788,7 @@ source "arch/mips/ath79/Kconfig" source "arch/mips/bcm63xx/Kconfig" source "arch/mips/jazz/Kconfig" source "arch/mips/jz4740/Kconfig" +source "arch/mips/lantiq/Kconfig" source "arch/mips/lasat/Kconfig" source "arch/mips/pmc-sierra/Kconfig" source "arch/mips/powertv/Kconfig" @@ -752,6 +798,7 @@ source "arch/mips/txx9/Kconfig" source "arch/mips/vr41xx/Kconfig" source "arch/mips/cavium-octeon/Kconfig" source "arch/mips/loongson/Kconfig" +source "arch/mips/netlogic/Kconfig" endmenu @@ -1420,6 +1467,17 @@ config CPU_BMIPS5000 help Broadcom BMIPS5000 processors. +config CPU_XLR + bool "Netlogic XLR SoC" + depends on SYS_HAS_CPU_XLR + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_64BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select WEAK_ORDERING + select WEAK_REORDERING_BEYOND_LLSC + select CPU_SUPPORTS_HUGEPAGES + help + Netlogic Microsystems XLR/XLS processors. endchoice if CPU_LOONGSON2F @@ -1550,6 +1608,9 @@ config SYS_HAS_CPU_BMIPS4380 config SYS_HAS_CPU_BMIPS5000 bool +config SYS_HAS_CPU_XLR + bool + # # CPU may reorder R->R, R->W, W->R, W->W # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC @@ -2334,6 +2395,7 @@ config MMU config I8253 bool + select CLKSRC_I8253 select MIPS_EXTERNAL_TIMER config ZONE_DMA32 diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 53e3514ba10e..884819cd0607 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -191,6 +191,18 @@ endif # include $(srctree)/arch/mips/Kbuild.platforms +# +# NETLOGIC SOC Common (common) +# +cflags-$(CONFIG_NLM_COMMON) += -I$(srctree)/arch/mips/include/asm/mach-netlogic +cflags-$(CONFIG_NLM_COMMON) += -I$(srctree)/arch/mips/include/asm/netlogic + +# +# NETLOGIC XLR/XLS SoC, Simulator and boards +# +core-$(CONFIG_NLM_XLR) += arch/mips/netlogic/xlr/ +load-$(CONFIG_NLM_XLR_BOARD) += 0xffffffff84000000 + cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic drivers-$(CONFIG_PCI) += arch/mips/pci/ diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c index ca0506a8585a..3a5abb54d505 100644 --- a/arch/mips/alchemy/common/dbdma.c +++ b/arch/mips/alchemy/common/dbdma.c @@ -36,7 +36,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/module.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <asm/mach-au1x00/au1000.h> #include <asm/mach-au1x00/au1xxx_dbdma.h> @@ -58,7 +58,8 @@ static DEFINE_SPINLOCK(au1xxx_dbdma_spin_lock); /* I couldn't find a macro that did this... */ #define ALIGN_ADDR(x, a) ((((u32)(x)) + (a-1)) & ~(a-1)) -static dbdma_global_t *dbdma_gptr = (dbdma_global_t *)DDMA_GLOBAL_BASE; +static dbdma_global_t *dbdma_gptr = + (dbdma_global_t *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR); static int dbdma_initialized; static dbdev_tab_t dbdev_tab[] = { @@ -299,7 +300,7 @@ u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, if (ctp != NULL) { memset(ctp, 0, sizeof(chan_tab_t)); ctp->chan_index = chan = i; - dcp = DDMA_CHANNEL_BASE; + dcp = KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR); dcp += (0x0100 * chan); ctp->chan_ptr = (au1x_dma_chan_t *)dcp; cp = (au1x_dma_chan_t *)dcp; @@ -958,105 +959,75 @@ u32 au1xxx_dbdma_put_dscr(u32 chanid, au1x_ddma_desc_t *dscr) } -struct alchemy_dbdma_sysdev { - struct sys_device sysdev; - u32 pm_regs[NUM_DBDMA_CHANS + 1][6]; -}; +static unsigned long alchemy_dbdma_pm_data[NUM_DBDMA_CHANS + 1][6]; -static int alchemy_dbdma_suspend(struct sys_device *dev, - pm_message_t state) +static int alchemy_dbdma_suspend(void) { - struct alchemy_dbdma_sysdev *sdev = - container_of(dev, struct alchemy_dbdma_sysdev, sysdev); int i; - u32 addr; + void __iomem *addr; - addr = DDMA_GLOBAL_BASE; - sdev->pm_regs[0][0] = au_readl(addr + 0x00); - sdev->pm_regs[0][1] = au_readl(addr + 0x04); - sdev->pm_regs[0][2] = au_readl(addr + 0x08); - sdev->pm_regs[0][3] = au_readl(addr + 0x0c); + addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR); + alchemy_dbdma_pm_data[0][0] = __raw_readl(addr + 0x00); + alchemy_dbdma_pm_data[0][1] = __raw_readl(addr + 0x04); + alchemy_dbdma_pm_data[0][2] = __raw_readl(addr + 0x08); + alchemy_dbdma_pm_data[0][3] = __raw_readl(addr + 0x0c); /* save channel configurations */ - for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) { - sdev->pm_regs[i][0] = au_readl(addr + 0x00); - sdev->pm_regs[i][1] = au_readl(addr + 0x04); - sdev->pm_regs[i][2] = au_readl(addr + 0x08); - sdev->pm_regs[i][3] = au_readl(addr + 0x0c); - sdev->pm_regs[i][4] = au_readl(addr + 0x10); - sdev->pm_regs[i][5] = au_readl(addr + 0x14); + addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR); + for (i = 1; i <= NUM_DBDMA_CHANS; i++) { + alchemy_dbdma_pm_data[i][0] = __raw_readl(addr + 0x00); + alchemy_dbdma_pm_data[i][1] = __raw_readl(addr + 0x04); + alchemy_dbdma_pm_data[i][2] = __raw_readl(addr + 0x08); + alchemy_dbdma_pm_data[i][3] = __raw_readl(addr + 0x0c); + alchemy_dbdma_pm_data[i][4] = __raw_readl(addr + 0x10); + alchemy_dbdma_pm_data[i][5] = __raw_readl(addr + 0x14); /* halt channel */ - au_writel(sdev->pm_regs[i][0] & ~1, addr + 0x00); - au_sync(); - while (!(au_readl(addr + 0x14) & 1)) - au_sync(); + __raw_writel(alchemy_dbdma_pm_data[i][0] & ~1, addr + 0x00); + wmb(); + while (!(__raw_readl(addr + 0x14) & 1)) + wmb(); addr += 0x100; /* next channel base */ } /* disable channel interrupts */ - au_writel(0, DDMA_GLOBAL_BASE + 0x0c); - au_sync(); + addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR); + __raw_writel(0, addr + 0x0c); + wmb(); return 0; } -static int alchemy_dbdma_resume(struct sys_device *dev) +static void alchemy_dbdma_resume(void) { - struct alchemy_dbdma_sysdev *sdev = - container_of(dev, struct alchemy_dbdma_sysdev, sysdev); int i; - u32 addr; + void __iomem *addr; - addr = DDMA_GLOBAL_BASE; - au_writel(sdev->pm_regs[0][0], addr + 0x00); - au_writel(sdev->pm_regs[0][1], addr + 0x04); - au_writel(sdev->pm_regs[0][2], addr + 0x08); - au_writel(sdev->pm_regs[0][3], addr + 0x0c); + addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR); + __raw_writel(alchemy_dbdma_pm_data[0][0], addr + 0x00); + __raw_writel(alchemy_dbdma_pm_data[0][1], addr + 0x04); + __raw_writel(alchemy_dbdma_pm_data[0][2], addr + 0x08); + __raw_writel(alchemy_dbdma_pm_data[0][3], addr + 0x0c); /* restore channel configurations */ - for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) { - au_writel(sdev->pm_regs[i][0], addr + 0x00); - au_writel(sdev->pm_regs[i][1], addr + 0x04); - au_writel(sdev->pm_regs[i][2], addr + 0x08); - au_writel(sdev->pm_regs[i][3], addr + 0x0c); - au_writel(sdev->pm_regs[i][4], addr + 0x10); - au_writel(sdev->pm_regs[i][5], addr + 0x14); - au_sync(); + addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR); + for (i = 1; i <= NUM_DBDMA_CHANS; i++) { + __raw_writel(alchemy_dbdma_pm_data[i][0], addr + 0x00); + __raw_writel(alchemy_dbdma_pm_data[i][1], addr + 0x04); + __raw_writel(alchemy_dbdma_pm_data[i][2], addr + 0x08); + __raw_writel(alchemy_dbdma_pm_data[i][3], addr + 0x0c); + __raw_writel(alchemy_dbdma_pm_data[i][4], addr + 0x10); + __raw_writel(alchemy_dbdma_pm_data[i][5], addr + 0x14); + wmb(); addr += 0x100; /* next channel base */ } - - return 0; } -static struct sysdev_class alchemy_dbdma_sysdev_class = { - .name = "dbdma", +static struct syscore_ops alchemy_dbdma_syscore_ops = { .suspend = alchemy_dbdma_suspend, .resume = alchemy_dbdma_resume, }; -static int __init alchemy_dbdma_sysdev_init(void) -{ - struct alchemy_dbdma_sysdev *sdev; - int ret; - - ret = sysdev_class_register(&alchemy_dbdma_sysdev_class); - if (ret) - return ret; - - sdev = kzalloc(sizeof(struct alchemy_dbdma_sysdev), GFP_KERNEL); - if (!sdev) - return -ENOMEM; - - sdev->sysdev.id = -1; - sdev->sysdev.cls = &alchemy_dbdma_sysdev_class; - ret = sysdev_register(&sdev->sysdev); - if (ret) - kfree(sdev); - - return ret; -} - static int __init au1xxx_dbdma_init(void) { int irq_nr, ret; @@ -1084,11 +1055,7 @@ static int __init au1xxx_dbdma_init(void) else { dbdma_initialized = 1; printk(KERN_INFO "Alchemy DBDMA initialized\n"); - ret = alchemy_dbdma_sysdev_init(); - if (ret) { - printk(KERN_ERR "DBDMA PM init failed\n"); - ret = 0; - } + register_syscore_ops(&alchemy_dbdma_syscore_ops); } return ret; diff --git a/arch/mips/alchemy/common/dma.c b/arch/mips/alchemy/common/dma.c index d5278877891d..347980e79a89 100644 --- a/arch/mips/alchemy/common/dma.c +++ b/arch/mips/alchemy/common/dma.c @@ -58,6 +58,9 @@ * returned from request_dma. */ +/* DMA Channel register block spacing */ +#define DMA_CHANNEL_LEN 0x00000100 + DEFINE_SPINLOCK(au1000_dma_spin_lock); struct dma_chan au1000_dma_table[NUM_AU1000_DMA_CHANNELS] = { @@ -77,22 +80,23 @@ static const struct dma_dev { unsigned int fifo_addr; unsigned int dma_mode; } dma_dev_table[DMA_NUM_DEV] = { - {UART0_ADDR + UART_TX, 0}, - {UART0_ADDR + UART_RX, 0}, - {0, 0}, - {0, 0}, - {AC97C_DATA, DMA_DW16 }, /* coherent */ - {AC97C_DATA, DMA_DR | DMA_DW16 }, /* coherent */ - {UART3_ADDR + UART_TX, DMA_DW8 | DMA_NC}, - {UART3_ADDR + UART_RX, DMA_DR | DMA_DW8 | DMA_NC}, - {USBD_EP0RD, DMA_DR | DMA_DW8 | DMA_NC}, - {USBD_EP0WR, DMA_DW8 | DMA_NC}, - {USBD_EP2WR, DMA_DW8 | DMA_NC}, - {USBD_EP3WR, DMA_DW8 | DMA_NC}, - {USBD_EP4RD, DMA_DR | DMA_DW8 | DMA_NC}, - {USBD_EP5RD, DMA_DR | DMA_DW8 | DMA_NC}, - {I2S_DATA, DMA_DW32 | DMA_NC}, - {I2S_DATA, DMA_DR | DMA_DW32 | DMA_NC} + { AU1000_UART0_PHYS_ADDR + 0x04, DMA_DW8 }, /* UART0_TX */ + { AU1000_UART0_PHYS_ADDR + 0x00, DMA_DW8 | DMA_DR }, /* UART0_RX */ + { 0, 0 }, /* DMA_REQ0 */ + { 0, 0 }, /* DMA_REQ1 */ + { AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 }, /* AC97 TX c */ + { AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 | DMA_DR }, /* AC97 RX c */ + { AU1000_UART3_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC }, /* UART3_TX */ + { AU1000_UART3_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* UART3_RX */ + { AU1000_USBD_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* EP0RD */ + { AU1000_USBD_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC }, /* EP0WR */ + { AU1000_USBD_PHYS_ADDR + 0x08, DMA_DW8 | DMA_NC }, /* EP2WR */ + { AU1000_USBD_PHYS_ADDR + 0x0c, DMA_DW8 | DMA_NC }, /* EP3WR */ + { AU1000_USBD_PHYS_ADDR + 0x10, DMA_DW8 | DMA_NC | DMA_DR }, /* EP4RD */ + { AU1000_USBD_PHYS_ADDR + 0x14, DMA_DW8 | DMA_NC | DMA_DR }, /* EP5RD */ + /* on Au1500, these 2 are DMA_REQ2/3 (GPIO208/209) instead! */ + { AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC}, /* I2S TX */ + { AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC | DMA_DR}, /* I2S RX */ }; int au1000_dma_read_proc(char *buf, char **start, off_t fpos, @@ -123,10 +127,10 @@ int au1000_dma_read_proc(char *buf, char **start, off_t fpos, /* Device FIFO addresses and default DMA modes - 2nd bank */ static const struct dma_dev dma_dev_table_bank2[DMA_NUM_DEV_BANK2] = { - { SD0_XMIT_FIFO, DMA_DS | DMA_DW8 }, /* coherent */ - { SD0_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 }, /* coherent */ - { SD1_XMIT_FIFO, DMA_DS | DMA_DW8 }, /* coherent */ - { SD1_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 } /* coherent */ + { AU1100_SD0_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 }, /* coherent */ + { AU1100_SD0_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR }, /* coherent */ + { AU1100_SD1_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 }, /* coherent */ + { AU1100_SD1_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR } /* coherent */ }; void dump_au1000_dma_channel(unsigned int dmanr) @@ -202,7 +206,7 @@ int request_au1000_dma(int dev_id, const char *dev_str, } /* fill it in */ - chan->io = DMA_CHANNEL_BASE + i * DMA_CHANNEL_LEN; + chan->io = KSEG1ADDR(AU1000_DMA_PHYS_ADDR) + i * DMA_CHANNEL_LEN; chan->dev_id = dev_id; chan->dev_str = dev_str; chan->fifo_addr = dev->fifo_addr; diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c index 55dd7c888517..8b60ba0675e2 100644 --- a/arch/mips/alchemy/common/irq.c +++ b/arch/mips/alchemy/common/irq.c @@ -30,7 +30,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/slab.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <asm/irq_cpu.h> #include <asm/mipsregs.h> @@ -39,6 +39,36 @@ #include <asm/mach-pb1x00/pb1000.h> #endif +/* Interrupt Controller register offsets */ +#define IC_CFG0RD 0x40 +#define IC_CFG0SET 0x40 +#define IC_CFG0CLR 0x44 +#define IC_CFG1RD 0x48 +#define IC_CFG1SET 0x48 +#define IC_CFG1CLR 0x4C +#define IC_CFG2RD 0x50 +#define IC_CFG2SET 0x50 +#define IC_CFG2CLR 0x54 +#define IC_REQ0INT 0x54 +#define IC_SRCRD 0x58 +#define IC_SRCSET 0x58 +#define IC_SRCCLR 0x5C +#define IC_REQ1INT 0x5C +#define IC_ASSIGNRD 0x60 +#define IC_ASSIGNSET 0x60 +#define IC_ASSIGNCLR 0x64 +#define IC_WAKERD 0x68 +#define IC_WAKESET 0x68 +#define IC_WAKECLR 0x6C +#define IC_MASKRD 0x70 +#define IC_MASKSET 0x70 +#define IC_MASKCLR 0x74 +#define IC_RISINGRD 0x78 +#define IC_RISINGCLR 0x78 +#define IC_FALLINGRD 0x7C +#define IC_FALLINGCLR 0x7C +#define IC_TESTBIT 0x80 + static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type); /* NOTE on interrupt priorities: The original writers of this code said: @@ -221,89 +251,101 @@ struct au1xxx_irqmap au1200_irqmap[] __initdata = { static void au1x_ic0_unmask(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC0_INT_BASE; - au_writel(1 << bit, IC0_MASKSET); - au_writel(1 << bit, IC0_WAKESET); - au_sync(); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR); + + __raw_writel(1 << bit, base + IC_MASKSET); + __raw_writel(1 << bit, base + IC_WAKESET); + wmb(); } static void au1x_ic1_unmask(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC1_INT_BASE; - au_writel(1 << bit, IC1_MASKSET); - au_writel(1 << bit, IC1_WAKESET); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR); + + __raw_writel(1 << bit, base + IC_MASKSET); + __raw_writel(1 << bit, base + IC_WAKESET); /* very hacky. does the pb1000 cpld auto-disable this int? * nowhere in the current kernel sources is it disabled. --mlau */ #if defined(CONFIG_MIPS_PB1000) if (d->irq == AU1000_GPIO15_INT) - au_writel(0x4000, PB1000_MDR); /* enable int */ + __raw_writel(0x4000, (void __iomem *)PB1000_MDR); /* enable int */ #endif - au_sync(); + wmb(); } static void au1x_ic0_mask(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC0_INT_BASE; - au_writel(1 << bit, IC0_MASKCLR); - au_writel(1 << bit, IC0_WAKECLR); - au_sync(); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR); + + __raw_writel(1 << bit, base + IC_MASKCLR); + __raw_writel(1 << bit, base + IC_WAKECLR); + wmb(); } static void au1x_ic1_mask(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC1_INT_BASE; - au_writel(1 << bit, IC1_MASKCLR); - au_writel(1 << bit, IC1_WAKECLR); - au_sync(); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR); + + __raw_writel(1 << bit, base + IC_MASKCLR); + __raw_writel(1 << bit, base + IC_WAKECLR); + wmb(); } static void au1x_ic0_ack(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC0_INT_BASE; + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR); /* * This may assume that we don't get interrupts from * both edges at once, or if we do, that we don't care. */ - au_writel(1 << bit, IC0_FALLINGCLR); - au_writel(1 << bit, IC0_RISINGCLR); - au_sync(); + __raw_writel(1 << bit, base + IC_FALLINGCLR); + __raw_writel(1 << bit, base + IC_RISINGCLR); + wmb(); } static void au1x_ic1_ack(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC1_INT_BASE; + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR); /* * This may assume that we don't get interrupts from * both edges at once, or if we do, that we don't care. */ - au_writel(1 << bit, IC1_FALLINGCLR); - au_writel(1 << bit, IC1_RISINGCLR); - au_sync(); + __raw_writel(1 << bit, base + IC_FALLINGCLR); + __raw_writel(1 << bit, base + IC_RISINGCLR); + wmb(); } static void au1x_ic0_maskack(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC0_INT_BASE; + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR); - au_writel(1 << bit, IC0_WAKECLR); - au_writel(1 << bit, IC0_MASKCLR); - au_writel(1 << bit, IC0_RISINGCLR); - au_writel(1 << bit, IC0_FALLINGCLR); - au_sync(); + __raw_writel(1 << bit, base + IC_WAKECLR); + __raw_writel(1 << bit, base + IC_MASKCLR); + __raw_writel(1 << bit, base + IC_RISINGCLR); + __raw_writel(1 << bit, base + IC_FALLINGCLR); + wmb(); } static void au1x_ic1_maskack(struct irq_data *d) { unsigned int bit = d->irq - AU1000_INTC1_INT_BASE; + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR); - au_writel(1 << bit, IC1_WAKECLR); - au_writel(1 << bit, IC1_MASKCLR); - au_writel(1 << bit, IC1_RISINGCLR); - au_writel(1 << bit, IC1_FALLINGCLR); - au_sync(); + __raw_writel(1 << bit, base + IC_WAKECLR); + __raw_writel(1 << bit, base + IC_MASKCLR); + __raw_writel(1 << bit, base + IC_RISINGCLR); + __raw_writel(1 << bit, base + IC_FALLINGCLR); + wmb(); } static int au1x_ic1_setwake(struct irq_data *d, unsigned int on) @@ -318,13 +360,13 @@ static int au1x_ic1_setwake(struct irq_data *d, unsigned int on) return -EINVAL; local_irq_save(flags); - wakemsk = au_readl(SYS_WAKEMSK); + wakemsk = __raw_readl((void __iomem *)SYS_WAKEMSK); if (on) wakemsk |= 1 << bit; else wakemsk &= ~(1 << bit); - au_writel(wakemsk, SYS_WAKEMSK); - au_sync(); + __raw_writel(wakemsk, (void __iomem *)SYS_WAKEMSK); + wmb(); local_irq_restore(flags); return 0; @@ -356,81 +398,74 @@ static struct irq_chip au1x_ic1_chip = { static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type) { struct irq_chip *chip; - unsigned long icr[6]; - unsigned int bit, ic, irq = d->irq; + unsigned int bit, irq = d->irq; irq_flow_handler_t handler = NULL; unsigned char *name = NULL; + void __iomem *base; int ret; if (irq >= AU1000_INTC1_INT_BASE) { bit = irq - AU1000_INTC1_INT_BASE; chip = &au1x_ic1_chip; - ic = 1; + base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR); } else { bit = irq - AU1000_INTC0_INT_BASE; chip = &au1x_ic0_chip; - ic = 0; + base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR); } if (bit > 31) return -EINVAL; - icr[0] = ic ? IC1_CFG0SET : IC0_CFG0SET; - icr[1] = ic ? IC1_CFG1SET : IC0_CFG1SET; - icr[2] = ic ? IC1_CFG2SET : IC0_CFG2SET; - icr[3] = ic ? IC1_CFG0CLR : IC0_CFG0CLR; - icr[4] = ic ? IC1_CFG1CLR : IC0_CFG1CLR; - icr[5] = ic ? IC1_CFG2CLR : IC0_CFG2CLR; - ret = 0; switch (flow_type) { /* cfgregs 2:1:0 */ case IRQ_TYPE_EDGE_RISING: /* 0:0:1 */ - au_writel(1 << bit, icr[5]); - au_writel(1 << bit, icr[4]); - au_writel(1 << bit, icr[0]); + __raw_writel(1 << bit, base + IC_CFG2CLR); + __raw_writel(1 << bit, base + IC_CFG1CLR); + __raw_writel(1 << bit, base + IC_CFG0SET); handler = handle_edge_irq; name = "riseedge"; break; case IRQ_TYPE_EDGE_FALLING: /* 0:1:0 */ - au_writel(1 << bit, icr[5]); - au_writel(1 << bit, icr[1]); - au_writel(1 << bit, icr[3]); + __raw_writel(1 << bit, base + IC_CFG2CLR); + __raw_writel(1 << bit, base + IC_CFG1SET); + __raw_writel(1 << bit, base + IC_CFG0CLR); handler = handle_edge_irq; name = "falledge"; break; case IRQ_TYPE_EDGE_BOTH: /* 0:1:1 */ - au_writel(1 << bit, icr[5]); - au_writel(1 << bit, icr[1]); - au_writel(1 << bit, icr[0]); + __raw_writel(1 << bit, base + IC_CFG2CLR); + __raw_writel(1 << bit, base + IC_CFG1SET); + __raw_writel(1 << bit, base + IC_CFG0SET); handler = handle_edge_irq; name = "bothedge"; break; case IRQ_TYPE_LEVEL_HIGH: /* 1:0:1 */ - au_writel(1 << bit, icr[2]); - au_writel(1 << bit, icr[4]); - au_writel(1 << bit, icr[0]); + __raw_writel(1 << bit, base + IC_CFG2SET); + __raw_writel(1 << bit, base + IC_CFG1CLR); + __raw_writel(1 << bit, base + IC_CFG0SET); handler = handle_level_irq; name = "hilevel"; break; case IRQ_TYPE_LEVEL_LOW: /* 1:1:0 */ - au_writel(1 << bit, icr[2]); - au_writel(1 << bit, icr[1]); - au_writel(1 << bit, icr[3]); + __raw_writel(1 << bit, base + IC_CFG2SET); + __raw_writel(1 << bit, base + IC_CFG1SET); + __raw_writel(1 << bit, base + IC_CFG0CLR); handler = handle_level_irq; name = "lowlevel"; break; case IRQ_TYPE_NONE: /* 0:0:0 */ - au_writel(1 << bit, icr[5]); - au_writel(1 << bit, icr[4]); - au_writel(1 << bit, icr[3]); + __raw_writel(1 << bit, base + IC_CFG2CLR); + __raw_writel(1 << bit, base + IC_CFG1CLR); + __raw_writel(1 << bit, base + IC_CFG0CLR); break; default: ret = -EINVAL; } __irq_set_chip_handler_name_locked(d->irq, chip, handler, name); - au_sync(); + wmb(); return ret; } @@ -444,21 +479,21 @@ asmlinkage void plat_irq_dispatch(void) off = MIPS_CPU_IRQ_BASE + 7; goto handle; } else if (pending & CAUSEF_IP2) { - s = IC0_REQ0INT; + s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ0INT; off = AU1000_INTC0_INT_BASE; } else if (pending & CAUSEF_IP3) { - s = IC0_REQ1INT; + s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ1INT; off = AU1000_INTC0_INT_BASE; } else if (pending & CAUSEF_IP4) { - s = IC1_REQ0INT; + s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ0INT; off = AU1000_INTC1_INT_BASE; } else if (pending & CAUSEF_IP5) { - s = IC1_REQ1INT; + s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ1INT; off = AU1000_INTC1_INT_BASE; } else goto spurious; - s = au_readl(s); + s = __raw_readl((void __iomem *)s); if (unlikely(!s)) { spurious: spurious_interrupt(); @@ -469,48 +504,42 @@ handle: do_IRQ(off); } + +static inline void ic_init(void __iomem *base) +{ + /* initialize interrupt controller to a safe state */ + __raw_writel(0xffffffff, base + IC_CFG0CLR); + __raw_writel(0xffffffff, base + IC_CFG1CLR); + __raw_writel(0xffffffff, base + IC_CFG2CLR); + __raw_writel(0xffffffff, base + IC_MASKCLR); + __raw_writel(0xffffffff, base + IC_ASSIGNCLR); + __raw_writel(0xffffffff, base + IC_WAKECLR); + __raw_writel(0xffffffff, base + IC_SRCSET); + __raw_writel(0xffffffff, base + IC_FALLINGCLR); + __raw_writel(0xffffffff, base + IC_RISINGCLR); + __raw_writel(0x00000000, base + IC_TESTBIT); + wmb(); +} + static void __init au1000_init_irq(struct au1xxx_irqmap *map) { unsigned int bit, irq_nr; - int i; - - /* - * Initialize interrupt controllers to a safe state. - */ - au_writel(0xffffffff, IC0_CFG0CLR); - au_writel(0xffffffff, IC0_CFG1CLR); - au_writel(0xffffffff, IC0_CFG2CLR); - au_writel(0xffffffff, IC0_MASKCLR); - au_writel(0xffffffff, IC0_ASSIGNCLR); - au_writel(0xffffffff, IC0_WAKECLR); - au_writel(0xffffffff, IC0_SRCSET); - au_writel(0xffffffff, IC0_FALLINGCLR); - au_writel(0xffffffff, IC0_RISINGCLR); - au_writel(0x00000000, IC0_TESTBIT); - - au_writel(0xffffffff, IC1_CFG0CLR); - au_writel(0xffffffff, IC1_CFG1CLR); - au_writel(0xffffffff, IC1_CFG2CLR); - au_writel(0xffffffff, IC1_MASKCLR); - au_writel(0xffffffff, IC1_ASSIGNCLR); - au_writel(0xffffffff, IC1_WAKECLR); - au_writel(0xffffffff, IC1_SRCSET); - au_writel(0xffffffff, IC1_FALLINGCLR); - au_writel(0xffffffff, IC1_RISINGCLR); - au_writel(0x00000000, IC1_TESTBIT); + void __iomem *base; + ic_init((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR)); + ic_init((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR)); mips_cpu_irq_init(); /* register all 64 possible IC0+IC1 irq sources as type "none". * Use set_irq_type() to set edge/level behaviour at runtime. */ - for (i = AU1000_INTC0_INT_BASE; - (i < AU1000_INTC0_INT_BASE + 32); i++) - au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE); + for (irq_nr = AU1000_INTC0_INT_BASE; + (irq_nr < AU1000_INTC0_INT_BASE + 32); irq_nr++) + au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE); - for (i = AU1000_INTC1_INT_BASE; - (i < AU1000_INTC1_INT_BASE + 32); i++) - au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE); + for (irq_nr = AU1000_INTC1_INT_BASE; + (irq_nr < AU1000_INTC1_INT_BASE + 32); irq_nr++) + au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE); /* * Initialize IC0, which is fixed per processor. @@ -520,13 +549,13 @@ static void __init au1000_init_irq(struct au1xxx_irqmap *map) if (irq_nr >= AU1000_INTC1_INT_BASE) { bit = irq_nr - AU1000_INTC1_INT_BASE; - if (map->im_request) - au_writel(1 << bit, IC1_ASSIGNSET); + base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR); } else { bit = irq_nr - AU1000_INTC0_INT_BASE; - if (map->im_request) - au_writel(1 << bit, IC0_ASSIGNSET); + base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR); } + if (map->im_request) + __raw_writel(1 << bit, base + IC_ASSIGNSET); au1x_ic_settype(irq_get_irq_data(irq_nr), map->im_type); ++map; @@ -556,90 +585,62 @@ void __init arch_init_irq(void) } } -struct alchemy_ic_sysdev { - struct sys_device sysdev; - void __iomem *base; - unsigned long pmdata[7]; -}; -static int alchemy_ic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct alchemy_ic_sysdev *icdev = - container_of(dev, struct alchemy_ic_sysdev, sysdev); +static unsigned long alchemy_ic_pmdata[7 * 2]; - icdev->pmdata[0] = __raw_readl(icdev->base + IC_CFG0RD); - icdev->pmdata[1] = __raw_readl(icdev->base + IC_CFG1RD); - icdev->pmdata[2] = __raw_readl(icdev->base + IC_CFG2RD); - icdev->pmdata[3] = __raw_readl(icdev->base + IC_SRCRD); - icdev->pmdata[4] = __raw_readl(icdev->base + IC_ASSIGNRD); - icdev->pmdata[5] = __raw_readl(icdev->base + IC_WAKERD); - icdev->pmdata[6] = __raw_readl(icdev->base + IC_MASKRD); - - return 0; +static inline void alchemy_ic_suspend_one(void __iomem *base, unsigned long *d) +{ + d[0] = __raw_readl(base + IC_CFG0RD); + d[1] = __raw_readl(base + IC_CFG1RD); + d[2] = __raw_readl(base + IC_CFG2RD); + d[3] = __raw_readl(base + IC_SRCRD); + d[4] = __raw_readl(base + IC_ASSIGNRD); + d[5] = __raw_readl(base + IC_WAKERD); + d[6] = __raw_readl(base + IC_MASKRD); + ic_init(base); /* shut it up too while at it */ } -static int alchemy_ic_resume(struct sys_device *dev) +static inline void alchemy_ic_resume_one(void __iomem *base, unsigned long *d) { - struct alchemy_ic_sysdev *icdev = - container_of(dev, struct alchemy_ic_sysdev, sysdev); - - __raw_writel(0xffffffff, icdev->base + IC_MASKCLR); - __raw_writel(0xffffffff, icdev->base + IC_CFG0CLR); - __raw_writel(0xffffffff, icdev->base + IC_CFG1CLR); - __raw_writel(0xffffffff, icdev->base + IC_CFG2CLR); - __raw_writel(0xffffffff, icdev->base + IC_SRCCLR); - __raw_writel(0xffffffff, icdev->base + IC_ASSIGNCLR); - __raw_writel(0xffffffff, icdev->base + IC_WAKECLR); - __raw_writel(0xffffffff, icdev->base + IC_RISINGCLR); - __raw_writel(0xffffffff, icdev->base + IC_FALLINGCLR); - __raw_writel(0x00000000, icdev->base + IC_TESTBIT); - wmb(); - __raw_writel(icdev->pmdata[0], icdev->base + IC_CFG0SET); - __raw_writel(icdev->pmdata[1], icdev->base + IC_CFG1SET); - __raw_writel(icdev->pmdata[2], icdev->base + IC_CFG2SET); - __raw_writel(icdev->pmdata[3], icdev->base + IC_SRCSET); - __raw_writel(icdev->pmdata[4], icdev->base + IC_ASSIGNSET); - __raw_writel(icdev->pmdata[5], icdev->base + IC_WAKESET); + ic_init(base); + + __raw_writel(d[0], base + IC_CFG0SET); + __raw_writel(d[1], base + IC_CFG1SET); + __raw_writel(d[2], base + IC_CFG2SET); + __raw_writel(d[3], base + IC_SRCSET); + __raw_writel(d[4], base + IC_ASSIGNSET); + __raw_writel(d[5], base + IC_WAKESET); wmb(); - __raw_writel(icdev->pmdata[6], icdev->base + IC_MASKSET); + __raw_writel(d[6], base + IC_MASKSET); wmb(); +} +static int alchemy_ic_suspend(void) +{ + alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR), + alchemy_ic_pmdata); + alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR), + &alchemy_ic_pmdata[7]); return 0; } -static struct sysdev_class alchemy_ic_sysdev_class = { - .name = "ic", +static void alchemy_ic_resume(void) +{ + alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR), + &alchemy_ic_pmdata[7]); + alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR), + alchemy_ic_pmdata); +} + +static struct syscore_ops alchemy_ic_syscore_ops = { .suspend = alchemy_ic_suspend, .resume = alchemy_ic_resume, }; -static int __init alchemy_ic_sysdev_init(void) +static int __init alchemy_ic_pm_init(void) { - struct alchemy_ic_sysdev *icdev; - unsigned long icbase[2] = { IC0_PHYS_ADDR, IC1_PHYS_ADDR }; - int err, i; - - err = sysdev_class_register(&alchemy_ic_sysdev_class); - if (err) - return err; - - for (i = 0; i < 2; i++) { - icdev = kzalloc(sizeof(struct alchemy_ic_sysdev), GFP_KERNEL); - if (!icdev) - return -ENOMEM; - - icdev->base = ioremap(icbase[i], 0x1000); - - icdev->sysdev.id = i; - icdev->sysdev.cls = &alchemy_ic_sysdev_class; - err = sysdev_register(&icdev->sysdev); - if (err) { - kfree(icdev); - return err; - } - } - + register_syscore_ops(&alchemy_ic_syscore_ops); return 0; } -device_initcall(alchemy_ic_sysdev_init); +device_initcall(alchemy_ic_pm_init); diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c index 9e7814db3d03..3b2c18b14341 100644 --- a/arch/mips/alchemy/common/platform.c +++ b/arch/mips/alchemy/common/platform.c @@ -13,9 +13,10 @@ #include <linux/dma-mapping.h> #include <linux/etherdevice.h> +#include <linux/init.h> #include <linux/platform_device.h> #include <linux/serial_8250.h> -#include <linux/init.h> +#include <linux/slab.h> #include <asm/mach-au1x00/au1xxx.h> #include <asm/mach-au1x00/au1xxx_dbdma.h> @@ -30,21 +31,12 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state, #ifdef CONFIG_SERIAL_8250 switch (state) { case 0: - if ((__raw_readl(port->membase + UART_MOD_CNTRL) & 3) != 3) { - /* power-on sequence as suggested in the databooks */ - __raw_writel(0, port->membase + UART_MOD_CNTRL); - wmb(); - __raw_writel(1, port->membase + UART_MOD_CNTRL); - wmb(); - } - __raw_writel(3, port->membase + UART_MOD_CNTRL); /* full on */ - wmb(); + alchemy_uart_enable(CPHYSADDR(port->membase)); serial8250_do_pm(port, state, old_state); break; case 3: /* power off */ serial8250_do_pm(port, state, old_state); - __raw_writel(0, port->membase + UART_MOD_CNTRL); - wmb(); + alchemy_uart_disable(CPHYSADDR(port->membase)); break; default: serial8250_do_pm(port, state, old_state); @@ -65,38 +57,60 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state, .pm = alchemy_8250_pm, \ } -static struct plat_serial8250_port au1x00_uart_data[] = { -#if defined(CONFIG_SOC_AU1000) - PORT(UART0_PHYS_ADDR, AU1000_UART0_INT), - PORT(UART1_PHYS_ADDR, AU1000_UART1_INT), - PORT(UART2_PHYS_ADDR, AU1000_UART2_INT), - PORT(UART3_PHYS_ADDR, AU1000_UART3_INT), -#elif defined(CONFIG_SOC_AU1500) - PORT(UART0_PHYS_ADDR, AU1500_UART0_INT), - PORT(UART3_PHYS_ADDR, AU1500_UART3_INT), -#elif defined(CONFIG_SOC_AU1100) - PORT(UART0_PHYS_ADDR, AU1100_UART0_INT), - PORT(UART1_PHYS_ADDR, AU1100_UART1_INT), - PORT(UART3_PHYS_ADDR, AU1100_UART3_INT), -#elif defined(CONFIG_SOC_AU1550) - PORT(UART0_PHYS_ADDR, AU1550_UART0_INT), - PORT(UART1_PHYS_ADDR, AU1550_UART1_INT), - PORT(UART3_PHYS_ADDR, AU1550_UART3_INT), -#elif defined(CONFIG_SOC_AU1200) - PORT(UART0_PHYS_ADDR, AU1200_UART0_INT), - PORT(UART1_PHYS_ADDR, AU1200_UART1_INT), -#endif - { }, +static struct plat_serial8250_port au1x00_uart_data[][4] __initdata = { + [ALCHEMY_CPU_AU1000] = { + PORT(AU1000_UART0_PHYS_ADDR, AU1000_UART0_INT), + PORT(AU1000_UART1_PHYS_ADDR, AU1000_UART1_INT), + PORT(AU1000_UART2_PHYS_ADDR, AU1000_UART2_INT), + PORT(AU1000_UART3_PHYS_ADDR, AU1000_UART3_INT), + }, + [ALCHEMY_CPU_AU1500] = { + PORT(AU1000_UART0_PHYS_ADDR, AU1500_UART0_INT), + PORT(AU1000_UART3_PHYS_ADDR, AU1500_UART3_INT), + }, + [ALCHEMY_CPU_AU1100] = { + PORT(AU1000_UART0_PHYS_ADDR, AU1100_UART0_INT), + PORT(AU1000_UART1_PHYS_ADDR, AU1100_UART1_INT), + PORT(AU1000_UART3_PHYS_ADDR, AU1100_UART3_INT), + }, + [ALCHEMY_CPU_AU1550] = { + PORT(AU1000_UART0_PHYS_ADDR, AU1550_UART0_INT), + PORT(AU1000_UART1_PHYS_ADDR, AU1550_UART1_INT), + PORT(AU1000_UART3_PHYS_ADDR, AU1550_UART3_INT), + }, + [ALCHEMY_CPU_AU1200] = { + PORT(AU1000_UART0_PHYS_ADDR, AU1200_UART0_INT), + PORT(AU1000_UART1_PHYS_ADDR, AU1200_UART1_INT), + }, }; static struct platform_device au1xx0_uart_device = { .name = "serial8250", .id = PLAT8250_DEV_AU1X00, - .dev = { - .platform_data = au1x00_uart_data, - }, }; +static void __init alchemy_setup_uarts(int ctype) +{ + unsigned int uartclk = get_au1x00_uart_baud_base() * 16; + int s = sizeof(struct plat_serial8250_port); + int c = alchemy_get_uarts(ctype); + struct plat_serial8250_port *ports; + + ports = kzalloc(s * (c + 1), GFP_KERNEL); + if (!ports) { + printk(KERN_INFO "Alchemy: no memory for UART data\n"); + return; + } + memcpy(ports, au1x00_uart_data[ctype], s * c); + au1xx0_uart_device.dev.platform_data = ports; + + /* Fill up uartclk. */ + for (s = 0; s < c; s++) + ports[s].uartclk = uartclk; + if (platform_device_register(&au1xx0_uart_device)) + printk(KERN_INFO "Alchemy: failed to register UARTs\n"); +} + /* OHCI (USB full speed host controller) */ static struct resource au1xxx_usb_ohci_resources[] = { [0] = { @@ -269,8 +283,8 @@ extern struct au1xmmc_platform_data au1xmmc_platdata[2]; static struct resource au1200_mmc0_resources[] = { [0] = { - .start = SD0_PHYS_ADDR, - .end = SD0_PHYS_ADDR + 0x7ffff, + .start = AU1100_SD0_PHYS_ADDR, + .end = AU1100_SD0_PHYS_ADDR + 0xfff, .flags = IORESOURCE_MEM, }, [1] = { @@ -305,8 +319,8 @@ static struct platform_device au1200_mmc0_device = { #ifndef CONFIG_MIPS_DB1200 static struct resource au1200_mmc1_resources[] = { [0] = { - .start = SD1_PHYS_ADDR, - .end = SD1_PHYS_ADDR + 0x7ffff, + .start = AU1100_SD1_PHYS_ADDR, + .end = AU1100_SD1_PHYS_ADDR + 0xfff, .flags = IORESOURCE_MEM, }, [1] = { @@ -359,15 +373,16 @@ static struct platform_device pbdb_smbus_device = { #endif /* Macro to help defining the Ethernet MAC resources */ +#define MAC_RES_COUNT 3 /* MAC regs base, MAC enable reg, MAC INT */ #define MAC_RES(_base, _enable, _irq) \ { \ - .start = CPHYSADDR(_base), \ - .end = CPHYSADDR(_base + 0xffff), \ + .start = _base, \ + .end = _base + 0xffff, \ .flags = IORESOURCE_MEM, \ }, \ { \ - .start = CPHYSADDR(_enable), \ - .end = CPHYSADDR(_enable + 0x3), \ + .start = _enable, \ + .end = _enable + 0x3, \ .flags = IORESOURCE_MEM, \ }, \ { \ @@ -376,19 +391,29 @@ static struct platform_device pbdb_smbus_device = { .flags = IORESOURCE_IRQ \ } -static struct resource au1xxx_eth0_resources[] = { -#if defined(CONFIG_SOC_AU1000) - MAC_RES(AU1000_ETH0_BASE, AU1000_MAC0_ENABLE, AU1000_MAC0_DMA_INT), -#elif defined(CONFIG_SOC_AU1100) - MAC_RES(AU1100_ETH0_BASE, AU1100_MAC0_ENABLE, AU1100_MAC0_DMA_INT), -#elif defined(CONFIG_SOC_AU1550) - MAC_RES(AU1550_ETH0_BASE, AU1550_MAC0_ENABLE, AU1550_MAC0_DMA_INT), -#elif defined(CONFIG_SOC_AU1500) - MAC_RES(AU1500_ETH0_BASE, AU1500_MAC0_ENABLE, AU1500_MAC0_DMA_INT), -#endif +static struct resource au1xxx_eth0_resources[][MAC_RES_COUNT] __initdata = { + [ALCHEMY_CPU_AU1000] = { + MAC_RES(AU1000_MAC0_PHYS_ADDR, + AU1000_MACEN_PHYS_ADDR, + AU1000_MAC0_DMA_INT) + }, + [ALCHEMY_CPU_AU1500] = { + MAC_RES(AU1500_MAC0_PHYS_ADDR, + AU1500_MACEN_PHYS_ADDR, + AU1500_MAC0_DMA_INT) + }, + [ALCHEMY_CPU_AU1100] = { + MAC_RES(AU1000_MAC0_PHYS_ADDR, + AU1000_MACEN_PHYS_ADDR, + AU1100_MAC0_DMA_INT) + }, + [ALCHEMY_CPU_AU1550] = { + MAC_RES(AU1000_MAC0_PHYS_ADDR, + AU1000_MACEN_PHYS_ADDR, + AU1550_MAC0_DMA_INT) + }, }; - static struct au1000_eth_platform_data au1xxx_eth0_platform_data = { .phy1_search_mac0 = 1, }; @@ -396,20 +421,26 @@ static struct au1000_eth_platform_data au1xxx_eth0_platform_data = { static struct platform_device au1xxx_eth0_device = { .name = "au1000-eth", .id = 0, - .num_resources = ARRAY_SIZE(au1xxx_eth0_resources), - .resource = au1xxx_eth0_resources, + .num_resources = MAC_RES_COUNT, .dev.platform_data = &au1xxx_eth0_platform_data, }; -#ifndef CONFIG_SOC_AU1100 -static struct resource au1xxx_eth1_resources[] = { -#if defined(CONFIG_SOC_AU1000) - MAC_RES(AU1000_ETH1_BASE, AU1000_MAC1_ENABLE, AU1000_MAC1_DMA_INT), -#elif defined(CONFIG_SOC_AU1550) - MAC_RES(AU1550_ETH1_BASE, AU1550_MAC1_ENABLE, AU1550_MAC1_DMA_INT), -#elif defined(CONFIG_SOC_AU1500) - MAC_RES(AU1500_ETH1_BASE, AU1500_MAC1_ENABLE, AU1500_MAC1_DMA_INT), -#endif +static struct resource au1xxx_eth1_resources[][MAC_RES_COUNT] __initdata = { + [ALCHEMY_CPU_AU1000] = { + MAC_RES(AU1000_MAC1_PHYS_ADDR, + AU1000_MACEN_PHYS_ADDR + 4, + AU1000_MAC1_DMA_INT) + }, + [ALCHEMY_CPU_AU1500] = { + MAC_RES(AU1500_MAC1_PHYS_ADDR, + AU1500_MACEN_PHYS_ADDR + 4, + AU1500_MAC1_DMA_INT) + }, + [ALCHEMY_CPU_AU1550] = { + MAC_RES(AU1000_MAC1_PHYS_ADDR, + AU1000_MACEN_PHYS_ADDR + 4, + AU1550_MAC1_DMA_INT) + }, }; static struct au1000_eth_platform_data au1xxx_eth1_platform_data = { @@ -419,11 +450,9 @@ static struct au1000_eth_platform_data au1xxx_eth1_platform_data = { static struct platform_device au1xxx_eth1_device = { .name = "au1000-eth", .id = 1, - .num_resources = ARRAY_SIZE(au1xxx_eth1_resources), - .resource = au1xxx_eth1_resources, + .num_resources = MAC_RES_COUNT, .dev.platform_data = &au1xxx_eth1_platform_data, }; -#endif void __init au1xxx_override_eth_cfg(unsigned int port, struct au1000_eth_platform_data *eth_data) @@ -434,15 +463,65 @@ void __init au1xxx_override_eth_cfg(unsigned int port, if (port == 0) memcpy(&au1xxx_eth0_platform_data, eth_data, sizeof(struct au1000_eth_platform_data)); -#ifndef CONFIG_SOC_AU1100 else memcpy(&au1xxx_eth1_platform_data, eth_data, sizeof(struct au1000_eth_platform_data)); -#endif +} + +static void __init alchemy_setup_macs(int ctype) +{ + int ret, i; + unsigned char ethaddr[6]; + struct resource *macres; + + /* Handle 1st MAC */ + if (alchemy_get_macs(ctype) < 1) + return; + + macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL); + if (!macres) { + printk(KERN_INFO "Alchemy: no memory for MAC0 resources\n"); + return; + } + memcpy(macres, au1xxx_eth0_resources[ctype], + sizeof(struct resource) * MAC_RES_COUNT); + au1xxx_eth0_device.resource = macres; + + i = prom_get_ethernet_addr(ethaddr); + if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac)) + memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6); + + ret = platform_device_register(&au1xxx_eth0_device); + if (!ret) + printk(KERN_INFO "Alchemy: failed to register MAC0\n"); + + + /* Handle 2nd MAC */ + if (alchemy_get_macs(ctype) < 2) + return; + + macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL); + if (!macres) { + printk(KERN_INFO "Alchemy: no memory for MAC1 resources\n"); + return; + } + memcpy(macres, au1xxx_eth1_resources[ctype], + sizeof(struct resource) * MAC_RES_COUNT); + au1xxx_eth1_device.resource = macres; + + ethaddr[5] += 1; /* next addr for 2nd MAC */ + if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac)) + memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6); + + /* Register second MAC if enabled in pinfunc */ + if (!(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2)) { + ret = platform_device_register(&au1xxx_eth1_device); + if (ret) + printk(KERN_INFO "Alchemy: failed to register MAC1\n"); + } } static struct platform_device *au1xxx_platform_devices[] __initdata = { - &au1xx0_uart_device, &au1xxx_usb_ohci_device, #ifdef CONFIG_FB_AU1100 &au1100_lcd_device, @@ -460,36 +539,17 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = { #ifdef SMBUS_PSC_BASE &pbdb_smbus_device, #endif - &au1xxx_eth0_device, }; static int __init au1xxx_platform_init(void) { - unsigned int uartclk = get_au1x00_uart_baud_base() * 16; - int err, i; - unsigned char ethaddr[6]; + int err, ctype = alchemy_get_cputype(); - /* Fill up uartclk. */ - for (i = 0; au1x00_uart_data[i].flags; i++) - au1x00_uart_data[i].uartclk = uartclk; - - /* use firmware-provided mac addr if available and necessary */ - i = prom_get_ethernet_addr(ethaddr); - if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac)) - memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6); + alchemy_setup_uarts(ctype); + alchemy_setup_macs(ctype); err = platform_add_devices(au1xxx_platform_devices, ARRAY_SIZE(au1xxx_platform_devices)); -#ifndef CONFIG_SOC_AU1100 - ethaddr[5] += 1; /* next addr for 2nd MAC */ - if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac)) - memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6); - - /* Register second MAC if enabled in pinfunc */ - if (!err && !(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2)) - err = platform_device_register(&au1xxx_eth1_device); -#endif - return err; } diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c index 561e5da2658b..1b887c868417 100644 --- a/arch/mips/alchemy/common/setup.c +++ b/arch/mips/alchemy/common/setup.c @@ -52,8 +52,6 @@ void __init plat_mem_setup(void) /* this is faster than wasting cycles trying to approximate it */ preset_lpj = (est_freq >> 1) / HZ; - board_setup(); /* board specific setup */ - if (au1xxx_cpu_needs_config_od()) /* Various early Au1xx0 errata corrected by this */ set_c0_config(1 << 19); /* Set Config[OD] */ @@ -61,6 +59,8 @@ void __init plat_mem_setup(void) /* Clear to obtain best system bus performance */ clear_c0_config(1 << 19); /* Clear Config[OD] */ + board_setup(); /* board specific setup */ + /* IO/MEM resources. */ set_io_port_base(0); ioport_resource.start = IOPORT_RESOURCE_START; diff --git a/arch/mips/alchemy/common/time.c b/arch/mips/alchemy/common/time.c index 2aecb2fdf982..d5da6adbf634 100644 --- a/arch/mips/alchemy/common/time.c +++ b/arch/mips/alchemy/common/time.c @@ -141,8 +141,7 @@ static int __init alchemy_time_init(unsigned int m2int) goto cntr_err; /* register counter1 clocksource and event device */ - clocksource_set_clock(&au1x_counter1_clocksource, 32768); - clocksource_register(&au1x_counter1_clocksource); + clocksource_register_hz(&au1x_counter1_clocksource, 32768); cd->shift = 32; cd->mult = div_sc(32768, NSEC_PER_SEC, cd->shift); diff --git a/arch/mips/alchemy/devboards/db1200/setup.c b/arch/mips/alchemy/devboards/db1200/setup.c index 4a8980027ecf..1dac4f27d334 100644 --- a/arch/mips/alchemy/devboards/db1200/setup.c +++ b/arch/mips/alchemy/devboards/db1200/setup.c @@ -23,6 +23,13 @@ void __init board_setup(void) unsigned long freq0, clksrc, div, pfc; unsigned short whoami; + /* Set Config[OD] (disable overlapping bus transaction): + * This gets rid of a _lot_ of spurious interrupts (especially + * wrt. IDE); but incurs ~10% performance hit in some + * cpu-bound applications. + */ + set_c0_config(1 << 19); + bcsr_init(DB1200_BCSR_PHYS_ADDR, DB1200_BCSR_PHYS_ADDR + DB1200_BCSR_HEXLED_OFS); diff --git a/arch/mips/alchemy/devboards/pb1000/board_setup.c b/arch/mips/alchemy/devboards/pb1000/board_setup.c index 2d85c4b5be09..e64fdcbf75d0 100644 --- a/arch/mips/alchemy/devboards/pb1000/board_setup.c +++ b/arch/mips/alchemy/devboards/pb1000/board_setup.c @@ -65,7 +65,7 @@ void __init board_setup(void) /* Set AUX clock to 12 MHz * 8 = 96 MHz */ au_writel(8, SYS_AUXPLL); - au_writel(0, SYS_PINSTATERD); + alchemy_gpio1_input_enable(); udelay(100); #if defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) diff --git a/arch/mips/alchemy/devboards/pb1500/board_setup.c b/arch/mips/alchemy/devboards/pb1500/board_setup.c index 83f46215eb0c..3b4fa3206969 100644 --- a/arch/mips/alchemy/devboards/pb1500/board_setup.c +++ b/arch/mips/alchemy/devboards/pb1500/board_setup.c @@ -56,7 +56,7 @@ void __init board_setup(void) sys_clksrc = sys_freqctrl = pin_func = 0; /* Set AUX clock to 12 MHz * 8 = 96 MHz */ au_writel(8, SYS_AUXPLL); - au_writel(0, SYS_PINSTATERD); + alchemy_gpio1_input_enable(); udelay(100); /* GPIO201 is input for PCMCIA card detect */ diff --git a/arch/mips/alchemy/devboards/prom.c b/arch/mips/alchemy/devboards/prom.c index baeb21385058..e5306b56da6d 100644 --- a/arch/mips/alchemy/devboards/prom.c +++ b/arch/mips/alchemy/devboards/prom.c @@ -62,5 +62,5 @@ void __init prom_init(void) void prom_putchar(unsigned char c) { - alchemy_uart_putchar(UART0_PHYS_ADDR, c); + alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c); } diff --git a/arch/mips/alchemy/gpr/board_setup.c b/arch/mips/alchemy/gpr/board_setup.c index ad2e3f137933..5f8f0691ed2d 100644 --- a/arch/mips/alchemy/gpr/board_setup.c +++ b/arch/mips/alchemy/gpr/board_setup.c @@ -36,9 +36,6 @@ #include <prom.h> -#define UART1_ADDR KSEG1ADDR(UART1_PHYS_ADDR) -#define UART3_ADDR KSEG1ADDR(UART3_PHYS_ADDR) - char irq_tab_alchemy[][5] __initdata = { [0] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff }, }; @@ -67,18 +64,15 @@ static void gpr_power_off(void) void __init board_setup(void) { - printk(KERN_INFO "Tarpeze ITS GPR board\n"); + printk(KERN_INFO "Trapeze ITS GPR board\n"); pm_power_off = gpr_power_off; _machine_halt = gpr_power_off; _machine_restart = gpr_reset; - /* Enable UART3 */ - au_writel(0x1, UART3_ADDR + UART_MOD_CNTRL);/* clock enable (CE) */ - au_writel(0x3, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */ - /* Enable UART1 */ - au_writel(0x1, UART1_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */ - au_writel(0x3, UART1_ADDR + UART_MOD_CNTRL); /* CE and "enable" */ + /* Enable UART1/3 */ + alchemy_uart_enable(AU1000_UART3_PHYS_ADDR); + alchemy_uart_enable(AU1000_UART1_PHYS_ADDR); /* Take away Reset of UMTS-card */ alchemy_gpio_direction_output(215, 1); diff --git a/arch/mips/alchemy/gpr/init.c b/arch/mips/alchemy/gpr/init.c index f044f4c541d7..229aafae680c 100644 --- a/arch/mips/alchemy/gpr/init.c +++ b/arch/mips/alchemy/gpr/init.c @@ -59,5 +59,5 @@ void __init prom_init(void) void prom_putchar(unsigned char c) { - alchemy_uart_putchar(UART0_PHYS_ADDR, c); + alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c); } diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c index cf436ab679ae..3ae984cf98cf 100644 --- a/arch/mips/alchemy/mtx-1/board_setup.c +++ b/arch/mips/alchemy/mtx-1/board_setup.c @@ -87,7 +87,7 @@ void __init board_setup(void) au_writel(SYS_PF_NI2, SYS_PINFUNC); /* Initialize GPIO */ - au_writel(0xFFFFFFFF, SYS_TRIOUTCLR); + au_writel(~0, KSEG1ADDR(AU1000_SYS_PHYS_ADDR) + SYS_TRIOUTCLR); alchemy_gpio_direction_output(0, 0); /* Disable M66EN (PCI 66MHz) */ alchemy_gpio_direction_output(3, 1); /* Disable PCI CLKRUN# */ alchemy_gpio_direction_output(1, 1); /* Enable EXT_IO3 */ diff --git a/arch/mips/alchemy/mtx-1/init.c b/arch/mips/alchemy/mtx-1/init.c index f8d25575fa05..2e81cc7f3422 100644 --- a/arch/mips/alchemy/mtx-1/init.c +++ b/arch/mips/alchemy/mtx-1/init.c @@ -62,5 +62,5 @@ void __init prom_init(void) void prom_putchar(unsigned char c) { - alchemy_uart_putchar(UART0_PHYS_ADDR, c); + alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c); } diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c index 956f946218c5..55628e390fd7 100644 --- a/arch/mips/alchemy/mtx-1/platform.c +++ b/arch/mips/alchemy/mtx-1/platform.c @@ -53,8 +53,8 @@ static struct platform_device mtx1_button = { static struct resource mtx1_wdt_res[] = { [0] = { - .start = 15, - .end = 15, + .start = 215, + .end = 215, .name = "mtx1-wdt-gpio", .flags = IORESOURCE_IRQ, } diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c index febfb0fb0896..81e57fad07ab 100644 --- a/arch/mips/alchemy/xxs1500/board_setup.c +++ b/arch/mips/alchemy/xxs1500/board_setup.c @@ -66,13 +66,10 @@ void __init board_setup(void) au_writel(pin_func, SYS_PINFUNC); /* Enable UART */ - au_writel(0x01, UART3_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */ - mdelay(10); - au_writel(0x03, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */ - mdelay(10); - - /* Enable DTR = USB power up */ - au_writel(0x01, UART3_ADDR + UART_MCR); /* UART_MCR_DTR is 0x01??? */ + alchemy_uart_enable(AU1000_UART3_PHYS_ADDR); + /* Enable DTR (MCR bit 0) = USB power up */ + __raw_writel(1, (void __iomem *)KSEG1ADDR(AU1000_UART3_PHYS_ADDR + 0x18)); + wmb(); #ifdef CONFIG_PCI #if defined(__MIPSEB__) diff --git a/arch/mips/alchemy/xxs1500/init.c b/arch/mips/alchemy/xxs1500/init.c index 34a90a4bb6f4..0ee02cfa989d 100644 --- a/arch/mips/alchemy/xxs1500/init.c +++ b/arch/mips/alchemy/xxs1500/init.c @@ -59,5 +59,5 @@ void __init prom_init(void) void prom_putchar(unsigned char c) { - alchemy_uart_putchar(UART0_PHYS_ADDR, c); + alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c); } diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index 425dfa5d6e12..bb571bcdb8f2 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c @@ -325,9 +325,7 @@ int __init ar7_gpio_init(void) size = 0x1f; } - gpch->regs = ioremap_nocache(AR7_REGS_GPIO, - AR7_REGS_GPIO + 0x10); - + gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size); if (!gpch->regs) { printk(KERN_ERR "%s: failed to ioremap regs\n", gpch->chip.label); diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c index e5b6615731e5..54db815bc86c 100644 --- a/arch/mips/bcm47xx/nvram.c +++ b/arch/mips/bcm47xx/nvram.c @@ -3,6 +3,7 @@ * * Copyright (C) 2005 Broadcom Corporation * Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org> + * Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -23,7 +24,7 @@ static char nvram_buf[NVRAM_SPACE]; /* Probe for NVRAM header */ -static void __init early_nvram_init(void) +static void early_nvram_init(void) { struct ssb_mipscore *mcore = &ssb_bcm47xx.mipscore; struct nvram_header *header; diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index c95f90bf734c..73b529b57433 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -3,6 +3,7 @@ * Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org> * Copyright (C) 2006 Michael Buesch <mb@bu3sch.de> * Copyright (C) 2010 Waldemar Brodkorb <wbx@openadk.org> + * Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -57,10 +58,49 @@ static void bcm47xx_machine_halt(void) } #define READ_FROM_NVRAM(_outvar, name, buf) \ - if (nvram_getenv(name, buf, sizeof(buf)) >= 0)\ + if (nvram_getprefix(prefix, name, buf, sizeof(buf)) >= 0)\ sprom->_outvar = simple_strtoul(buf, NULL, 0); -static void bcm47xx_fill_sprom(struct ssb_sprom *sprom) +#define READ_FROM_NVRAM2(_outvar, name1, name2, buf) \ + if (nvram_getprefix(prefix, name1, buf, sizeof(buf)) >= 0 || \ + nvram_getprefix(prefix, name2, buf, sizeof(buf)) >= 0)\ + sprom->_outvar = simple_strtoul(buf, NULL, 0); + +static inline int nvram_getprefix(const char *prefix, char *name, + char *buf, int len) +{ + if (prefix) { + char key[100]; + + snprintf(key, sizeof(key), "%s%s", prefix, name); + return nvram_getenv(key, buf, len); + } + + return nvram_getenv(name, buf, len); +} + +static u32 nvram_getu32(const char *name, char *buf, int len) +{ + int rv; + char key[100]; + u16 var0, var1; + + snprintf(key, sizeof(key), "%s0", name); + rv = nvram_getenv(key, buf, len); + /* return 0 here so this looks like unset */ + if (rv < 0) + return 0; + var0 = simple_strtoul(buf, NULL, 0); + + snprintf(key, sizeof(key), "%s1", name); + rv = nvram_getenv(key, buf, len); + if (rv < 0) + return 0; + var1 = simple_strtoul(buf, NULL, 0); + return var1 << 16 | var0; +} + +static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix) { char buf[100]; u32 boardflags; @@ -69,11 +109,12 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom) sprom->revision = 1; /* Fallback: Old hardware does not define this. */ READ_FROM_NVRAM(revision, "sromrev", buf); - if (nvram_getenv("il0macaddr", buf, sizeof(buf)) >= 0) + if (nvram_getprefix(prefix, "il0macaddr", buf, sizeof(buf)) >= 0 || + nvram_getprefix(prefix, "macaddr", buf, sizeof(buf)) >= 0) nvram_parse_macaddr(buf, sprom->il0mac); - if (nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0) + if (nvram_getprefix(prefix, "et0macaddr", buf, sizeof(buf)) >= 0) nvram_parse_macaddr(buf, sprom->et0mac); - if (nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0) + if (nvram_getprefix(prefix, "et1macaddr", buf, sizeof(buf)) >= 0) nvram_parse_macaddr(buf, sprom->et1mac); READ_FROM_NVRAM(et0phyaddr, "et0phyaddr", buf); READ_FROM_NVRAM(et1phyaddr, "et1phyaddr", buf); @@ -95,20 +136,36 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom) READ_FROM_NVRAM(pa1hib0, "pa1hib0", buf); READ_FROM_NVRAM(pa1hib2, "pa1hib1", buf); READ_FROM_NVRAM(pa1hib1, "pa1hib2", buf); - READ_FROM_NVRAM(gpio0, "wl0gpio0", buf); - READ_FROM_NVRAM(gpio1, "wl0gpio1", buf); - READ_FROM_NVRAM(gpio2, "wl0gpio2", buf); - READ_FROM_NVRAM(gpio3, "wl0gpio3", buf); - READ_FROM_NVRAM(maxpwr_bg, "pa0maxpwr", buf); - READ_FROM_NVRAM(maxpwr_al, "pa1lomaxpwr", buf); - READ_FROM_NVRAM(maxpwr_a, "pa1maxpwr", buf); - READ_FROM_NVRAM(maxpwr_ah, "pa1himaxpwr", buf); - READ_FROM_NVRAM(itssi_a, "pa1itssit", buf); - READ_FROM_NVRAM(itssi_bg, "pa0itssit", buf); + READ_FROM_NVRAM2(gpio0, "ledbh0", "wl0gpio0", buf); + READ_FROM_NVRAM2(gpio1, "ledbh1", "wl0gpio1", buf); + READ_FROM_NVRAM2(gpio2, "ledbh2", "wl0gpio2", buf); + READ_FROM_NVRAM2(gpio3, "ledbh3", "wl0gpio3", buf); + READ_FROM_NVRAM2(maxpwr_bg, "maxp2ga0", "pa0maxpwr", buf); + READ_FROM_NVRAM2(maxpwr_al, "maxp5gla0", "pa1lomaxpwr", buf); + READ_FROM_NVRAM2(maxpwr_a, "maxp5ga0", "pa1maxpwr", buf); + READ_FROM_NVRAM2(maxpwr_ah, "maxp5gha0", "pa1himaxpwr", buf); + READ_FROM_NVRAM2(itssi_bg, "itt5ga0", "pa0itssit", buf); + READ_FROM_NVRAM2(itssi_a, "itt2ga0", "pa1itssit", buf); READ_FROM_NVRAM(tri2g, "tri2g", buf); READ_FROM_NVRAM(tri5gl, "tri5gl", buf); READ_FROM_NVRAM(tri5g, "tri5g", buf); READ_FROM_NVRAM(tri5gh, "tri5gh", buf); + READ_FROM_NVRAM(txpid2g[0], "txpid2ga0", buf); + READ_FROM_NVRAM(txpid2g[1], "txpid2ga1", buf); + READ_FROM_NVRAM(txpid2g[2], "txpid2ga2", buf); + READ_FROM_NVRAM(txpid2g[3], "txpid2ga3", buf); + READ_FROM_NVRAM(txpid5g[0], "txpid5ga0", buf); + READ_FROM_NVRAM(txpid5g[1], "txpid5ga1", buf); + READ_FROM_NVRAM(txpid5g[2], "txpid5ga2", buf); + READ_FROM_NVRAM(txpid5g[3], "txpid5ga3", buf); + READ_FROM_NVRAM(txpid5gl[0], "txpid5gla0", buf); + READ_FROM_NVRAM(txpid5gl[1], "txpid5gla1", buf); + READ_FROM_NVRAM(txpid5gl[2], "txpid5gla2", buf); + READ_FROM_NVRAM(txpid5gl[3], "txpid5gla3", buf); + READ_FROM_NVRAM(txpid5gh[0], "txpid5gha0", buf); + READ_FROM_NVRAM(txpid5gh[1], "txpid5gha1", buf); + READ_FROM_NVRAM(txpid5gh[2], "txpid5gha2", buf); + READ_FROM_NVRAM(txpid5gh[3], "txpid5gha3", buf); READ_FROM_NVRAM(rxpo2g, "rxpo2g", buf); READ_FROM_NVRAM(rxpo5g, "rxpo5g", buf); READ_FROM_NVRAM(rssisav2g, "rssisav2g", buf); @@ -120,19 +177,27 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom) READ_FROM_NVRAM(rssismf5g, "rssismf5g", buf); READ_FROM_NVRAM(bxa5g, "bxa5g", buf); READ_FROM_NVRAM(cck2gpo, "cck2gpo", buf); - READ_FROM_NVRAM(ofdm2gpo, "ofdm2gpo", buf); - READ_FROM_NVRAM(ofdm5glpo, "ofdm5glpo", buf); - READ_FROM_NVRAM(ofdm5gpo, "ofdm5gpo", buf); - READ_FROM_NVRAM(ofdm5ghpo, "ofdm5ghpo", buf); - if (nvram_getenv("boardflags", buf, sizeof(buf)) >= 0) { + sprom->ofdm2gpo = nvram_getu32("ofdm2gpo", buf, sizeof(buf)); + sprom->ofdm5glpo = nvram_getu32("ofdm5glpo", buf, sizeof(buf)); + sprom->ofdm5gpo = nvram_getu32("ofdm5gpo", buf, sizeof(buf)); + sprom->ofdm5ghpo = nvram_getu32("ofdm5ghpo", buf, sizeof(buf)); + + READ_FROM_NVRAM(antenna_gain.ghz24.a0, "ag0", buf); + READ_FROM_NVRAM(antenna_gain.ghz24.a1, "ag1", buf); + READ_FROM_NVRAM(antenna_gain.ghz24.a2, "ag2", buf); + READ_FROM_NVRAM(antenna_gain.ghz24.a3, "ag3", buf); + memcpy(&sprom->antenna_gain.ghz5, &sprom->antenna_gain.ghz24, + sizeof(sprom->antenna_gain.ghz5)); + + if (nvram_getprefix(prefix, "boardflags", buf, sizeof(buf)) >= 0) { boardflags = simple_strtoul(buf, NULL, 0); if (boardflags) { sprom->boardflags_lo = (boardflags & 0x0000FFFFU); sprom->boardflags_hi = (boardflags & 0xFFFF0000U) >> 16; } } - if (nvram_getenv("boardflags2", buf, sizeof(buf)) >= 0) { + if (nvram_getprefix(prefix, "boardflags2", buf, sizeof(buf)) >= 0) { boardflags = simple_strtoul(buf, NULL, 0); if (boardflags) { sprom->boardflags2_lo = (boardflags & 0x0000FFFFU); @@ -141,6 +206,22 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom) } } +int bcm47xx_get_sprom(struct ssb_bus *bus, struct ssb_sprom *out) +{ + char prefix[10]; + + if (bus->bustype == SSB_BUSTYPE_PCI) { + snprintf(prefix, sizeof(prefix), "pci/%u/%u/", + bus->host_pci->bus->number + 1, + PCI_SLOT(bus->host_pci->devfn)); + bcm47xx_fill_sprom(out, prefix); + return 0; + } else { + printk(KERN_WARNING "bcm47xx: unable to fill SPROM for given bustype.\n"); + return -EINVAL; + } +} + static int bcm47xx_get_invariants(struct ssb_bus *bus, struct ssb_init_invariants *iv) { @@ -158,7 +239,7 @@ static int bcm47xx_get_invariants(struct ssb_bus *bus, if (nvram_getenv("boardrev", buf, sizeof(buf)) >= 0) iv->boardinfo.rev = (u16)simple_strtoul(buf, NULL, 0); - bcm47xx_fill_sprom(&iv->sprom); + bcm47xx_fill_sprom(&iv->sprom, NULL); if (nvram_getenv("cardbus", buf, sizeof(buf)) >= 0) iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10); @@ -172,6 +253,11 @@ void __init plat_mem_setup(void) char buf[100]; struct ssb_mipscore *mcore; + err = ssb_arch_register_fallback_sprom(&bcm47xx_get_sprom); + if (err) + printk(KERN_WARNING "bcm47xx: someone else already registered" + " a ssb SPROM callback handler (err %d)\n", err); + err = ssb_bus_ssbbus_register(&ssb_bcm47xx, SSB_ENUM_BASE, bcm47xx_get_invariants); if (err) diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index 8dba8cfb752f..40b223b603be 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -643,6 +643,17 @@ static struct ssb_sprom bcm63xx_sprom = { .boardflags_lo = 0x2848, .boardflags_hi = 0x0000, }; + +int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out) +{ + if (bus->bustype == SSB_BUSTYPE_PCI) { + memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom)); + return 0; + } else { + printk(KERN_ERR PFX "unable to fill SPROM for given bustype.\n"); + return -EINVAL; + } +} #endif /* @@ -793,8 +804,9 @@ void __init board_prom_init(void) if (!board_get_mac_address(bcm63xx_sprom.il0mac)) { memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN); memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN); - if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0) - printk(KERN_ERR "failed to register fallback SPROM\n"); + if (ssb_arch_register_fallback_sprom( + &bcm63xx_get_fallback_sprom) < 0) + printk(KERN_ERR PFX "failed to register fallback SPROM\n"); } #endif } diff --git a/arch/mips/boot/compressed/uart-alchemy.c b/arch/mips/boot/compressed/uart-alchemy.c index 1bff22fa089b..eb063e6dead9 100644 --- a/arch/mips/boot/compressed/uart-alchemy.c +++ b/arch/mips/boot/compressed/uart-alchemy.c @@ -3,5 +3,5 @@ void putc(char c) { /* all current (Jan. 2010) in-kernel boards */ - alchemy_uart_putchar(UART0_PHYS_ADDR, c); + alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c); } diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c index 26bf71130bf8..29d56afbb02d 100644 --- a/arch/mips/cavium-octeon/csrc-octeon.c +++ b/arch/mips/cavium-octeon/csrc-octeon.c @@ -105,8 +105,7 @@ unsigned long long notrace sched_clock(void) void __init plat_time_init(void) { clocksource_mips.rating = 300; - clocksource_set_clock(&clocksource_mips, octeon_get_clock_rate()); - clocksource_register(&clocksource_mips); + clocksource_register_hz(&clocksource_mips, octeon_get_clock_rate()); } static u64 octeon_udelay_factor; diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 0707fae3f0ee..2d9028f1474c 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -288,7 +288,6 @@ void octeon_user_io_init(void) union octeon_cvmemctl cvmmemctl; union cvmx_iob_fau_timeout fau_timeout; union cvmx_pow_nw_tim nm_tim; - uint64_t cvmctl; /* Get the current settings for CP0_CVMMEMCTL_REG */ cvmmemctl.u64 = read_c0_cvmmemctl(); @@ -392,12 +391,6 @@ void octeon_user_io_init(void) CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE, CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128); - /* Move the performance counter interrupts to IRQ 6 */ - cvmctl = read_c0_cvmctl(); - cvmctl &= ~(7 << 7); - cvmctl |= 6 << 7; - write_c0_cvmctl(cvmctl); - /* Set a default for the hardware timeouts */ fau_timeout.u64 = 0; fau_timeout.s.tout_val = 0xfff; diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index ba78b21cc8d0..8b606423bbd7 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -37,13 +37,15 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id) uint64_t action; /* Load the mailbox register to figure out what we're supposed to do */ - action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid)); + action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid)) & 0xffff; /* Clear the mailbox to clear the interrupt */ cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action); if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); /* Check if we've been told to flush the icache */ if (action & SMP_ICACHE_FLUSH) @@ -200,16 +202,15 @@ void octeon_prepare_cpus(unsigned int max_cpus) if (labi->labi_signature != LABI_SIGNATURE) panic("The bootloader version on this board is incorrect."); #endif - - cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffffffff); + /* + * Only the low order mailbox bits are used for IPIs, leave + * the other bits alone. + */ + cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffff); if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt, IRQF_DISABLED, - "mailbox0", mailbox_interrupt)) { + "SMP-IPI", mailbox_interrupt)) { panic("Cannot request_irq(OCTEON_IRQ_MBOX0)\n"); } - if (request_irq(OCTEON_IRQ_MBOX1, mailbox_interrupt, IRQF_DISABLED, - "mailbox1", mailbox_interrupt)) { - panic("Cannot request_irq(OCTEON_IRQ_MBOX1)\n"); - } } /** diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 167c1d07b809..b6acd2f256b6 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -86,8 +86,8 @@ CONFIG_NET_SCHED=y CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m @@ -329,7 +329,7 @@ CONFIG_USB_LED=m CONFIG_USB_GADGET=m CONFIG_USB_GADGET_M66592=y CONFIG_MMC=m -CONFIG_LEDS_CLASS=m +CONFIG_LEDS_CLASS=y CONFIG_STAGING=y # CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_FB_SM7XX=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 7270f3183bda..5527abbb7dea 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -374,7 +374,7 @@ CONFIG_FB_CIRRUS=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_HID=m -CONFIG_LEDS_CLASS=m +CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index a97a42c6b2c8..37862b2ce363 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -225,8 +225,8 @@ CONFIG_TOSHIBA_FIR=m CONFIG_VLSI_FIR=m CONFIG_MCS_FIR=m CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig new file mode 100644 index 000000000000..e4b399fdaa61 --- /dev/null +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -0,0 +1,574 @@ +CONFIG_NLM_XLR_BOARD=y +CONFIG_HIGHMEM=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_SMP=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_KEXEC=y +CONFIG_EXPERIMENTAL=y +CONFIG_CROSS_COMPILE="mips64-unknown-linux-gnu-" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_NAMESPACES=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="usr/dev_file_list usr/rootfs" +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_INITRAMFS_COMPRESSION_GZIP=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_EXPERT=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_ELF_CORE is not set +# CONFIG_PCSPKR_PLATFORM is not set +# CONFIG_PERF_EVENTS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BINFMT_MISC=m +CONFIG_PM_RUNTIME=y +CONFIG_PM_DEBUG=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_DECNET_NF_GRABULATOR=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_IP6=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_BRIDGE_EBT_ULOG=m +CONFIG_BRIDGE_EBT_NFLOG=m +CONFIG_IP_DCCP=m +CONFIG_RDS=m +CONFIG_RDS_TCP=m +CONFIG_TIPC=m +CONFIG_ATM=m +CONFIG_ATM_CLIP=m +CONFIG_ATM_LANE=m +CONFIG_ATM_MPOA=m +CONFIG_ATM_BR2684=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_DECNET=m +CONFIG_LLC2=m +CONFIG_IPX=m +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=m +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_X25=m +CONFIG_LAPB=m +CONFIG_ECONET=m +CONFIG_ECONET_AUNUDP=y +CONFIG_ECONET_NATIVE=y +CONFIG_WAN_ROUTER=m +CONFIG_PHONET=m +CONFIG_IEEE802154=m +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_ATM=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_DCB=y +CONFIG_NET_PKTGEN=m +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +CONFIG_CONNECTOR=y +CONFIG_MTD=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_OSD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=65536 +CONFIG_CDROM_PKTCDVD=y +CONFIG_MISC_DEVICES=y +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=y +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_FC_TGT_ATTRS=y +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_SCSI_SRP_TGT_ATTRS=y +CONFIG_ISCSI_TCP=m +CONFIG_LIBFCOE=m +CONFIG_SCSI_DEBUG=m +CONFIG_SCSI_DH=y +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +# CONFIG_INPUT_MOUSEDEV is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_EVBUG=m +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO_I8042 is not set +CONFIG_SERIO_SERPORT=m +CONFIG_SERIO_LIBPS2=y +CONFIG_SERIO_RAW=m +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_N_HDLC=m +# CONFIG_DEVKMEM is not set +CONFIG_STALDRV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=48 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_TIMERIOMEM=m +CONFIG_RAW_DRIVER=m +# CONFIG_HWMON is not set +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_UIO=y +CONFIG_UIO_PDRV=m +CONFIG_UIO_PDRV_GENIRQ=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_OCFS2_FS=m +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=y +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y +CONFIG_FSCACHE_HISTOGRAM=y +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_CONFIGFS_FS=y +CONFIG_ADFS_FS=m +CONFIG_AFFS_FS=m +CONFIG_ECRYPT_FS=y +CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +CONFIG_BEFS_FS=m +CONFIG_BFS_FS=m +CONFIG_EFS_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_VXFS_FS=m +CONFIG_MINIX_FS=m +CONFIG_OMFS_FS=m +CONFIG_HPFS_FS=m +CONFIG_QNX4FS_FS=m +CONFIG_ROMFS_FS=m +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +CONFIG_EXOFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_FSCACHE=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_EXPERIMENTAL=y +CONFIG_NCP_FS=m +CONFIG_NCPFS_PACKET_SIGNING=y +CONFIG_NCPFS_IOCTL_LOCKING=y +CONFIG_NCPFS_STRONG=y +CONFIG_NCPFS_NFS_NS=y +CONFIG_NCPFS_OS2_NS=y +CONFIG_NCPFS_NLS=y +CONFIG_NCPFS_EXTRAS=y +CONFIG_CODA_FS=m +CONFIG_AFS_FS=m +CONFIG_PARTITION_ADVANCED=y +CONFIG_ACORN_PARTITION=y +CONFIG_ACORN_PARTITION_ICS=y +CONFIG_ACORN_PARTITION_RISCIX=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_ATARI_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_LDM_PARTITION=y +CONFIG_SGI_PARTITION=y +CONFIG_ULTRIX_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_SYSV68_PARTITION=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="cp437" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_KGDB=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_LSM_MMAP_MIN_ADDR=0 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SMACK=y +CONFIG_SECURITY_TOMOYO=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m +CONFIG_CRC_CCITT=m +CONFIG_CRC7=m diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 86877539c6e8..34c0d3cb116f 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -33,6 +33,7 @@ #define PRID_COMP_TOSHIBA 0x070000 #define PRID_COMP_LSI 0x080000 #define PRID_COMP_LEXRA 0x0b0000 +#define PRID_COMP_NETLOGIC 0x0c0000 #define PRID_COMP_CAVIUM 0x0d0000 #define PRID_COMP_INGENIC 0xd00000 @@ -142,6 +143,31 @@ #define PRID_IMP_JZRISC 0x0200 /* + * These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC + */ +#define PRID_IMP_NETLOGIC_XLR732 0x0000 +#define PRID_IMP_NETLOGIC_XLR716 0x0200 +#define PRID_IMP_NETLOGIC_XLR532 0x0900 +#define PRID_IMP_NETLOGIC_XLR308 0x0600 +#define PRID_IMP_NETLOGIC_XLR532C 0x0800 +#define PRID_IMP_NETLOGIC_XLR516C 0x0a00 +#define PRID_IMP_NETLOGIC_XLR508C 0x0b00 +#define PRID_IMP_NETLOGIC_XLR308C 0x0f00 +#define PRID_IMP_NETLOGIC_XLS608 0x8000 +#define PRID_IMP_NETLOGIC_XLS408 0x8800 +#define PRID_IMP_NETLOGIC_XLS404 0x8c00 +#define PRID_IMP_NETLOGIC_XLS208 0x8e00 +#define PRID_IMP_NETLOGIC_XLS204 0x8f00 +#define PRID_IMP_NETLOGIC_XLS108 0xce00 +#define PRID_IMP_NETLOGIC_XLS104 0xcf00 +#define PRID_IMP_NETLOGIC_XLS616B 0x4000 +#define PRID_IMP_NETLOGIC_XLS608B 0x4a00 +#define PRID_IMP_NETLOGIC_XLS416B 0x4400 +#define PRID_IMP_NETLOGIC_XLS412B 0x4c00 +#define PRID_IMP_NETLOGIC_XLS408B 0x4e00 +#define PRID_IMP_NETLOGIC_XLS404B 0x4f00 + +/* * Definitions for 7:0 on legacy processors */ @@ -234,6 +260,7 @@ enum cpu_type_enum { */ CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2, + CPU_XLR, CPU_LAST }; diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 655f849bd08d..7aa37ddfca4b 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -5,7 +5,9 @@ #include <asm/cache.h> #include <asm-generic/dma-coherent.h> +#ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */ #include <dma-coherence.h> +#endif extern struct dma_map_ops *mips_dma_map_ops; diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h index 48bb82372994..9ad011366f73 100644 --- a/arch/mips/include/asm/i8253.h +++ b/arch/mips/include/asm/i8253.h @@ -12,8 +12,13 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 +#define PIT_LATCH LATCH + extern raw_spinlock_t i8253_lock; extern void setup_pit_timer(void); +#define inb_pit inb_p +#define outb_pit outb_p + #endif /* __ASM_I8253_H */ diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 7622ccf75076..1881b316ca45 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,16 +20,18 @@ #define WORD_INSN ".word" #endif -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\tnop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t" \ - WORD_INSN " 1b, %l[" #label "], %0\n\t" \ - ".popsection\n\t" \ - : : "i" (key) : : label); \ - } while (0) - +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\tnop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h index a6976619160a..f260ebed713b 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000.h +++ b/arch/mips/include/asm/mach-au1x00/au1000.h @@ -161,6 +161,45 @@ static inline int alchemy_get_cputype(void) return ALCHEMY_CPU_UNKNOWN; } +/* return number of uarts on a given cputype */ +static inline int alchemy_get_uarts(int type) +{ + switch (type) { + case ALCHEMY_CPU_AU1000: + return 4; + case ALCHEMY_CPU_AU1500: + case ALCHEMY_CPU_AU1200: + return 2; + case ALCHEMY_CPU_AU1100: + case ALCHEMY_CPU_AU1550: + return 3; + } + return 0; +} + +/* enable an UART block if it isn't already */ +static inline void alchemy_uart_enable(u32 uart_phys) +{ + void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys); + + /* reset, enable clock, deassert reset */ + if ((__raw_readl(addr + 0x100) & 3) != 3) { + __raw_writel(0, addr + 0x100); + wmb(); + __raw_writel(1, addr + 0x100); + wmb(); + } + __raw_writel(3, addr + 0x100); + wmb(); +} + +static inline void alchemy_uart_disable(u32 uart_phys) +{ + void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys); + __raw_writel(0, addr + 0x100); /* UART_MOD_CNTRL */ + wmb(); +} + static inline void alchemy_uart_putchar(u32 uart_phys, u8 c) { void __iomem *base = (void __iomem *)KSEG1ADDR(uart_phys); @@ -180,6 +219,20 @@ static inline void alchemy_uart_putchar(u32 uart_phys, u8 c) wmb(); } +/* return number of ethernet MACs on a given cputype */ +static inline int alchemy_get_macs(int type) +{ + switch (type) { + case ALCHEMY_CPU_AU1000: + case ALCHEMY_CPU_AU1500: + case ALCHEMY_CPU_AU1550: + return 2; + case ALCHEMY_CPU_AU1100: + return 1; + } + return 0; +} + /* arch/mips/au1000/common/clocks.c */ extern void set_au1x00_speed(unsigned int new_freq); extern unsigned int get_au1x00_speed(void); @@ -630,38 +683,42 @@ enum soc_au1200_ints { /* * Physical base addresses for integrated peripherals + * 0..au1000 1..au1500 2..au1100 3..au1550 4..au1200 */ +#define AU1000_AC97_PHYS_ADDR 0x10000000 /* 012 */ +#define AU1000_USBD_PHYS_ADDR 0x10200000 /* 0123 */ +#define AU1000_IC0_PHYS_ADDR 0x10400000 /* 01234 */ +#define AU1000_MAC0_PHYS_ADDR 0x10500000 /* 023 */ +#define AU1000_MAC1_PHYS_ADDR 0x10510000 /* 023 */ +#define AU1000_MACEN_PHYS_ADDR 0x10520000 /* 023 */ +#define AU1100_SD0_PHYS_ADDR 0x10600000 /* 24 */ +#define AU1100_SD1_PHYS_ADDR 0x10680000 /* 24 */ +#define AU1000_I2S_PHYS_ADDR 0x11000000 /* 02 */ +#define AU1500_MAC0_PHYS_ADDR 0x11500000 /* 1 */ +#define AU1500_MAC1_PHYS_ADDR 0x11510000 /* 1 */ +#define AU1500_MACEN_PHYS_ADDR 0x11520000 /* 1 */ +#define AU1000_UART0_PHYS_ADDR 0x11100000 /* 01234 */ +#define AU1000_UART1_PHYS_ADDR 0x11200000 /* 0234 */ +#define AU1000_UART2_PHYS_ADDR 0x11300000 /* 0 */ +#define AU1000_UART3_PHYS_ADDR 0x11400000 /* 0123 */ +#define AU1500_GPIO2_PHYS_ADDR 0x11700000 /* 1234 */ +#define AU1000_IC1_PHYS_ADDR 0x11800000 /* 01234 */ +#define AU1000_SYS_PHYS_ADDR 0x11900000 /* 01234 */ +#define AU1000_DMA_PHYS_ADDR 0x14002000 /* 012 */ +#define AU1550_DBDMA_PHYS_ADDR 0x14002000 /* 34 */ +#define AU1550_DBDMA_CONF_PHYS_ADDR 0x14003000 /* 34 */ +#define AU1000_MACDMA0_PHYS_ADDR 0x14004000 /* 0123 */ +#define AU1000_MACDMA1_PHYS_ADDR 0x14004200 /* 0123 */ + + #ifdef CONFIG_SOC_AU1000 #define MEM_PHYS_ADDR 0x14000000 #define STATIC_MEM_PHYS_ADDR 0x14001000 -#define DMA0_PHYS_ADDR 0x14002000 -#define DMA1_PHYS_ADDR 0x14002100 -#define DMA2_PHYS_ADDR 0x14002200 -#define DMA3_PHYS_ADDR 0x14002300 -#define DMA4_PHYS_ADDR 0x14002400 -#define DMA5_PHYS_ADDR 0x14002500 -#define DMA6_PHYS_ADDR 0x14002600 -#define DMA7_PHYS_ADDR 0x14002700 -#define IC0_PHYS_ADDR 0x10400000 -#define IC1_PHYS_ADDR 0x11800000 -#define AC97_PHYS_ADDR 0x10000000 #define USBH_PHYS_ADDR 0x10100000 -#define USBD_PHYS_ADDR 0x10200000 #define IRDA_PHYS_ADDR 0x10300000 -#define MAC0_PHYS_ADDR 0x10500000 -#define MAC1_PHYS_ADDR 0x10510000 -#define MACEN_PHYS_ADDR 0x10520000 -#define MACDMA0_PHYS_ADDR 0x14004000 -#define MACDMA1_PHYS_ADDR 0x14004200 -#define I2S_PHYS_ADDR 0x11000000 -#define UART0_PHYS_ADDR 0x11100000 -#define UART1_PHYS_ADDR 0x11200000 -#define UART2_PHYS_ADDR 0x11300000 -#define UART3_PHYS_ADDR 0x11400000 #define SSI0_PHYS_ADDR 0x11600000 #define SSI1_PHYS_ADDR 0x11680000 -#define SYS_PHYS_ADDR 0x11900000 #define PCMCIA_IO_PHYS_ADDR 0xF00000000ULL #define PCMCIA_ATTR_PHYS_ADDR 0xF40000000ULL #define PCMCIA_MEM_PHYS_ADDR 0xF80000000ULL @@ -672,30 +729,8 @@ enum soc_au1200_ints { #ifdef CONFIG_SOC_AU1500 #define MEM_PHYS_ADDR 0x14000000 #define STATIC_MEM_PHYS_ADDR 0x14001000 -#define DMA0_PHYS_ADDR 0x14002000 -#define DMA1_PHYS_ADDR 0x14002100 -#define DMA2_PHYS_ADDR 0x14002200 -#define DMA3_PHYS_ADDR 0x14002300 -#define DMA4_PHYS_ADDR 0x14002400 -#define DMA5_PHYS_ADDR 0x14002500 -#define DMA6_PHYS_ADDR 0x14002600 -#define DMA7_PHYS_ADDR 0x14002700 -#define IC0_PHYS_ADDR 0x10400000 -#define IC1_PHYS_ADDR 0x11800000 -#define AC97_PHYS_ADDR 0x10000000 #define USBH_PHYS_ADDR 0x10100000 -#define USBD_PHYS_ADDR 0x10200000 #define PCI_PHYS_ADDR 0x14005000 -#define MAC0_PHYS_ADDR 0x11500000 -#define MAC1_PHYS_ADDR 0x11510000 -#define MACEN_PHYS_ADDR 0x11520000 -#define MACDMA0_PHYS_ADDR 0x14004000 -#define MACDMA1_PHYS_ADDR 0x14004200 -#define I2S_PHYS_ADDR 0x11000000 -#define UART0_PHYS_ADDR 0x11100000 -#define UART3_PHYS_ADDR 0x11400000 -#define GPIO2_PHYS_ADDR 0x11700000 -#define SYS_PHYS_ADDR 0x11900000 #define PCI_MEM_PHYS_ADDR 0x400000000ULL #define PCI_IO_PHYS_ADDR 0x500000000ULL #define PCI_CONFIG0_PHYS_ADDR 0x600000000ULL @@ -710,34 +745,10 @@ enum soc_au1200_ints { #ifdef CONFIG_SOC_AU1100 #define MEM_PHYS_ADDR 0x14000000 #define STATIC_MEM_PHYS_ADDR 0x14001000 -#define DMA0_PHYS_ADDR 0x14002000 -#define DMA1_PHYS_ADDR 0x14002100 -#define DMA2_PHYS_ADDR 0x14002200 -#define DMA3_PHYS_ADDR 0x14002300 -#define DMA4_PHYS_ADDR 0x14002400 -#define DMA5_PHYS_ADDR 0x14002500 -#define DMA6_PHYS_ADDR 0x14002600 -#define DMA7_PHYS_ADDR 0x14002700 -#define IC0_PHYS_ADDR 0x10400000 -#define SD0_PHYS_ADDR 0x10600000 -#define SD1_PHYS_ADDR 0x10680000 -#define IC1_PHYS_ADDR 0x11800000 -#define AC97_PHYS_ADDR 0x10000000 #define USBH_PHYS_ADDR 0x10100000 -#define USBD_PHYS_ADDR 0x10200000 #define IRDA_PHYS_ADDR 0x10300000 -#define MAC0_PHYS_ADDR 0x10500000 -#define MACEN_PHYS_ADDR 0x10520000 -#define MACDMA0_PHYS_ADDR 0x14004000 -#define MACDMA1_PHYS_ADDR 0x14004200 -#define I2S_PHYS_ADDR 0x11000000 -#define UART0_PHYS_ADDR 0x11100000 -#define UART1_PHYS_ADDR 0x11200000 -#define UART3_PHYS_ADDR 0x11400000 #define SSI0_PHYS_ADDR 0x11600000 #define SSI1_PHYS_ADDR 0x11680000 -#define GPIO2_PHYS_ADDR 0x11700000 -#define SYS_PHYS_ADDR 0x11900000 #define LCD_PHYS_ADDR 0x15000000 #define PCMCIA_IO_PHYS_ADDR 0xF00000000ULL #define PCMCIA_ATTR_PHYS_ADDR 0xF40000000ULL @@ -749,22 +760,8 @@ enum soc_au1200_ints { #ifdef CONFIG_SOC_AU1550 #define MEM_PHYS_ADDR 0x14000000 #define STATIC_MEM_PHYS_ADDR 0x14001000 -#define IC0_PHYS_ADDR 0x10400000 -#define IC1_PHYS_ADDR 0x11800000 #define USBH_PHYS_ADDR 0x14020000 -#define USBD_PHYS_ADDR 0x10200000 #define PCI_PHYS_ADDR 0x14005000 -#define MAC0_PHYS_ADDR 0x10500000 -#define MAC1_PHYS_ADDR 0x10510000 -#define MACEN_PHYS_ADDR 0x10520000 -#define MACDMA0_PHYS_ADDR 0x14004000 -#define MACDMA1_PHYS_ADDR 0x14004200 -#define UART0_PHYS_ADDR 0x11100000 -#define UART1_PHYS_ADDR 0x11200000 -#define UART3_PHYS_ADDR 0x11400000 -#define GPIO2_PHYS_ADDR 0x11700000 -#define SYS_PHYS_ADDR 0x11900000 -#define DDMA_PHYS_ADDR 0x14002000 #define PE_PHYS_ADDR 0x14008000 #define PSC0_PHYS_ADDR 0x11A00000 #define PSC1_PHYS_ADDR 0x11B00000 @@ -786,19 +783,10 @@ enum soc_au1200_ints { #define STATIC_MEM_PHYS_ADDR 0x14001000 #define AES_PHYS_ADDR 0x10300000 #define CIM_PHYS_ADDR 0x14004000 -#define IC0_PHYS_ADDR 0x10400000 -#define IC1_PHYS_ADDR 0x11800000 #define USBM_PHYS_ADDR 0x14020000 #define USBH_PHYS_ADDR 0x14020100 -#define UART0_PHYS_ADDR 0x11100000 -#define UART1_PHYS_ADDR 0x11200000 -#define GPIO2_PHYS_ADDR 0x11700000 -#define SYS_PHYS_ADDR 0x11900000 -#define DDMA_PHYS_ADDR 0x14002000 #define PSC0_PHYS_ADDR 0x11A00000 #define PSC1_PHYS_ADDR 0x11B00000 -#define SD0_PHYS_ADDR 0x10600000 -#define SD1_PHYS_ADDR 0x10680000 #define LCD_PHYS_ADDR 0x15000000 #define SWCNT_PHYS_ADDR 0x1110010C #define MAEFE_PHYS_ADDR 0x14012000 @@ -835,183 +823,43 @@ enum soc_au1200_ints { #endif -/* Interrupt Controller register offsets */ -#define IC_CFG0RD 0x40 -#define IC_CFG0SET 0x40 -#define IC_CFG0CLR 0x44 -#define IC_CFG1RD 0x48 -#define IC_CFG1SET 0x48 -#define IC_CFG1CLR 0x4C -#define IC_CFG2RD 0x50 -#define IC_CFG2SET 0x50 -#define IC_CFG2CLR 0x54 -#define IC_REQ0INT 0x54 -#define IC_SRCRD 0x58 -#define IC_SRCSET 0x58 -#define IC_SRCCLR 0x5C -#define IC_REQ1INT 0x5C -#define IC_ASSIGNRD 0x60 -#define IC_ASSIGNSET 0x60 -#define IC_ASSIGNCLR 0x64 -#define IC_WAKERD 0x68 -#define IC_WAKESET 0x68 -#define IC_WAKECLR 0x6C -#define IC_MASKRD 0x70 -#define IC_MASKSET 0x70 -#define IC_MASKCLR 0x74 -#define IC_RISINGRD 0x78 -#define IC_RISINGCLR 0x78 -#define IC_FALLINGRD 0x7C -#define IC_FALLINGCLR 0x7C -#define IC_TESTBIT 0x80 - - -/* Interrupt Controller 0 */ -#define IC0_CFG0RD 0xB0400040 -#define IC0_CFG0SET 0xB0400040 -#define IC0_CFG0CLR 0xB0400044 - -#define IC0_CFG1RD 0xB0400048 -#define IC0_CFG1SET 0xB0400048 -#define IC0_CFG1CLR 0xB040004C - -#define IC0_CFG2RD 0xB0400050 -#define IC0_CFG2SET 0xB0400050 -#define IC0_CFG2CLR 0xB0400054 - -#define IC0_REQ0INT 0xB0400054 -#define IC0_SRCRD 0xB0400058 -#define IC0_SRCSET 0xB0400058 -#define IC0_SRCCLR 0xB040005C -#define IC0_REQ1INT 0xB040005C - -#define IC0_ASSIGNRD 0xB0400060 -#define IC0_ASSIGNSET 0xB0400060 -#define IC0_ASSIGNCLR 0xB0400064 - -#define IC0_WAKERD 0xB0400068 -#define IC0_WAKESET 0xB0400068 -#define IC0_WAKECLR 0xB040006C - -#define IC0_MASKRD 0xB0400070 -#define IC0_MASKSET 0xB0400070 -#define IC0_MASKCLR 0xB0400074 - -#define IC0_RISINGRD 0xB0400078 -#define IC0_RISINGCLR 0xB0400078 -#define IC0_FALLINGRD 0xB040007C -#define IC0_FALLINGCLR 0xB040007C - -#define IC0_TESTBIT 0xB0400080 - -/* Interrupt Controller 1 */ -#define IC1_CFG0RD 0xB1800040 -#define IC1_CFG0SET 0xB1800040 -#define IC1_CFG0CLR 0xB1800044 - -#define IC1_CFG1RD 0xB1800048 -#define IC1_CFG1SET 0xB1800048 -#define IC1_CFG1CLR 0xB180004C - -#define IC1_CFG2RD 0xB1800050 -#define IC1_CFG2SET 0xB1800050 -#define IC1_CFG2CLR 0xB1800054 - -#define IC1_REQ0INT 0xB1800054 -#define IC1_SRCRD 0xB1800058 -#define IC1_SRCSET 0xB1800058 -#define IC1_SRCCLR 0xB180005C -#define IC1_REQ1INT 0xB180005C - -#define IC1_ASSIGNRD 0xB1800060 -#define IC1_ASSIGNSET 0xB1800060 -#define IC1_ASSIGNCLR 0xB1800064 - -#define IC1_WAKERD 0xB1800068 -#define IC1_WAKESET 0xB1800068 -#define IC1_WAKECLR 0xB180006C - -#define IC1_MASKRD 0xB1800070 -#define IC1_MASKSET 0xB1800070 -#define IC1_MASKCLR 0xB1800074 - -#define IC1_RISINGRD 0xB1800078 -#define IC1_RISINGCLR 0xB1800078 -#define IC1_FALLINGRD 0xB180007C -#define IC1_FALLINGCLR 0xB180007C - -#define IC1_TESTBIT 0xB1800080 /* Au1000 */ #ifdef CONFIG_SOC_AU1000 -#define UART0_ADDR 0xB1100000 -#define UART3_ADDR 0xB1400000 - #define USB_OHCI_BASE 0x10100000 /* phys addr for ioremap */ #define USB_HOST_CONFIG 0xB017FFFC #define FOR_PLATFORM_C_USB_HOST_INT AU1000_USB_HOST_INT - -#define AU1000_ETH0_BASE 0xB0500000 -#define AU1000_ETH1_BASE 0xB0510000 -#define AU1000_MAC0_ENABLE 0xB0520000 -#define AU1000_MAC1_ENABLE 0xB0520004 -#define NUM_ETH_INTERFACES 2 #endif /* CONFIG_SOC_AU1000 */ /* Au1500 */ #ifdef CONFIG_SOC_AU1500 -#define UART0_ADDR 0xB1100000 -#define UART3_ADDR 0xB1400000 - #define USB_OHCI_BASE 0x10100000 /* phys addr for ioremap */ #define USB_HOST_CONFIG 0xB017fffc #define FOR_PLATFORM_C_USB_HOST_INT AU1500_USB_HOST_INT - -#define AU1500_ETH0_BASE 0xB1500000 -#define AU1500_ETH1_BASE 0xB1510000 -#define AU1500_MAC0_ENABLE 0xB1520000 -#define AU1500_MAC1_ENABLE 0xB1520004 -#define NUM_ETH_INTERFACES 2 #endif /* CONFIG_SOC_AU1500 */ /* Au1100 */ #ifdef CONFIG_SOC_AU1100 -#define UART0_ADDR 0xB1100000 -#define UART3_ADDR 0xB1400000 - #define USB_OHCI_BASE 0x10100000 /* phys addr for ioremap */ #define USB_HOST_CONFIG 0xB017FFFC #define FOR_PLATFORM_C_USB_HOST_INT AU1100_USB_HOST_INT - -#define AU1100_ETH0_BASE 0xB0500000 -#define AU1100_MAC0_ENABLE 0xB0520000 -#define NUM_ETH_INTERFACES 1 #endif /* CONFIG_SOC_AU1100 */ #ifdef CONFIG_SOC_AU1550 -#define UART0_ADDR 0xB1100000 #define USB_OHCI_BASE 0x14020000 /* phys addr for ioremap */ #define USB_OHCI_LEN 0x00060000 #define USB_HOST_CONFIG 0xB4027ffc #define FOR_PLATFORM_C_USB_HOST_INT AU1550_USB_HOST_INT - -#define AU1550_ETH0_BASE 0xB0500000 -#define AU1550_ETH1_BASE 0xB0510000 -#define AU1550_MAC0_ENABLE 0xB0520000 -#define AU1550_MAC1_ENABLE 0xB0520004 -#define NUM_ETH_INTERFACES 2 #endif /* CONFIG_SOC_AU1550 */ #ifdef CONFIG_SOC_AU1200 -#define UART0_ADDR 0xB1100000 - #define USB_UOC_BASE 0x14020020 #define USB_UOC_LEN 0x20 #define USB_OHCI_BASE 0x14020100 @@ -1504,22 +1352,6 @@ enum soc_au1200_ints { #define SYS_PINFUNC_S1B (1 << 2) #endif -#define SYS_TRIOUTRD 0xB1900100 -#define SYS_TRIOUTCLR 0xB1900100 -#define SYS_OUTPUTRD 0xB1900108 -#define SYS_OUTPUTSET 0xB1900108 -#define SYS_OUTPUTCLR 0xB190010C -#define SYS_PINSTATERD 0xB1900110 -#define SYS_PININPUTEN 0xB1900110 - -/* GPIO2, Au1500, Au1550 only */ -#define GPIO2_BASE 0xB1700000 -#define GPIO2_DIR (GPIO2_BASE + 0) -#define GPIO2_OUTPUT (GPIO2_BASE + 8) -#define GPIO2_PINSTATE (GPIO2_BASE + 0xC) -#define GPIO2_INTENABLE (GPIO2_BASE + 0x10) -#define GPIO2_ENABLE (GPIO2_BASE + 0x14) - /* Power Management */ #define SYS_SCRATCH0 0xB1900018 #define SYS_SCRATCH1 0xB190001C @@ -1635,12 +1467,6 @@ enum soc_au1200_ints { # define AC97C_RS (1 << 1) # define AC97C_CE (1 << 0) -/* Secure Digital (SD) Controller */ -#define SD0_XMIT_FIFO 0xB0600000 -#define SD0_RECV_FIFO 0xB0600004 -#define SD1_XMIT_FIFO 0xB0680000 -#define SD1_RECV_FIFO 0xB0680004 - #if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550) /* Au1500 PCI Controller */ #define Au1500_CFG_BASE 0xB4005000 /* virtual, KSEG1 addr */ diff --git a/arch/mips/include/asm/mach-au1x00/au1000_dma.h b/arch/mips/include/asm/mach-au1x00/au1000_dma.h index c333b4e1cd44..59f5b55b2200 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000_dma.h +++ b/arch/mips/include/asm/mach-au1x00/au1000_dma.h @@ -37,10 +37,6 @@ #define NUM_AU1000_DMA_CHANNELS 8 -/* DMA Channel Base Addresses */ -#define DMA_CHANNEL_BASE 0xB4002000 -#define DMA_CHANNEL_LEN 0x00000100 - /* DMA Channel Register Offsets */ #define DMA_MODE_SET 0x00000000 #define DMA_MODE_READ DMA_MODE_SET diff --git a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h index c8a553a36ba4..2fdacfe85e23 100644 --- a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h +++ b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h @@ -37,14 +37,6 @@ #ifndef _LANGUAGE_ASSEMBLY -/* - * The DMA base addresses. - * The channels are every 256 bytes (0x0100) from the channel 0 base. - * Interrupt status/enable is bits 15:0 for channels 15 to zero. - */ -#define DDMA_GLOBAL_BASE 0xb4003000 -#define DDMA_CHANNEL_BASE 0xb4002000 - typedef volatile struct dbdma_global { u32 ddma_config; u32 ddma_intstat; diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h index 62d2f136d941..1f41a522906d 100644 --- a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h +++ b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h @@ -24,6 +24,23 @@ #define MAKE_IRQ(intc, off) (AU1000_INTC##intc##_INT_BASE + (off)) +/* GPIO1 registers within SYS_ area */ +#define SYS_TRIOUTRD 0x100 +#define SYS_TRIOUTCLR 0x100 +#define SYS_OUTPUTRD 0x108 +#define SYS_OUTPUTSET 0x108 +#define SYS_OUTPUTCLR 0x10C +#define SYS_PINSTATERD 0x110 +#define SYS_PININPUTEN 0x110 + +/* register offsets within GPIO2 block */ +#define GPIO2_DIR 0x00 +#define GPIO2_OUTPUT 0x08 +#define GPIO2_PINSTATE 0x0C +#define GPIO2_INTENABLE 0x10 +#define GPIO2_ENABLE 0x14 + +struct gpio; static inline int au1000_gpio1_to_irq(int gpio) { @@ -200,23 +217,26 @@ static inline int au1200_irq_to_gpio(int irq) */ static inline void alchemy_gpio1_set_value(int gpio, int v) { + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR); unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE); unsigned long r = v ? SYS_OUTPUTSET : SYS_OUTPUTCLR; - au_writel(mask, r); - au_sync(); + __raw_writel(mask, base + r); + wmb(); } static inline int alchemy_gpio1_get_value(int gpio) { + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR); unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE); - return au_readl(SYS_PINSTATERD) & mask; + return __raw_readl(base + SYS_PINSTATERD) & mask; } static inline int alchemy_gpio1_direction_input(int gpio) { + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR); unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE); - au_writel(mask, SYS_TRIOUTCLR); - au_sync(); + __raw_writel(mask, base + SYS_TRIOUTCLR); + wmb(); return 0; } @@ -257,27 +277,31 @@ static inline int alchemy_gpio1_to_irq(int gpio) */ static inline void __alchemy_gpio2_mod_dir(int gpio, int to_out) { + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR); unsigned long mask = 1 << (gpio - ALCHEMY_GPIO2_BASE); - unsigned long d = au_readl(GPIO2_DIR); + unsigned long d = __raw_readl(base + GPIO2_DIR); + if (to_out) d |= mask; else d &= ~mask; - au_writel(d, GPIO2_DIR); - au_sync(); + __raw_writel(d, base + GPIO2_DIR); + wmb(); } static inline void alchemy_gpio2_set_value(int gpio, int v) { + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR); unsigned long mask; mask = ((v) ? 0x00010001 : 0x00010000) << (gpio - ALCHEMY_GPIO2_BASE); - au_writel(mask, GPIO2_OUTPUT); - au_sync(); + __raw_writel(mask, base + GPIO2_OUTPUT); + wmb(); } static inline int alchemy_gpio2_get_value(int gpio) { - return au_readl(GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE)); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR); + return __raw_readl(base + GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE)); } static inline int alchemy_gpio2_direction_input(int gpio) @@ -329,21 +353,23 @@ static inline int alchemy_gpio2_to_irq(int gpio) */ static inline void alchemy_gpio1_input_enable(void) { - au_writel(0, SYS_PININPUTEN); /* the write op is key */ - au_sync(); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR); + __raw_writel(0, base + SYS_PININPUTEN); /* the write op is key */ + wmb(); } /* GPIO2 shared interrupts and control */ static inline void __alchemy_gpio2_mod_int(int gpio2, int en) { - unsigned long r = au_readl(GPIO2_INTENABLE); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR); + unsigned long r = __raw_readl(base + GPIO2_INTENABLE); if (en) r |= 1 << gpio2; else r &= ~(1 << gpio2); - au_writel(r, GPIO2_INTENABLE); - au_sync(); + __raw_writel(r, base + GPIO2_INTENABLE); + wmb(); } /** @@ -418,10 +444,11 @@ static inline void alchemy_gpio2_disable_int(int gpio2) */ static inline void alchemy_gpio2_enable(void) { - au_writel(3, GPIO2_ENABLE); /* reset, clock enabled */ - au_sync(); - au_writel(1, GPIO2_ENABLE); /* clock enabled */ - au_sync(); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR); + __raw_writel(3, base + GPIO2_ENABLE); /* reset, clock enabled */ + wmb(); + __raw_writel(1, base + GPIO2_ENABLE); /* clock enabled */ + wmb(); } /** @@ -431,8 +458,9 @@ static inline void alchemy_gpio2_enable(void) */ static inline void alchemy_gpio2_disable(void) { - au_writel(2, GPIO2_ENABLE); /* reset, clock disabled */ - au_sync(); + void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR); + __raw_writel(2, base + GPIO2_ENABLE); /* reset, clock disabled */ + wmb(); } /**********************************************************************/ @@ -556,6 +584,16 @@ static inline void gpio_set_value(int gpio, int v) alchemy_gpio_set_value(gpio, v); } +static inline int gpio_get_value_cansleep(unsigned gpio) +{ + return gpio_get_value(gpio); +} + +static inline void gpio_set_value_cansleep(unsigned gpio, int value) +{ + gpio_set_value(gpio, value); +} + static inline int gpio_is_valid(int gpio) { return alchemy_gpio_is_valid(gpio); @@ -581,10 +619,50 @@ static inline int gpio_request(unsigned gpio, const char *label) return 0; } +static inline int gpio_request_one(unsigned gpio, + unsigned long flags, const char *label) +{ + return 0; +} + +static inline int gpio_request_array(struct gpio *array, size_t num) +{ + return 0; +} + static inline void gpio_free(unsigned gpio) { } +static inline void gpio_free_array(struct gpio *array, size_t num) +{ +} + +static inline int gpio_set_debounce(unsigned gpio, unsigned debounce) +{ + return -ENOSYS; +} + +static inline int gpio_export(unsigned gpio, bool direction_may_change) +{ + return -ENOSYS; +} + +static inline int gpio_export_link(struct device *dev, const char *name, + unsigned gpio) +{ + return -ENOSYS; +} + +static inline int gpio_sysfs_set_active_low(unsigned gpio, int value) +{ + return -ENOSYS; +} + +static inline void gpio_unexport(unsigned gpio) +{ +} + #endif /* !CONFIG_ALCHEMY_GPIO_INDIRECT */ diff --git a/arch/mips/include/asm/mach-bcm47xx/nvram.h b/arch/mips/include/asm/mach-bcm47xx/nvram.h index 9759588ba3cf..184d5ecb5f51 100644 --- a/arch/mips/include/asm/mach-bcm47xx/nvram.h +++ b/arch/mips/include/asm/mach-bcm47xx/nvram.h @@ -39,8 +39,16 @@ extern int nvram_getenv(char *name, char *val, size_t val_len); static inline void nvram_parse_macaddr(char *buf, u8 *macaddr) { - sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0], &macaddr[1], - &macaddr[2], &macaddr[3], &macaddr[4], &macaddr[5]); + if (strchr(buf, ':')) + sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0], + &macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4], + &macaddr[5]); + else if (strchr(buf, '-')) + sscanf(buf, "%hhx-%hhx-%hhx-%hhx-%hhx-%hhx", &macaddr[0], + &macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4], + &macaddr[5]); + else + printk(KERN_WARNING "Can not parse mac address: %s\n", buf); } #endif diff --git a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h index 0b2b5eb22e9b..dedef7d2b01f 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h @@ -63,6 +63,11 @@ # CN30XX Disable instruction prefetching or v0, v0, 0x2000 skip: + # First clear off CvmCtl[IPPCI] bit and move the performance + # counters interrupt to IRQ 6 + li v1, ~(7 << 7) + and v0, v0, v1 + ori v0, v0, (6 << 7) # Write the cavium control register dmtc0 v0, CP0_CVMCTL_REG sync diff --git a/arch/mips/include/asm/mach-lantiq/lantiq.h b/arch/mips/include/asm/mach-lantiq/lantiq.h new file mode 100644 index 000000000000..ce2f02929d22 --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/lantiq.h @@ -0,0 +1,63 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ +#ifndef _LANTIQ_H__ +#define _LANTIQ_H__ + +#include <linux/irq.h> + +/* generic reg access functions */ +#define ltq_r32(reg) __raw_readl(reg) +#define ltq_w32(val, reg) __raw_writel(val, reg) +#define ltq_w32_mask(clear, set, reg) \ + ltq_w32((ltq_r32(reg) & ~(clear)) | (set), reg) +#define ltq_r8(reg) __raw_readb(reg) +#define ltq_w8(val, reg) __raw_writeb(val, reg) + +/* register access macros for EBU and CGU */ +#define ltq_ebu_w32(x, y) ltq_w32((x), ltq_ebu_membase + (y)) +#define ltq_ebu_r32(x) ltq_r32(ltq_ebu_membase + (x)) +#define ltq_cgu_w32(x, y) ltq_w32((x), ltq_cgu_membase + (y)) +#define ltq_cgu_r32(x) ltq_r32(ltq_cgu_membase + (x)) + +extern __iomem void *ltq_ebu_membase; +extern __iomem void *ltq_cgu_membase; + +extern unsigned int ltq_get_cpu_ver(void); +extern unsigned int ltq_get_soc_type(void); + +/* clock speeds */ +#define CLOCK_60M 60000000 +#define CLOCK_83M 83333333 +#define CLOCK_111M 111111111 +#define CLOCK_133M 133333333 +#define CLOCK_167M 166666667 +#define CLOCK_200M 200000000 +#define CLOCK_266M 266666666 +#define CLOCK_333M 333333333 +#define CLOCK_400M 400000000 + +/* spinlock all ebu i/o */ +extern spinlock_t ebu_lock; + +/* some irq helpers */ +extern void ltq_disable_irq(struct irq_data *data); +extern void ltq_mask_and_ack_irq(struct irq_data *data); +extern void ltq_enable_irq(struct irq_data *data); + +/* find out what caused the last cpu reset */ +extern int ltq_reset_cause(void); +#define LTQ_RST_CAUSE_WDTRST 0x20 + +#define IOPORT_RESOURCE_START 0x10000000 +#define IOPORT_RESOURCE_END 0xffffffff +#define IOMEM_RESOURCE_START 0x10000000 +#define IOMEM_RESOURCE_END 0xffffffff +#define LTQ_FLASH_START 0x10000000 +#define LTQ_FLASH_MAX 0x04000000 + +#endif diff --git a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h new file mode 100644 index 000000000000..a305f1d0259e --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h @@ -0,0 +1,53 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LANTIQ_PLATFORM_H__ +#define _LANTIQ_PLATFORM_H__ + +#include <linux/mtd/partitions.h> +#include <linux/socket.h> + +/* struct used to pass info to the pci core */ +enum { + PCI_CLOCK_INT = 0, + PCI_CLOCK_EXT +}; + +#define PCI_EXIN0 0x0001 +#define PCI_EXIN1 0x0002 +#define PCI_EXIN2 0x0004 +#define PCI_EXIN3 0x0008 +#define PCI_EXIN4 0x0010 +#define PCI_EXIN5 0x0020 +#define PCI_EXIN_MAX 6 + +#define PCI_GNT1 0x0040 +#define PCI_GNT2 0x0080 +#define PCI_GNT3 0x0100 +#define PCI_GNT4 0x0200 + +#define PCI_REQ1 0x0400 +#define PCI_REQ2 0x0800 +#define PCI_REQ3 0x1000 +#define PCI_REQ4 0x2000 +#define PCI_REQ_SHIFT 10 +#define PCI_REQ_MASK 0xf + +struct ltq_pci_data { + int clock; + int gpio; + int irq[16]; +}; + +/* struct used to pass info to network drivers */ +struct ltq_eth_data { + struct sockaddr mac; + int mii_mode; +}; + +#endif diff --git a/arch/mips/include/asm/mach-lantiq/war.h b/arch/mips/include/asm/mach-lantiq/war.h new file mode 100644 index 000000000000..01b08ef368d1 --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/war.h @@ -0,0 +1,24 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + */ +#ifndef __ASM_MIPS_MACH_LANTIQ_WAR_H +#define __ASM_MIPS_MACH_LANTIQ_WAR_H + +#define R4600_V1_INDEX_ICACHEOP_WAR 0 +#define R4600_V1_HIT_CACHEOP_WAR 0 +#define R4600_V2_HIT_CACHEOP_WAR 0 +#define R5432_CP0_INTERRUPT_WAR 0 +#define BCM1250_M3_WAR 0 +#define SIBYTE_1956_WAR 0 +#define MIPS4K_ICACHE_REFILL_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 +#define TX49XX_ICACHE_INDEX_INV_WAR 0 +#define RM9000_CDEX_SMP_WAR 0 +#define ICACHE_REFILLS_WORKAROUND_WAR 0 +#define R10000_LLSC_WAR 0 +#define MIPS34K_MISSED_ITLB_WAR 0 + +#endif diff --git a/arch/mips/include/asm/mach-lantiq/xway/irq.h b/arch/mips/include/asm/mach-lantiq/xway/irq.h new file mode 100644 index 000000000000..a1471d2dd0d2 --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/xway/irq.h @@ -0,0 +1,18 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef __LANTIQ_IRQ_H +#define __LANTIQ_IRQ_H + +#include <lantiq_irq.h> + +#define NR_IRQS 256 + +#include_next <irq.h> + +#endif diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h new file mode 100644 index 000000000000..b4465a888e20 --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h @@ -0,0 +1,66 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LANTIQ_XWAY_IRQ_H__ +#define _LANTIQ_XWAY_IRQ_H__ + +#define INT_NUM_IRQ0 8 +#define INT_NUM_IM0_IRL0 (INT_NUM_IRQ0 + 0) +#define INT_NUM_IM1_IRL0 (INT_NUM_IRQ0 + 32) +#define INT_NUM_IM2_IRL0 (INT_NUM_IRQ0 + 64) +#define INT_NUM_IM3_IRL0 (INT_NUM_IRQ0 + 96) +#define INT_NUM_IM4_IRL0 (INT_NUM_IRQ0 + 128) +#define INT_NUM_IM_OFFSET (INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0) + +#define LTQ_ASC_TIR(x) (INT_NUM_IM3_IRL0 + (x * 8)) +#define LTQ_ASC_RIR(x) (INT_NUM_IM3_IRL0 + (x * 8) + 1) +#define LTQ_ASC_EIR(x) (INT_NUM_IM3_IRL0 + (x * 8) + 2) + +#define LTQ_ASC_ASE_TIR INT_NUM_IM2_IRL0 +#define LTQ_ASC_ASE_RIR (INT_NUM_IM2_IRL0 + 2) +#define LTQ_ASC_ASE_EIR (INT_NUM_IM2_IRL0 + 3) + +#define LTQ_SSC_TIR (INT_NUM_IM0_IRL0 + 15) +#define LTQ_SSC_RIR (INT_NUM_IM0_IRL0 + 14) +#define LTQ_SSC_EIR (INT_NUM_IM0_IRL0 + 16) + +#define LTQ_MEI_DYING_GASP_INT (INT_NUM_IM1_IRL0 + 21) +#define LTQ_MEI_INT (INT_NUM_IM1_IRL0 + 23) + +#define LTQ_TIMER6_INT (INT_NUM_IM1_IRL0 + 23) +#define LTQ_USB_INT (INT_NUM_IM1_IRL0 + 22) +#define LTQ_USB_OC_INT (INT_NUM_IM4_IRL0 + 23) + +#define MIPS_CPU_TIMER_IRQ 7 + +#define LTQ_DMA_CH0_INT (INT_NUM_IM2_IRL0) +#define LTQ_DMA_CH1_INT (INT_NUM_IM2_IRL0 + 1) +#define LTQ_DMA_CH2_INT (INT_NUM_IM2_IRL0 + 2) +#define LTQ_DMA_CH3_INT (INT_NUM_IM2_IRL0 + 3) +#define LTQ_DMA_CH4_INT (INT_NUM_IM2_IRL0 + 4) +#define LTQ_DMA_CH5_INT (INT_NUM_IM2_IRL0 + 5) +#define LTQ_DMA_CH6_INT (INT_NUM_IM2_IRL0 + 6) +#define LTQ_DMA_CH7_INT (INT_NUM_IM2_IRL0 + 7) +#define LTQ_DMA_CH8_INT (INT_NUM_IM2_IRL0 + 8) +#define LTQ_DMA_CH9_INT (INT_NUM_IM2_IRL0 + 9) +#define LTQ_DMA_CH10_INT (INT_NUM_IM2_IRL0 + 10) +#define LTQ_DMA_CH11_INT (INT_NUM_IM2_IRL0 + 11) +#define LTQ_DMA_CH12_INT (INT_NUM_IM2_IRL0 + 25) +#define LTQ_DMA_CH13_INT (INT_NUM_IM2_IRL0 + 26) +#define LTQ_DMA_CH14_INT (INT_NUM_IM2_IRL0 + 27) +#define LTQ_DMA_CH15_INT (INT_NUM_IM2_IRL0 + 28) +#define LTQ_DMA_CH16_INT (INT_NUM_IM2_IRL0 + 29) +#define LTQ_DMA_CH17_INT (INT_NUM_IM2_IRL0 + 30) +#define LTQ_DMA_CH18_INT (INT_NUM_IM2_IRL0 + 16) +#define LTQ_DMA_CH19_INT (INT_NUM_IM2_IRL0 + 21) + +#define LTQ_PPE_MBOX_INT (INT_NUM_IM2_IRL0 + 24) + +#define INT_NUM_IM4_IRL14 (INT_NUM_IM4_IRL0 + 14) + +#endif diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h new file mode 100644 index 000000000000..8a3c6be669d2 --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h @@ -0,0 +1,141 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LTQ_XWAY_H__ +#define _LTQ_XWAY_H__ + +#ifdef CONFIG_SOC_TYPE_XWAY + +#include <lantiq.h> + +/* Chip IDs */ +#define SOC_ID_DANUBE1 0x129 +#define SOC_ID_DANUBE2 0x12B +#define SOC_ID_TWINPASS 0x12D +#define SOC_ID_AMAZON_SE 0x152 +#define SOC_ID_ARX188 0x16C +#define SOC_ID_ARX168 0x16D +#define SOC_ID_ARX182 0x16F + +/* SoC Types */ +#define SOC_TYPE_DANUBE 0x01 +#define SOC_TYPE_TWINPASS 0x02 +#define SOC_TYPE_AR9 0x03 +#define SOC_TYPE_VR9 0x04 +#define SOC_TYPE_AMAZON_SE 0x05 + +/* ASC0/1 - serial port */ +#define LTQ_ASC0_BASE_ADDR 0x1E100400 +#define LTQ_ASC1_BASE_ADDR 0x1E100C00 +#define LTQ_ASC_SIZE 0x400 + +/* RCU - reset control unit */ +#define LTQ_RCU_BASE_ADDR 0x1F203000 +#define LTQ_RCU_SIZE 0x1000 + +/* GPTU - general purpose timer unit */ +#define LTQ_GPTU_BASE_ADDR 0x18000300 +#define LTQ_GPTU_SIZE 0x100 + +/* EBU - external bus unit */ +#define LTQ_EBU_GPIO_START 0x14000000 +#define LTQ_EBU_GPIO_SIZE 0x1000 + +#define LTQ_EBU_BASE_ADDR 0x1E105300 +#define LTQ_EBU_SIZE 0x100 + +#define LTQ_EBU_BUSCON0 0x0060 +#define LTQ_EBU_PCC_CON 0x0090 +#define LTQ_EBU_PCC_IEN 0x00A4 +#define LTQ_EBU_PCC_ISTAT 0x00A0 +#define LTQ_EBU_BUSCON1 0x0064 +#define LTQ_EBU_ADDRSEL1 0x0024 +#define EBU_WRDIS 0x80000000 + +/* CGU - clock generation unit */ +#define LTQ_CGU_BASE_ADDR 0x1F103000 +#define LTQ_CGU_SIZE 0x1000 + +/* ICU - interrupt control unit */ +#define LTQ_ICU_BASE_ADDR 0x1F880200 +#define LTQ_ICU_SIZE 0x100 + +/* EIU - external interrupt unit */ +#define LTQ_EIU_BASE_ADDR 0x1F101000 +#define LTQ_EIU_SIZE 0x1000 + +/* PMU - power management unit */ +#define LTQ_PMU_BASE_ADDR 0x1F102000 +#define LTQ_PMU_SIZE 0x1000 + +#define PMU_DMA 0x0020 +#define PMU_USB 0x8041 +#define PMU_LED 0x0800 +#define PMU_GPT 0x1000 +#define PMU_PPE 0x2000 +#define PMU_FPI 0x4000 +#define PMU_SWITCH 0x10000000 + +/* ETOP - ethernet */ +#define LTQ_ETOP_BASE_ADDR 0x1E180000 +#define LTQ_ETOP_SIZE 0x40000 + +/* DMA */ +#define LTQ_DMA_BASE_ADDR 0x1E104100 +#define LTQ_DMA_SIZE 0x800 + +/* PCI */ +#define PCI_CR_BASE_ADDR 0x1E105400 +#define PCI_CR_SIZE 0x400 + +/* WDT */ +#define LTQ_WDT_BASE_ADDR 0x1F8803F0 +#define LTQ_WDT_SIZE 0x10 + +/* STP - serial to parallel conversion unit */ +#define LTQ_STP_BASE_ADDR 0x1E100BB0 +#define LTQ_STP_SIZE 0x40 + +/* GPIO */ +#define LTQ_GPIO0_BASE_ADDR 0x1E100B10 +#define LTQ_GPIO1_BASE_ADDR 0x1E100B40 +#define LTQ_GPIO2_BASE_ADDR 0x1E100B70 +#define LTQ_GPIO_SIZE 0x30 + +/* SSC */ +#define LTQ_SSC_BASE_ADDR 0x1e100800 +#define LTQ_SSC_SIZE 0x100 + +/* MEI - dsl core */ +#define LTQ_MEI_BASE_ADDR 0x1E116000 + +/* DEU - data encryption unit */ +#define LTQ_DEU_BASE_ADDR 0x1E103100 + +/* MPS - multi processor unit (voice) */ +#define LTQ_MPS_BASE_ADDR (KSEG1 + 0x1F107000) +#define LTQ_MPS_CHIPID ((u32 *)(LTQ_MPS_BASE_ADDR + 0x0344)) + +/* request a non-gpio and set the PIO config */ +extern int ltq_gpio_request(unsigned int pin, unsigned int alt0, + unsigned int alt1, unsigned int dir, const char *name); +extern void ltq_pmu_enable(unsigned int module); +extern void ltq_pmu_disable(unsigned int module); + +static inline int ltq_is_ar9(void) +{ + return (ltq_get_soc_type() == SOC_TYPE_AR9); +} + +static inline int ltq_is_vr9(void) +{ + return (ltq_get_soc_type() == SOC_TYPE_VR9); +} + +#endif /* CONFIG_SOC_TYPE_XWAY */ +#endif /* _LTQ_XWAY_H__ */ diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h new file mode 100644 index 000000000000..872943a4b90e --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h @@ -0,0 +1,60 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2011 John Crispin <blogic@openwrt.org> + */ + +#ifndef LTQ_DMA_H__ +#define LTQ_DMA_H__ + +#define LTQ_DESC_SIZE 0x08 /* each descriptor is 64bit */ +#define LTQ_DESC_NUM 0x40 /* 64 descriptors / channel */ + +#define LTQ_DMA_OWN BIT(31) /* owner bit */ +#define LTQ_DMA_C BIT(30) /* complete bit */ +#define LTQ_DMA_SOP BIT(29) /* start of packet */ +#define LTQ_DMA_EOP BIT(28) /* end of packet */ +#define LTQ_DMA_TX_OFFSET(x) ((x & 0x1f) << 23) /* data bytes offset */ +#define LTQ_DMA_RX_OFFSET(x) ((x & 0x7) << 23) /* data bytes offset */ +#define LTQ_DMA_SIZE_MASK (0xffff) /* the size field is 16 bit */ + +struct ltq_dma_desc { + u32 ctl; + u32 addr; +}; + +struct ltq_dma_channel { + int nr; /* the channel number */ + int irq; /* the mapped irq */ + int desc; /* the current descriptor */ + struct ltq_dma_desc *desc_base; /* the descriptor base */ + int phys; /* physical addr */ +}; + +enum { + DMA_PORT_ETOP = 0, + DMA_PORT_DEU, +}; + +extern void ltq_dma_enable_irq(struct ltq_dma_channel *ch); +extern void ltq_dma_disable_irq(struct ltq_dma_channel *ch); +extern void ltq_dma_ack_irq(struct ltq_dma_channel *ch); +extern void ltq_dma_open(struct ltq_dma_channel *ch); +extern void ltq_dma_close(struct ltq_dma_channel *ch); +extern void ltq_dma_alloc_tx(struct ltq_dma_channel *ch); +extern void ltq_dma_alloc_rx(struct ltq_dma_channel *ch); +extern void ltq_dma_free(struct ltq_dma_channel *ch); +extern void ltq_dma_init_port(int p); + +#endif diff --git a/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h new file mode 100644 index 000000000000..3b728275b9b0 --- /dev/null +++ b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h @@ -0,0 +1,47 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011 Netlogic Microsystems + * Copyright (C) 2003 Ralf Baechle + */ +#ifndef __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H + +#define cpu_has_4kex 1 +#define cpu_has_4k_cache 1 +#define cpu_has_watch 1 +#define cpu_has_mips16 0 +#define cpu_has_counter 1 +#define cpu_has_divec 1 +#define cpu_has_vce 0 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_prefetch 1 +#define cpu_has_mcheck 1 +#define cpu_has_ejtag 1 + +#define cpu_has_llsc 1 +#define cpu_has_vtag_icache 0 +#define cpu_has_dc_aliases 0 +#define cpu_has_ic_fills_f_dc 0 +#define cpu_has_dsp 0 +#define cpu_has_mipsmt 0 +#define cpu_has_userlocal 0 +#define cpu_icache_snoops_remote_store 0 + +#define cpu_has_nofpuex 0 +#define cpu_has_64bits 1 + +#define cpu_has_mips32r1 1 +#define cpu_has_mips32r2 0 +#define cpu_has_mips64r1 1 +#define cpu_has_mips64r2 0 + +#define cpu_has_inclusive_pcaches 0 + +#define cpu_dcache_line_size() 32 +#define cpu_icache_line_size() 32 + +#endif /* __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-netlogic/irq.h b/arch/mips/include/asm/mach-netlogic/irq.h new file mode 100644 index 000000000000..b5902458e7c1 --- /dev/null +++ b/arch/mips/include/asm/mach-netlogic/irq.h @@ -0,0 +1,14 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011 Netlogic Microsystems. + */ +#ifndef __ASM_NETLOGIC_IRQ_H +#define __ASM_NETLOGIC_IRQ_H + +#define NR_IRQS 64 +#define MIPS_CPU_IRQ_BASE 0 + +#endif /* __ASM_NETLOGIC_IRQ_H */ diff --git a/arch/mips/include/asm/mach-netlogic/war.h b/arch/mips/include/asm/mach-netlogic/war.h new file mode 100644 index 000000000000..22da89327352 --- /dev/null +++ b/arch/mips/include/asm/mach-netlogic/war.h @@ -0,0 +1,26 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011 Netlogic Microsystems. + * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org> + */ +#ifndef __ASM_MIPS_MACH_NLM_WAR_H +#define __ASM_MIPS_MACH_NLM_WAR_H + +#define R4600_V1_INDEX_ICACHEOP_WAR 0 +#define R4600_V1_HIT_CACHEOP_WAR 0 +#define R4600_V2_HIT_CACHEOP_WAR 0 +#define R5432_CP0_INTERRUPT_WAR 0 +#define BCM1250_M3_WAR 0 +#define SIBYTE_1956_WAR 0 +#define MIPS4K_ICACHE_REFILL_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 +#define TX49XX_ICACHE_INDEX_INV_WAR 0 +#define RM9000_CDEX_SMP_WAR 0 +#define ICACHE_REFILLS_WORKAROUND_WAR 0 +#define R10000_LLSC_WAR 0 +#define MIPS34K_MISSED_ITLB_WAR 0 + +#endif /* __ASM_MIPS_MACH_NLM_WAR_H */ diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h index d94085a3eafb..bc01a02cacd8 100644 --- a/arch/mips/include/asm/module.h +++ b/arch/mips/include/asm/module.h @@ -118,6 +118,8 @@ search_module_dbetables(unsigned long addr) #define MODULE_PROC_FAMILY "LOONGSON2 " #elif defined CONFIG_CPU_CAVIUM_OCTEON #define MODULE_PROC_FAMILY "OCTEON " +#elif defined CONFIG_CPU_XLR +#define MODULE_PROC_FAMILY "XLR " #else #error MODULE_PROC_FAMILY undefined for your processor configuration #endif diff --git a/arch/mips/include/asm/netlogic/interrupt.h b/arch/mips/include/asm/netlogic/interrupt.h new file mode 100644 index 000000000000..a85aadb6cfd7 --- /dev/null +++ b/arch/mips/include/asm/netlogic/interrupt.h @@ -0,0 +1,45 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ASM_NLM_INTERRUPT_H +#define _ASM_NLM_INTERRUPT_H + +/* Defines for the IRQ numbers */ + +#define IRQ_IPI_SMP_FUNCTION 3 +#define IRQ_IPI_SMP_RESCHEDULE 4 +#define IRQ_MSGRING 6 +#define IRQ_TIMER 7 + +#endif diff --git a/arch/mips/include/asm/netlogic/mips-extns.h b/arch/mips/include/asm/netlogic/mips-extns.h new file mode 100644 index 000000000000..8c53d0ba4bf2 --- /dev/null +++ b/arch/mips/include/asm/netlogic/mips-extns.h @@ -0,0 +1,76 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ASM_NLM_MIPS_EXTS_H +#define _ASM_NLM_MIPS_EXTS_H + +/* + * XLR and XLP interrupt request and interrupt mask registers + */ +#define read_c0_eirr() __read_64bit_c0_register($9, 6) +#define read_c0_eimr() __read_64bit_c0_register($9, 7) +#define write_c0_eirr(val) __write_64bit_c0_register($9, 6, val) + +/* + * Writing EIMR in 32 bit is a special case, the lower 8 bit of the + * EIMR is shadowed in the status register, so we cannot save and + * restore status register for split read. + */ +#define write_c0_eimr(val) \ +do { \ + if (sizeof(unsigned long) == 4) { \ + unsigned long __flags; \ + \ + local_irq_save(__flags); \ + __asm__ __volatile__( \ + ".set\tmips64\n\t" \ + "dsll\t%L0, %L0, 32\n\t" \ + "dsrl\t%L0, %L0, 32\n\t" \ + "dsll\t%M0, %M0, 32\n\t" \ + "or\t%L0, %L0, %M0\n\t" \ + "dmtc0\t%L0, $9, 7\n\t" \ + ".set\tmips0" \ + : : "r" (val)); \ + __flags = (__flags & 0xffff00ff) | (((val) & 0xff) << 8);\ + local_irq_restore(__flags); \ + } else \ + __write_64bit_c0_register($9, 7, (val)); \ +} while (0) + +static inline int hard_smp_processor_id(void) +{ + return __read_32bit_c0_register($15, 1) & 0x3ff; +} + +#endif /*_ASM_NLM_MIPS_EXTS_H */ diff --git a/arch/mips/include/asm/netlogic/psb-bootinfo.h b/arch/mips/include/asm/netlogic/psb-bootinfo.h new file mode 100644 index 000000000000..6878307f0ee6 --- /dev/null +++ b/arch/mips/include/asm/netlogic/psb-bootinfo.h @@ -0,0 +1,109 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ASM_NETLOGIC_BOOTINFO_H +#define _ASM_NETLOGIC_BOOTINFO_H + +struct psb_info { + uint64_t boot_level; + uint64_t io_base; + uint64_t output_device; + uint64_t uart_print; + uint64_t led_output; + uint64_t init; + uint64_t exit; + uint64_t warm_reset; + uint64_t wakeup; + uint64_t online_cpu_map; + uint64_t master_reentry_sp; + uint64_t master_reentry_gp; + uint64_t master_reentry_fn; + uint64_t slave_reentry_fn; + uint64_t magic_dword; + uint64_t uart_putchar; + uint64_t size; + uint64_t uart_getchar; + uint64_t nmi_handler; + uint64_t psb_version; + uint64_t mac_addr; + uint64_t cpu_frequency; + uint64_t board_version; + uint64_t malloc; + uint64_t free; + uint64_t global_shmem_addr; + uint64_t global_shmem_size; + uint64_t psb_os_cpu_map; + uint64_t userapp_cpu_map; + uint64_t wakeup_os; + uint64_t psb_mem_map; + uint64_t board_major_version; + uint64_t board_minor_version; + uint64_t board_manf_revision; + uint64_t board_serial_number; + uint64_t psb_physaddr_map; + uint64_t xlr_loaderip_config; + uint64_t bldr_envp; + uint64_t avail_mem_map; +}; + +enum { + NETLOGIC_IO_SPACE = 0x10, + PCIX_IO_SPACE, + PCIX_CFG_SPACE, + PCIX_MEMORY_SPACE, + HT_IO_SPACE, + HT_CFG_SPACE, + HT_MEMORY_SPACE, + SRAM_SPACE, + FLASH_CONTROLLER_SPACE +}; + +#define NLM_MAX_ARGS 64 +#define NLM_MAX_ENVS 32 + +/* This is what netlboot passes and linux boot_mem_map is subtly different */ +#define NLM_BOOT_MEM_MAP_MAX 32 +struct nlm_boot_mem_map { + int nr_map; + struct nlm_boot_mem_map_entry { + uint64_t addr; /* start of memory segment */ + uint64_t size; /* size of memory segment */ + uint32_t type; /* type of memory segment */ + } map[NLM_BOOT_MEM_MAP_MAX]; +}; + +/* Pointer to saved boot loader info */ +extern struct psb_info nlm_prom_info; + +#endif diff --git a/arch/mips/include/asm/netlogic/xlr/gpio.h b/arch/mips/include/asm/netlogic/xlr/gpio.h new file mode 100644 index 000000000000..51f6ad4aeb14 --- /dev/null +++ b/arch/mips/include/asm/netlogic/xlr/gpio.h @@ -0,0 +1,73 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ASM_NLM_GPIO_H +#define _ASM_NLM_GPIO_H + +#define NETLOGIC_GPIO_INT_EN_REG 0 +#define NETLOGIC_GPIO_INPUT_INVERSION_REG 1 +#define NETLOGIC_GPIO_IO_DIR_REG 2 +#define NETLOGIC_GPIO_IO_DATA_WR_REG 3 +#define NETLOGIC_GPIO_IO_DATA_RD_REG 4 + +#define NETLOGIC_GPIO_SWRESET_REG 8 +#define NETLOGIC_GPIO_DRAM1_CNTRL_REG 9 +#define NETLOGIC_GPIO_DRAM1_RATIO_REG 10 +#define NETLOGIC_GPIO_DRAM1_RESET_REG 11 +#define NETLOGIC_GPIO_DRAM1_STATUS_REG 12 +#define NETLOGIC_GPIO_DRAM2_CNTRL_REG 13 +#define NETLOGIC_GPIO_DRAM2_RATIO_REG 14 +#define NETLOGIC_GPIO_DRAM2_RESET_REG 15 +#define NETLOGIC_GPIO_DRAM2_STATUS_REG 16 + +#define NETLOGIC_GPIO_PWRON_RESET_CFG_REG 21 +#define NETLOGIC_GPIO_BIST_ALL_GO_STATUS_REG 24 +#define NETLOGIC_GPIO_BIST_CPU_GO_STATUS_REG 25 +#define NETLOGIC_GPIO_BIST_DEV_GO_STATUS_REG 26 + +#define NETLOGIC_GPIO_FUSE_BANK_REG 35 +#define NETLOGIC_GPIO_CPU_RESET_REG 40 +#define NETLOGIC_GPIO_RNG_REG 43 + +#define NETLOGIC_PWRON_RESET_PCMCIA_BOOT 17 +#define NETLOGIC_GPIO_LED_BITMAP 0x1700000 +#define NETLOGIC_GPIO_LED_0_SHIFT 20 +#define NETLOGIC_GPIO_LED_1_SHIFT 24 + +#define NETLOGIC_GPIO_LED_OUTPUT_CODE_RESET 0x01 +#define NETLOGIC_GPIO_LED_OUTPUT_CODE_HARD_RESET 0x02 +#define NETLOGIC_GPIO_LED_OUTPUT_CODE_SOFT_RESET 0x03 +#define NETLOGIC_GPIO_LED_OUTPUT_CODE_MAIN 0x04 + +#endif diff --git a/arch/mips/include/asm/netlogic/xlr/iomap.h b/arch/mips/include/asm/netlogic/xlr/iomap.h new file mode 100644 index 000000000000..2e3a4dd53045 --- /dev/null +++ b/arch/mips/include/asm/netlogic/xlr/iomap.h @@ -0,0 +1,131 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ASM_NLM_IOMAP_H +#define _ASM_NLM_IOMAP_H + +#define DEFAULT_NETLOGIC_IO_BASE CKSEG1ADDR(0x1ef00000) +#define NETLOGIC_IO_DDR2_CHN0_OFFSET 0x01000 +#define NETLOGIC_IO_DDR2_CHN1_OFFSET 0x02000 +#define NETLOGIC_IO_DDR2_CHN2_OFFSET 0x03000 +#define NETLOGIC_IO_DDR2_CHN3_OFFSET 0x04000 +#define NETLOGIC_IO_PIC_OFFSET 0x08000 +#define NETLOGIC_IO_UART_0_OFFSET 0x14000 +#define NETLOGIC_IO_UART_1_OFFSET 0x15100 + +#define NETLOGIC_IO_SIZE 0x1000 + +#define NETLOGIC_IO_BRIDGE_OFFSET 0x00000 + +#define NETLOGIC_IO_RLD2_CHN0_OFFSET 0x05000 +#define NETLOGIC_IO_RLD2_CHN1_OFFSET 0x06000 + +#define NETLOGIC_IO_SRAM_OFFSET 0x07000 + +#define NETLOGIC_IO_PCIX_OFFSET 0x09000 +#define NETLOGIC_IO_HT_OFFSET 0x0A000 + +#define NETLOGIC_IO_SECURITY_OFFSET 0x0B000 + +#define NETLOGIC_IO_GMAC_0_OFFSET 0x0C000 +#define NETLOGIC_IO_GMAC_1_OFFSET 0x0D000 +#define NETLOGIC_IO_GMAC_2_OFFSET 0x0E000 +#define NETLOGIC_IO_GMAC_3_OFFSET 0x0F000 + +/* XLS devices */ +#define NETLOGIC_IO_GMAC_4_OFFSET 0x20000 +#define NETLOGIC_IO_GMAC_5_OFFSET 0x21000 +#define NETLOGIC_IO_GMAC_6_OFFSET 0x22000 +#define NETLOGIC_IO_GMAC_7_OFFSET 0x23000 + +#define NETLOGIC_IO_PCIE_0_OFFSET 0x1E000 +#define NETLOGIC_IO_PCIE_1_OFFSET 0x1F000 +#define NETLOGIC_IO_SRIO_0_OFFSET 0x1E000 +#define NETLOGIC_IO_SRIO_1_OFFSET 0x1F000 + +#define NETLOGIC_IO_USB_0_OFFSET 0x24000 +#define NETLOGIC_IO_USB_1_OFFSET 0x25000 + +#define NETLOGIC_IO_COMP_OFFSET 0x1D000 +/* end XLS devices */ + +/* XLR devices */ +#define NETLOGIC_IO_SPI4_0_OFFSET 0x10000 +#define NETLOGIC_IO_XGMAC_0_OFFSET 0x11000 +#define NETLOGIC_IO_SPI4_1_OFFSET 0x12000 +#define NETLOGIC_IO_XGMAC_1_OFFSET 0x13000 +/* end XLR devices */ + +#define NETLOGIC_IO_I2C_0_OFFSET 0x16000 +#define NETLOGIC_IO_I2C_1_OFFSET 0x17000 + +#define NETLOGIC_IO_GPIO_OFFSET 0x18000 +#define NETLOGIC_IO_FLASH_OFFSET 0x19000 +#define NETLOGIC_IO_TB_OFFSET 0x1C000 + +#define NETLOGIC_CPLD_OFFSET KSEG1ADDR(0x1d840000) + +/* + * Base Address (Virtual) of the PCI Config address space + * For now, choose 256M phys in kseg1 = 0xA0000000 + (1<<28) + * Config space spans 256 (num of buses) * 256 (num functions) * 256 bytes + * ie 1<<24 = 16M + */ +#define DEFAULT_PCI_CONFIG_BASE 0x18000000 +#define DEFAULT_HT_TYPE0_CFG_BASE 0x16000000 +#define DEFAULT_HT_TYPE1_CFG_BASE 0x17000000 + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#include <asm/byteorder.h> + +typedef volatile __u32 nlm_reg_t; +extern unsigned long netlogic_io_base; + +/* FIXME read once in write_reg */ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define netlogic_read_reg(base, offset) ((base)[(offset)]) +#define netlogic_write_reg(base, offset, value) ((base)[(offset)] = (value)) +#else +#define netlogic_read_reg(base, offset) (be32_to_cpu((base)[(offset)])) +#define netlogic_write_reg(base, offset, value) \ + ((base)[(offset)] = cpu_to_be32((value))) +#endif + +#define netlogic_read_reg_le32(base, offset) (le32_to_cpu((base)[(offset)])) +#define netlogic_write_reg_le32(base, offset, value) \ + ((base)[(offset)] = cpu_to_le32((value))) +#define netlogic_io_mmio(offset) ((nlm_reg_t *)(netlogic_io_base+(offset))) +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/mips/include/asm/netlogic/xlr/pic.h b/arch/mips/include/asm/netlogic/xlr/pic.h new file mode 100644 index 000000000000..5cceb746f080 --- /dev/null +++ b/arch/mips/include/asm/netlogic/xlr/pic.h @@ -0,0 +1,231 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ASM_NLM_XLR_PIC_H +#define _ASM_NLM_XLR_PIC_H + +#define PIC_CLKS_PER_SEC 66666666ULL +/* PIC hardware interrupt numbers */ +#define PIC_IRT_WD_INDEX 0 +#define PIC_IRT_TIMER_0_INDEX 1 +#define PIC_IRT_TIMER_1_INDEX 2 +#define PIC_IRT_TIMER_2_INDEX 3 +#define PIC_IRT_TIMER_3_INDEX 4 +#define PIC_IRT_TIMER_4_INDEX 5 +#define PIC_IRT_TIMER_5_INDEX 6 +#define PIC_IRT_TIMER_6_INDEX 7 +#define PIC_IRT_TIMER_7_INDEX 8 +#define PIC_IRT_CLOCK_INDEX PIC_IRT_TIMER_7_INDEX +#define PIC_IRT_UART_0_INDEX 9 +#define PIC_IRT_UART_1_INDEX 10 +#define PIC_IRT_I2C_0_INDEX 11 +#define PIC_IRT_I2C_1_INDEX 12 +#define PIC_IRT_PCMCIA_INDEX 13 +#define PIC_IRT_GPIO_INDEX 14 +#define PIC_IRT_HYPER_INDEX 15 +#define PIC_IRT_PCIX_INDEX 16 +/* XLS */ +#define PIC_IRT_CDE_INDEX 15 +#define PIC_IRT_BRIDGE_TB_XLS_INDEX 16 +/* XLS */ +#define PIC_IRT_GMAC0_INDEX 17 +#define PIC_IRT_GMAC1_INDEX 18 +#define PIC_IRT_GMAC2_INDEX 19 +#define PIC_IRT_GMAC3_INDEX 20 +#define PIC_IRT_XGS0_INDEX 21 +#define PIC_IRT_XGS1_INDEX 22 +#define PIC_IRT_HYPER_FATAL_INDEX 23 +#define PIC_IRT_PCIX_FATAL_INDEX 24 +#define PIC_IRT_BRIDGE_AERR_INDEX 25 +#define PIC_IRT_BRIDGE_BERR_INDEX 26 +#define PIC_IRT_BRIDGE_TB_XLR_INDEX 27 +#define PIC_IRT_BRIDGE_AERR_NMI_INDEX 28 +/* XLS */ +#define PIC_IRT_GMAC4_INDEX 21 +#define PIC_IRT_GMAC5_INDEX 22 +#define PIC_IRT_GMAC6_INDEX 23 +#define PIC_IRT_GMAC7_INDEX 24 +#define PIC_IRT_BRIDGE_ERR_INDEX 25 +#define PIC_IRT_PCIE_LINK0_INDEX 26 +#define PIC_IRT_PCIE_LINK1_INDEX 27 +#define PIC_IRT_PCIE_LINK2_INDEX 23 +#define PIC_IRT_PCIE_LINK3_INDEX 24 +#define PIC_IRT_PCIE_XLSB0_LINK2_INDEX 28 +#define PIC_IRT_PCIE_XLSB0_LINK3_INDEX 29 +#define PIC_IRT_SRIO_LINK0_INDEX 26 +#define PIC_IRT_SRIO_LINK1_INDEX 27 +#define PIC_IRT_SRIO_LINK2_INDEX 28 +#define PIC_IRT_SRIO_LINK3_INDEX 29 +#define PIC_IRT_PCIE_INT_INDEX 28 +#define PIC_IRT_PCIE_FATAL_INDEX 29 +#define PIC_IRT_GPIO_B_INDEX 30 +#define PIC_IRT_USB_INDEX 31 +/* XLS */ +#define PIC_NUM_IRTS 32 + + +#define PIC_CLOCK_TIMER 7 + +/* PIC Registers */ +#define PIC_CTRL 0x00 +#define PIC_IPI 0x04 +#define PIC_INT_ACK 0x06 + +#define WD_MAX_VAL_0 0x08 +#define WD_MAX_VAL_1 0x09 +#define WD_MASK_0 0x0a +#define WD_MASK_1 0x0b +#define WD_HEARBEAT_0 0x0c +#define WD_HEARBEAT_1 0x0d + +#define PIC_IRT_0_BASE 0x40 +#define PIC_IRT_1_BASE 0x80 +#define PIC_TIMER_MAXVAL_0_BASE 0x100 +#define PIC_TIMER_MAXVAL_1_BASE 0x110 +#define PIC_TIMER_COUNT_0_BASE 0x120 +#define PIC_TIMER_COUNT_1_BASE 0x130 + +#define PIC_IRT_0(picintr) (PIC_IRT_0_BASE + (picintr)) +#define PIC_IRT_1(picintr) (PIC_IRT_1_BASE + (picintr)) + +#define PIC_TIMER_MAXVAL_0(i) (PIC_TIMER_MAXVAL_0_BASE + (i)) +#define PIC_TIMER_MAXVAL_1(i) (PIC_TIMER_MAXVAL_1_BASE + (i)) +#define PIC_TIMER_COUNT_0(i) (PIC_TIMER_COUNT_0_BASE + (i)) +#define PIC_TIMER_COUNT_1(i) (PIC_TIMER_COUNT_0_BASE + (i)) + +/* + * Mapping between hardware interrupt numbers and IRQs on CPU + * we use a simple scheme to map PIC interrupts 0-31 to IRQs + * 8-39. This leaves the IRQ 0-7 for cpu interrupts like + * count/compare and FMN + */ +#define PIC_IRQ_BASE 8 +#define PIC_INTR_TO_IRQ(i) (PIC_IRQ_BASE + (i)) +#define PIC_IRQ_TO_INTR(i) ((i) - PIC_IRQ_BASE) + +#define PIC_IRT_FIRST_IRQ PIC_IRQ_BASE +#define PIC_WD_IRQ PIC_INTR_TO_IRQ(PIC_IRT_WD_INDEX) +#define PIC_TIMER_0_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_0_INDEX) +#define PIC_TIMER_1_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_1_INDEX) +#define PIC_TIMER_2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_2_INDEX) +#define PIC_TIMER_3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_3_INDEX) +#define PIC_TIMER_4_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_4_INDEX) +#define PIC_TIMER_5_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_5_INDEX) +#define PIC_TIMER_6_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_6_INDEX) +#define PIC_TIMER_7_IRQ PIC_INTR_TO_IRQ(PIC_IRT_TIMER_7_INDEX) +#define PIC_CLOCK_IRQ (PIC_TIMER_7_IRQ) +#define PIC_UART_0_IRQ PIC_INTR_TO_IRQ(PIC_IRT_UART_0_INDEX) +#define PIC_UART_1_IRQ PIC_INTR_TO_IRQ(PIC_IRT_UART_1_INDEX) +#define PIC_I2C_0_IRQ PIC_INTR_TO_IRQ(PIC_IRT_I2C_0_INDEX) +#define PIC_I2C_1_IRQ PIC_INTR_TO_IRQ(PIC_IRT_I2C_1_INDEX) +#define PIC_PCMCIA_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCMCIA_INDEX) +#define PIC_GPIO_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GPIO_INDEX) +#define PIC_HYPER_IRQ PIC_INTR_TO_IRQ(PIC_IRT_HYPER_INDEX) +#define PIC_PCIX_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIX_INDEX) +/* XLS */ +#define PIC_CDE_IRQ PIC_INTR_TO_IRQ(PIC_IRT_CDE_INDEX) +#define PIC_BRIDGE_TB_XLS_IRQ PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLS_INDEX) +/* end XLS */ +#define PIC_GMAC_0_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC0_INDEX) +#define PIC_GMAC_1_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC1_INDEX) +#define PIC_GMAC_2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC2_INDEX) +#define PIC_GMAC_3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC3_INDEX) +#define PIC_XGS_0_IRQ PIC_INTR_TO_IRQ(PIC_IRT_XGS0_INDEX) +#define PIC_XGS_1_IRQ PIC_INTR_TO_IRQ(PIC_IRT_XGS1_INDEX) +#define PIC_HYPER_FATAL_IRQ PIC_INTR_TO_IRQ(PIC_IRT_HYPER_FATAL_INDEX) +#define PIC_PCIX_FATAL_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIX_FATAL_INDEX) +#define PIC_BRIDGE_AERR_IRQ PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_INDEX) +#define PIC_BRIDGE_BERR_IRQ PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_BERR_INDEX) +#define PIC_BRIDGE_TB_XLR_IRQ PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLR_INDEX) +#define PIC_BRIDGE_AERR_NMI_IRQ PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_NMI_INDEX) +/* XLS defines */ +#define PIC_GMAC_4_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC4_INDEX) +#define PIC_GMAC_5_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC5_INDEX) +#define PIC_GMAC_6_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC6_INDEX) +#define PIC_GMAC_7_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GMAC7_INDEX) +#define PIC_BRIDGE_ERR_IRQ PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_ERR_INDEX) +#define PIC_PCIE_LINK0_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK0_INDEX) +#define PIC_PCIE_LINK1_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK1_INDEX) +#define PIC_PCIE_LINK2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK2_INDEX) +#define PIC_PCIE_LINK3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK3_INDEX) +#define PIC_PCIE_XLSB0_LINK2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK2_INDEX) +#define PIC_PCIE_XLSB0_LINK3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK3_INDEX) +#define PIC_SRIO_LINK0_IRQ PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK0_INDEX) +#define PIC_SRIO_LINK1_IRQ PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK1_INDEX) +#define PIC_SRIO_LINK2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK2_INDEX) +#define PIC_SRIO_LINK3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK3_INDEX) +#define PIC_PCIE_INT_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_INT__INDEX) +#define PIC_PCIE_FATAL_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_FATAL_INDEX) +#define PIC_GPIO_B_IRQ PIC_INTR_TO_IRQ(PIC_IRT_GPIO_B_INDEX) +#define PIC_USB_IRQ PIC_INTR_TO_IRQ(PIC_IRT_USB_INDEX) +#define PIC_IRT_LAST_IRQ PIC_USB_IRQ +/* end XLS */ + +#ifndef __ASSEMBLY__ +static inline void pic_send_ipi(u32 ipi) +{ + nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + + netlogic_write_reg(mmio, PIC_IPI, ipi); +} + +static inline u32 pic_read_control(void) +{ + nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + + return netlogic_read_reg(mmio, PIC_CTRL); +} + +static inline void pic_write_control(u32 control) +{ + nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + + netlogic_write_reg(mmio, PIC_CTRL, control); +} + +static inline void pic_update_control(u32 control) +{ + nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + + netlogic_write_reg(mmio, PIC_CTRL, + (control | netlogic_read_reg(mmio, PIC_CTRL))); +} + +#define PIC_IRQ_IS_EDGE_TRIGGERED(irq) (((irq) >= PIC_TIMER_0_IRQ) && \ + ((irq) <= PIC_TIMER_7_IRQ)) +#define PIC_IRQ_IS_IRT(irq) (((irq) >= PIC_IRT_FIRST_IRQ) && \ + ((irq) <= PIC_IRT_LAST_IRQ)) +#endif + +#endif /* _ASM_NLM_XLR_PIC_H */ diff --git a/arch/mips/include/asm/netlogic/xlr/xlr.h b/arch/mips/include/asm/netlogic/xlr/xlr.h new file mode 100644 index 000000000000..3e6372692a04 --- /dev/null +++ b/arch/mips/include/asm/netlogic/xlr/xlr.h @@ -0,0 +1,75 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ASM_NLM_XLR_H +#define _ASM_NLM_XLR_H + +/* Platform UART functions */ +struct uart_port; +unsigned int nlm_xlr_uart_in(struct uart_port *, int); +void nlm_xlr_uart_out(struct uart_port *, int, int); + +/* SMP support functions */ +struct irq_desc; +void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc); +void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc); +int nlm_wakeup_secondary_cpus(u32 wakeup_mask); +void nlm_smp_irq_init(void); +void nlm_boot_smp_nmi(void); +void prom_pre_boot_secondary_cpus(void); + +extern struct plat_smp_ops nlm_smp_ops; +extern unsigned long nlm_common_ebase; + +/* XLS B silicon "Rook" */ +static inline unsigned int nlm_chip_is_xls_b(void) +{ + uint32_t prid = read_c0_prid(); + + return ((prid & 0xf000) == 0x4000); +} + +/* + * XLR chip types + */ + /* The XLS product line has chip versions 0x[48c]? */ +static inline unsigned int nlm_chip_is_xls(void) +{ + uint32_t prid = read_c0_prid(); + + return ((prid & 0xf000) == 0x8000 || (prid & 0xf000) == 0x4000 || + (prid & 0xf000) == 0xc000); +} + +#endif /* _ASM_NLM_XLR_H */ diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 9f1b8dba2c81..de39b1f343ea 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -141,7 +141,8 @@ extern int ptrace_set_watch_regs(struct task_struct *child, #define instruction_pointer(regs) ((regs)->cp0_epc) #define profile_pc(regs) instruction_pointer(regs) -extern asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit); +extern asmlinkage void syscall_trace_enter(struct pt_regs *regs); +extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET; diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index d71160de4d10..97f8bf6639e7 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -149,6 +149,9 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define _TIF_FPUBOUND (1<<TIF_FPUBOUND) #define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH) +/* work to do in syscall_trace_leave() */ +#define _TIF_WORK_SYSCALL_EXIT (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT) + /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK (0x0000ffef & \ ~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT)) diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h index c7f1bfef1574..bc14447e69b5 100644 --- a/arch/mips/include/asm/time.h +++ b/arch/mips/include/asm/time.h @@ -84,12 +84,6 @@ static inline int init_mips_clocksource(void) #endif } -static inline void clocksource_set_clock(struct clocksource *cs, - unsigned int clock) -{ - clocksource_calc_mult_shift(cs, clock, 4); -} - static inline void clockevent_set_clock(struct clock_event_device *cd, unsigned int clock) { diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index 6a9e14dab91e..d97cfbf882f5 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de> + * Copyright (C) 2011, Maarten ter Huurne <maarten@treewalker.org> * JZ4740 setup code * * This program is free software; you can redistribute it and/or modify it @@ -14,13 +15,44 @@ */ #include <linux/init.h> +#include <linux/io.h> #include <linux/kernel.h> +#include <asm/bootinfo.h> + +#include <asm/mach-jz4740/base.h> + #include "reset.h" + +#define JZ4740_EMC_SDRAM_CTRL 0x80 + + +static void __init jz4740_detect_mem(void) +{ + void __iomem *jz_emc_base; + u32 ctrl, bus, bank, rows, cols; + phys_t size; + + jz_emc_base = ioremap(JZ4740_EMC_BASE_ADDR, 0x100); + ctrl = readl(jz_emc_base + JZ4740_EMC_SDRAM_CTRL); + bus = 2 - ((ctrl >> 31) & 1); + bank = 1 + ((ctrl >> 19) & 1); + cols = 8 + ((ctrl >> 26) & 7); + rows = 11 + ((ctrl >> 20) & 3); + printk(KERN_DEBUG + "SDRAM preconfigured: bus:%u bank:%u rows:%u cols:%u\n", + bus, bank, rows, cols); + iounmap(jz_emc_base); + + size = 1 << (bus + bank + cols + rows); + add_memory_region(0, size, BOOT_MEM_RAM); +} + void __init plat_mem_setup(void) { jz4740_reset_init(); + jz4740_detect_mem(); } const char *get_system_type(void) diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c index eaa853a54af6..f83c2dd07a27 100644 --- a/arch/mips/jz4740/time.c +++ b/arch/mips/jz4740/time.c @@ -121,8 +121,7 @@ void __init plat_time_init(void) clockevents_register_device(&jz4740_clockevent); - clocksource_set_clock(&jz4740_clocksource, clk_rate); - ret = clocksource_register(&jz4740_clocksource); + ret = clocksource_register_hz(&jz4740_clocksource, clk_rate); if (ret) printk(KERN_ERR "Failed to register clocksource: %d\n", ret); diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index cedee2bcbd18..83bba332bbfc 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o obj-$(CONFIG_CPU_VR41XX) += r4k_fpu.o r4k_switch.o obj-$(CONFIG_CPU_CAVIUM_OCTEON) += octeon_switch.o +obj-$(CONFIG_CPU_XLR) += r4k_fpu.o r4k_switch.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP_UP) += smp-up.o diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c index 0b7377361e22..f0ab92a1b057 100644 --- a/arch/mips/kernel/cevt-txx9.c +++ b/arch/mips/kernel/cevt-txx9.c @@ -51,8 +51,7 @@ void __init txx9_clocksource_init(unsigned long baseaddr, { struct txx9_tmr_reg __iomem *tmrptr; - clocksource_set_clock(&txx9_clocksource.cs, TIMER_CLK(imbusclk)); - clocksource_register(&txx9_clocksource.cs); + clocksource_register_hz(&txx9_clocksource.cs, TIMER_CLK(imbusclk)); tmrptr = ioremap(baseaddr, sizeof(struct txx9_tmr_reg)); __raw_writel(TCR_BASE, &tmrptr->tcr); diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index f65d4c8c65a6..bb133d10b145 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -291,6 +291,12 @@ static inline int cpu_has_confreg(void) #endif } +static inline void set_elf_platform(int cpu, const char *plat) +{ + if (cpu == 0) + __elf_platform = plat; +} + /* * Get the FPU Implementation/Revision. */ @@ -614,6 +620,16 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) case PRID_IMP_LOONGSON2: c->cputype = CPU_LOONGSON2; __cpu_name[cpu] = "ICT Loongson-2"; + + switch (c->processor_id & PRID_REV_MASK) { + case PRID_REV_LOONGSON2E: + set_elf_platform(cpu, "loongson2e"); + break; + case PRID_REV_LOONGSON2F: + set_elf_platform(cpu, "loongson2f"); + break; + } + c->isa_level = MIPS_CPU_ISA_III; c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_LLSC | @@ -911,12 +927,14 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) case PRID_IMP_BMIPS32_REV8: c->cputype = CPU_BMIPS32; __cpu_name[cpu] = "Broadcom BMIPS32"; + set_elf_platform(cpu, "bmips32"); break; case PRID_IMP_BMIPS3300: case PRID_IMP_BMIPS3300_ALT: case PRID_IMP_BMIPS3300_BUG: c->cputype = CPU_BMIPS3300; __cpu_name[cpu] = "Broadcom BMIPS3300"; + set_elf_platform(cpu, "bmips3300"); break; case PRID_IMP_BMIPS43XX: { int rev = c->processor_id & 0xff; @@ -925,15 +943,18 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) rev <= PRID_REV_BMIPS4380_HI) { c->cputype = CPU_BMIPS4380; __cpu_name[cpu] = "Broadcom BMIPS4380"; + set_elf_platform(cpu, "bmips4380"); } else { c->cputype = CPU_BMIPS4350; __cpu_name[cpu] = "Broadcom BMIPS4350"; + set_elf_platform(cpu, "bmips4350"); } break; } case PRID_IMP_BMIPS5000: c->cputype = CPU_BMIPS5000; __cpu_name[cpu] = "Broadcom BMIPS5000"; + set_elf_platform(cpu, "bmips5000"); c->options |= MIPS_CPU_ULRI; break; } @@ -956,14 +977,12 @@ static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_CAVIUM_OCTEON_PLUS; __cpu_name[cpu] = "Cavium Octeon+"; platform: - if (cpu == 0) - __elf_platform = "octeon"; + set_elf_platform(cpu, "octeon"); break; case PRID_IMP_CAVIUM_CN63XX: c->cputype = CPU_CAVIUM_OCTEON2; __cpu_name[cpu] = "Cavium Octeon II"; - if (cpu == 0) - __elf_platform = "octeon2"; + set_elf_platform(cpu, "octeon2"); break; default: printk(KERN_INFO "Unknown Octeon chip!\n"); @@ -988,6 +1007,59 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) } } +static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) +{ + decode_configs(c); + + c->options = (MIPS_CPU_TLB | + MIPS_CPU_4KEX | + MIPS_CPU_COUNTER | + MIPS_CPU_DIVEC | + MIPS_CPU_WATCH | + MIPS_CPU_EJTAG | + MIPS_CPU_LLSC); + + switch (c->processor_id & 0xff00) { + case PRID_IMP_NETLOGIC_XLR732: + case PRID_IMP_NETLOGIC_XLR716: + case PRID_IMP_NETLOGIC_XLR532: + case PRID_IMP_NETLOGIC_XLR308: + case PRID_IMP_NETLOGIC_XLR532C: + case PRID_IMP_NETLOGIC_XLR516C: + case PRID_IMP_NETLOGIC_XLR508C: + case PRID_IMP_NETLOGIC_XLR308C: + c->cputype = CPU_XLR; + __cpu_name[cpu] = "Netlogic XLR"; + break; + + case PRID_IMP_NETLOGIC_XLS608: + case PRID_IMP_NETLOGIC_XLS408: + case PRID_IMP_NETLOGIC_XLS404: + case PRID_IMP_NETLOGIC_XLS208: + case PRID_IMP_NETLOGIC_XLS204: + case PRID_IMP_NETLOGIC_XLS108: + case PRID_IMP_NETLOGIC_XLS104: + case PRID_IMP_NETLOGIC_XLS616B: + case PRID_IMP_NETLOGIC_XLS608B: + case PRID_IMP_NETLOGIC_XLS416B: + case PRID_IMP_NETLOGIC_XLS412B: + case PRID_IMP_NETLOGIC_XLS408B: + case PRID_IMP_NETLOGIC_XLS404B: + c->cputype = CPU_XLR; + __cpu_name[cpu] = "Netlogic XLS"; + break; + + default: + printk(KERN_INFO "Unknown Netlogic chip id [%02x]!\n", + c->processor_id); + c->cputype = CPU_XLR; + break; + } + + c->isa_level = MIPS_CPU_ISA_M64R1; + c->tlbsize = ((read_c0_config1() >> 25) & 0x3f) + 1; +} + #ifdef CONFIG_64BIT /* For use by uaccess.h */ u64 __ua_limit; @@ -1035,6 +1107,9 @@ __cpuinit void cpu_probe(void) case PRID_COMP_INGENIC: cpu_probe_ingenic(c, cpu); break; + case PRID_COMP_NETLOGIC: + cpu_probe_netlogic(c, cpu); + break; } BUG_ON(!__cpu_name[cpu]); diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c index 51489f8a825e..f96f99c794a3 100644 --- a/arch/mips/kernel/csrc-bcm1480.c +++ b/arch/mips/kernel/csrc-bcm1480.c @@ -49,6 +49,5 @@ void __init sb1480_clocksource_init(void) plldiv = G_BCM1480_SYS_PLL_DIV(__raw_readq(IOADDR(A_SCD_SYSTEM_CFG))); zbbus = ((plldiv >> 1) * 50000000) + ((plldiv & 1) * 25000000); - clocksource_set_clock(cs, zbbus); - clocksource_register(cs); + clocksource_register_hz(cs, zbbus); } diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c index 23da108506b0..46bd7fa98d6c 100644 --- a/arch/mips/kernel/csrc-ioasic.c +++ b/arch/mips/kernel/csrc-ioasic.c @@ -59,7 +59,5 @@ void __init dec_ioasic_clocksource_init(void) printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq); clocksource_dec.rating = 200 + freq / 10000000; - clocksource_set_clock(&clocksource_dec, freq); - - clocksource_register(&clocksource_dec); + clocksource_register_hz(&clocksource_dec, freq); } diff --git a/arch/mips/kernel/csrc-powertv.c b/arch/mips/kernel/csrc-powertv.c index a27c16c8690e..2e7c5232da8d 100644 --- a/arch/mips/kernel/csrc-powertv.c +++ b/arch/mips/kernel/csrc-powertv.c @@ -78,9 +78,7 @@ static void __init powertv_c0_hpt_clocksource_init(void) clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000; - clocksource_set_clock(&clocksource_mips, mips_hpt_frequency); - - clocksource_register(&clocksource_mips); + clocksource_register_hz(&clocksource_mips, mips_hpt_frequency); } /** @@ -130,43 +128,16 @@ static struct clocksource clocksource_tim_c = { /** * powertv_tim_c_clocksource_init - set up a clock source for the TIM_C clock * - * The hard part here is coming up with a constant k and shift s such that - * the 48-bit TIM_C value multiplied by k doesn't overflow and that value, - * when shifted right by s, yields the corresponding number of nanoseconds. * We know that TIM_C counts at 27 MHz/8, so each cycle corresponds to - * 1 / (27,000,000/8) seconds. Multiply that by a billion and you get the - * number of nanoseconds. Since the TIM_C value has 48 bits and the math is - * done in 64 bits, avoiding an overflow means that k must be less than - * 64 - 48 = 16 bits. + * 1 / (27,000,000/8) seconds. */ static void __init powertv_tim_c_clocksource_init(void) { - int prescale; - unsigned long dividend; - unsigned long k; - int s; - const int max_k_bits = (64 - 48) - 1; - const unsigned long billion = 1000000000; const unsigned long counts_per_second = 27000000 / 8; - prescale = BITS_PER_LONG - ilog2(billion) - 1; - dividend = billion << prescale; - k = dividend / counts_per_second; - s = ilog2(k) - max_k_bits; - - if (s < 0) - s = prescale; - - else { - k >>= s; - s += prescale; - } - - clocksource_tim_c.mult = k; - clocksource_tim_c.shift = s; clocksource_tim_c.rating = 200; - clocksource_register(&clocksource_tim_c); + clocksource_register_hz(&clocksource_tim_c, counts_per_second); tim_c = (struct tim_c *) asic_reg_addr(tim_ch); } diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index e95a3cd48eea..decd1fa38d55 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -30,9 +30,7 @@ int __init init_r4k_clocksource(void) /* Calculate a somewhat reasonable rating value */ clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000; - clocksource_set_clock(&clocksource_mips, mips_hpt_frequency); - - clocksource_register(&clocksource_mips); + clocksource_register_hz(&clocksource_mips, mips_hpt_frequency); return 0; } diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c index d14d3d1907fa..e9606d907685 100644 --- a/arch/mips/kernel/csrc-sb1250.c +++ b/arch/mips/kernel/csrc-sb1250.c @@ -65,6 +65,5 @@ void __init sb1250_clocksource_init(void) IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM, R_SCD_TIMER_CFG))); - clocksource_set_clock(cs, V_SCD_TIMER_FREQ); - clocksource_register(cs); + clocksource_register_hz(cs, V_SCD_TIMER_FREQ); } diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index ffa331029e08..37acfa036d44 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -167,14 +167,13 @@ work_notifysig: # deal with pending signals and FEXPORT(syscall_exit_work_partial) SAVE_STATIC syscall_exit_work: - li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t0, _TIF_WORK_SYSCALL_EXIT and t0, a2 # a2 is preloaded with TI_FLAGS beqz t0, work_pending # trace bit set? - local_irq_enable # could let do_syscall_trace() + local_irq_enable # could let syscall_trace_leave() # call schedule() instead move a0, sp - li a1, 1 - jal do_syscall_trace + jal syscall_trace_leave b resume_userspace #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT) diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index 2392a7a296d4..391221b6a6aa 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -125,87 +125,11 @@ void __init setup_pit_timer(void) setup_irq(0, &irq0); } -/* - * Since the PIT overflows every tick, its not very useful - * to just read by itself. So use jiffies to emulate a free - * running counter: - */ -static cycle_t pit_read(struct clocksource *cs) -{ - unsigned long flags; - int count; - u32 jifs; - static int old_count; - static u32 old_jifs; - - raw_spin_lock_irqsave(&i8253_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb_p(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * It's possible for count to appear to go the wrong way for a - * couple of reasons: - * - * 1. The timer counter underflows, but we haven't handled the - * resulting interrupt and incremented jiffies yet. - * 2. Hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) - * - * Previous attempts to handle these cases intelligently were - * buggy, so we just do the simple thing now. - */ - if (count > old_count && jifs == old_jifs) { - count = old_count; - } - old_count = count; - old_jifs = jifs; - - raw_spin_unlock_irqrestore(&i8253_lock, flags); - - count = (LATCH - 1) - count; - - return (cycle_t)(jifs * LATCH) + count; -} - -static struct clocksource clocksource_pit = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), - .mult = 0, - .shift = 20, -}; - static int __init init_pit_clocksource(void) { if (num_possible_cpus() > 1) /* PIT does not scale! */ return 0; - clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); - return clocksource_register(&clocksource_pit); + return clocksource_i8253_init(); } arch_initcall(init_pit_clocksource); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 584e6b55c865..4e6ea1ffad46 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -533,15 +533,10 @@ static inline int audit_arch(void) * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ -asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) +asmlinkage void syscall_trace_enter(struct pt_regs *regs) { /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->regs[2]); - - if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]), - -regs->regs[2]); + secure_computing(regs->regs[2]); if (!(current->ptrace & PT_PTRACED)) goto out; @@ -565,8 +560,40 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) } out: - if (unlikely(current->audit_context) && !entryexit) + if (unlikely(current->audit_context)) audit_syscall_entry(audit_arch(), regs->regs[2], regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); } + +/* + * Notification of system call entry/exit + * - triggered by current->work.syscall_trace + */ +asmlinkage void syscall_trace_leave(struct pt_regs *regs) +{ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]), + -regs->regs[2]); + + if (!(current->ptrace & PT_PTRACED)) + return; + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + + /* The 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? + 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 7f1377eb22d3..7a8e1dd7f6f2 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -88,8 +88,7 @@ syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - li a1, 0 - jal do_syscall_trace + jal syscall_trace_enter move t0, s0 RESTORE_STATIC diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 7c0ef7f128bf..2d31c83224f9 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -91,8 +91,7 @@ syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - li a1, 0 - jal do_syscall_trace + jal syscall_trace_enter move t0, s0 RESTORE_STATIC diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index de6c5563beab..38a0503b9a4a 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -89,8 +89,7 @@ n32_syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - li a1, 0 - jal do_syscall_trace + jal syscall_trace_enter move t0, s0 RESTORE_STATIC diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index b0541dda8830..91ea5e4041dd 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -123,8 +123,7 @@ trace_a_syscall: move s0, t2 # Save syscall pointer move a0, sp - li a1, 0 - jal do_syscall_trace + jal syscall_trace_enter move t0, s0 RESTORE_STATIC diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 5a88cc4ccd5a..cedac4633741 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi) static void ipi_resched_interrupt(void) { - /* Return from interrupt should be enough to cause scheduler check */ + scheduler_ipi(); } static void ipi_call_interrupt(void) diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 58beabf50b3c..d02765708ddb 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -10,12 +10,9 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/linkage.h> -#include <linux/mm.h> #include <linux/fs.h> #include <linux/smp.h> -#include <linux/mman.h> #include <linux/ptrace.h> -#include <linux/sched.h> #include <linux/string.h> #include <linux/syscalls.h> #include <linux/file.h> @@ -25,11 +22,9 @@ #include <linux/msg.h> #include <linux/shm.h> #include <linux/compiler.h> -#include <linux/module.h> #include <linux/ipc.h> #include <linux/uaccess.h> #include <linux/slab.h> -#include <linux/random.h> #include <linux/elf.h> #include <asm/asm.h> @@ -66,121 +61,6 @@ out: return res; } -unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ - -EXPORT_SYMBOL(shm_align_mask); - -#define COLOUR_ALIGN(addr,pgoff) \ - ((((addr) + shm_align_mask) & ~shm_align_mask) + \ - (((pgoff) << PAGE_SHIFT) & shm_align_mask)) - -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct vm_area_struct * vmm; - int do_color_align; - unsigned long task_size; - -#ifdef CONFIG_32BIT - task_size = TASK_SIZE; -#else /* Must be CONFIG_64BIT*/ - task_size = test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE; -#endif - - if (len > task_size) - return -ENOMEM; - - if (flags & MAP_FIXED) { - /* Even MAP_FIXED mappings must reside within task_size. */ - if (task_size - len < addr) - return -EINVAL; - - /* - * We do not accept a shared mapping if it would violate - * cache aliasing constraints. - */ - if ((flags & MAP_SHARED) && - ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) - return -EINVAL; - return addr; - } - - do_color_align = 0; - if (filp || (flags & MAP_SHARED)) - do_color_align = 1; - if (addr) { - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - vmm = find_vma(current->mm, addr); - if (task_size - len >= addr && - (!vmm || addr + len <= vmm->vm_start)) - return addr; - } - addr = current->mm->mmap_base; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - - for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { - /* At this point: (!vmm || addr < vmm->vm_end). */ - if (task_size - len < addr) - return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) - return addr; - addr = vmm->vm_end; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - } -} - -void arch_pick_mmap_layout(struct mm_struct *mm) -{ - unsigned long random_factor = 0UL; - - if (current->flags & PF_RANDOMIZE) { - random_factor = get_random_int(); - random_factor = random_factor << PAGE_SHIFT; - if (TASK_IS_32BIT_ADDR) - random_factor &= 0xfffffful; - else - random_factor &= 0xffffffful; - } - - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; -} - -static inline unsigned long brk_rnd(void) -{ - unsigned long rnd = get_random_int(); - - rnd = rnd << PAGE_SHIFT; - /* 8MB for 32bit, 256MB for 64bit */ - if (TASK_IS_32BIT_ADDR) - rnd = rnd & 0x7ffffful; - else - rnd = rnd & 0xffffffful; - - return rnd; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long base = mm->brk; - unsigned long ret; - - ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - - return ret; -} - SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, off_t, offset) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 71350f7f2d88..e9b3af27d844 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs) unsigned long dvpret = dvpe(); #endif /* CONFIG_MIPS_MT_SMTC */ - notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV); + if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) + sig = 0; console_verbose(); spin_lock_irq(&die_lock); @@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs) mips_mt_regdump(dvpret); #endif /* CONFIG_MIPS_MT_SMTC */ - if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) - sig = 0; - printk("%s[#%d]:\n", str, ++die_counter); show_registers(regs); add_taint(TAINT_DIE); diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index e4b0b0bec039..01af3876cf90 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -65,9 +65,11 @@ SECTIONS NOTES :text :note .dummy : { *(.dummy) } :text + _sdata = .; /* Start of data section */ RODATA /* writeable */ + _sdata = .; /* Start of data section */ .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig new file mode 100644 index 000000000000..3fccf2104513 --- /dev/null +++ b/arch/mips/lantiq/Kconfig @@ -0,0 +1,23 @@ +if LANTIQ + +config SOC_TYPE_XWAY + bool + default n + +choice + prompt "SoC Type" + default SOC_XWAY + +config SOC_AMAZON_SE + bool "Amazon SE" + select SOC_TYPE_XWAY + +config SOC_XWAY + bool "XWAY" + select SOC_TYPE_XWAY + select HW_HAS_PCI +endchoice + +source "arch/mips/lantiq/xway/Kconfig" + +endif diff --git a/arch/mips/lantiq/Makefile b/arch/mips/lantiq/Makefile new file mode 100644 index 000000000000..e5dae0e24b00 --- /dev/null +++ b/arch/mips/lantiq/Makefile @@ -0,0 +1,11 @@ +# Copyright (C) 2010 John Crispin <blogic@openwrt.org> +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 as published +# by the Free Software Foundation. + +obj-y := irq.o setup.o clk.o prom.o devices.o + +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o + +obj-$(CONFIG_SOC_TYPE_XWAY) += xway/ diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform new file mode 100644 index 000000000000..f3dff05722de --- /dev/null +++ b/arch/mips/lantiq/Platform @@ -0,0 +1,8 @@ +# +# Lantiq +# + +platform-$(CONFIG_LANTIQ) += lantiq/ +cflags-$(CONFIG_LANTIQ) += -I$(srctree)/arch/mips/include/asm/mach-lantiq +load-$(CONFIG_LANTIQ) = 0xffffffff80002000 +cflags-$(CONFIG_SOC_TYPE_XWAY) += -I$(srctree)/arch/mips/include/asm/mach-lantiq/xway diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c new file mode 100644 index 000000000000..94560899d13e --- /dev/null +++ b/arch/mips/lantiq/clk.c @@ -0,0 +1,140 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com> + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ +#include <linux/io.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/list.h> + +#include <asm/time.h> +#include <asm/irq.h> +#include <asm/div64.h> + +#include <lantiq_soc.h> + +#include "clk.h" + +struct clk { + const char *name; + unsigned long rate; + unsigned long (*get_rate) (void); +}; + +static struct clk *cpu_clk; +static int cpu_clk_cnt; + +/* lantiq socs have 3 static clocks */ +static struct clk cpu_clk_generic[] = { + { + .name = "cpu", + .get_rate = ltq_get_cpu_hz, + }, { + .name = "fpi", + .get_rate = ltq_get_fpi_hz, + }, { + .name = "io", + .get_rate = ltq_get_io_region_clock, + }, +}; + +static struct resource ltq_cgu_resource = { + .name = "cgu", + .start = LTQ_CGU_BASE_ADDR, + .end = LTQ_CGU_BASE_ADDR + LTQ_CGU_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +/* remapped clock register range */ +void __iomem *ltq_cgu_membase; + +void clk_init(void) +{ + cpu_clk = cpu_clk_generic; + cpu_clk_cnt = ARRAY_SIZE(cpu_clk_generic); +} + +static inline int clk_good(struct clk *clk) +{ + return clk && !IS_ERR(clk); +} + +unsigned long clk_get_rate(struct clk *clk) +{ + if (unlikely(!clk_good(clk))) + return 0; + + if (clk->rate != 0) + return clk->rate; + + if (clk->get_rate != NULL) + return clk->get_rate(); + + return 0; +} +EXPORT_SYMBOL(clk_get_rate); + +struct clk *clk_get(struct device *dev, const char *id) +{ + int i; + + for (i = 0; i < cpu_clk_cnt; i++) + if (!strcmp(id, cpu_clk[i].name)) + return &cpu_clk[i]; + BUG(); + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + /* not used */ +} +EXPORT_SYMBOL(clk_put); + +static inline u32 ltq_get_counter_resolution(void) +{ + u32 res; + + __asm__ __volatile__( + ".set push\n" + ".set mips32r2\n" + "rdhwr %0, $3\n" + ".set pop\n" + : "=&r" (res) + : /* no input */ + : "memory"); + + return res; +} + +void __init plat_time_init(void) +{ + struct clk *clk; + + if (insert_resource(&iomem_resource, <q_cgu_resource) < 0) + panic("Failed to insert cgu memory\n"); + + if (request_mem_region(ltq_cgu_resource.start, + resource_size(<q_cgu_resource), "cgu") < 0) + panic("Failed to request cgu memory\n"); + + ltq_cgu_membase = ioremap_nocache(ltq_cgu_resource.start, + resource_size(<q_cgu_resource)); + if (!ltq_cgu_membase) { + pr_err("Failed to remap cgu memory\n"); + unreachable(); + } + clk = clk_get(0, "cpu"); + mips_hpt_frequency = clk_get_rate(clk) / ltq_get_counter_resolution(); + write_c0_compare(read_c0_count()); + clk_put(clk); +} diff --git a/arch/mips/lantiq/clk.h b/arch/mips/lantiq/clk.h new file mode 100644 index 000000000000..3328925f2c3f --- /dev/null +++ b/arch/mips/lantiq/clk.h @@ -0,0 +1,18 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LTQ_CLK_H__ +#define _LTQ_CLK_H__ + +extern void clk_init(void); + +extern unsigned long ltq_get_cpu_hz(void); +extern unsigned long ltq_get_fpi_hz(void); +extern unsigned long ltq_get_io_region_clock(void); + +#endif diff --git a/arch/mips/lantiq/devices.c b/arch/mips/lantiq/devices.c new file mode 100644 index 000000000000..7b82c34cb169 --- /dev/null +++ b/arch/mips/lantiq/devices.c @@ -0,0 +1,122 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/platform_device.h> +#include <linux/leds.h> +#include <linux/etherdevice.h> +#include <linux/reboot.h> +#include <linux/time.h> +#include <linux/io.h> +#include <linux/gpio.h> +#include <linux/leds.h> + +#include <asm/bootinfo.h> +#include <asm/irq.h> + +#include <lantiq_soc.h> + +#include "devices.h" + +/* nor flash */ +static struct resource ltq_nor_resource = { + .name = "nor", + .start = LTQ_FLASH_START, + .end = LTQ_FLASH_START + LTQ_FLASH_MAX - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device ltq_nor = { + .name = "ltq_nor", + .resource = <q_nor_resource, + .num_resources = 1, +}; + +void __init ltq_register_nor(struct physmap_flash_data *data) +{ + ltq_nor.dev.platform_data = data; + platform_device_register(<q_nor); +} + +/* watchdog */ +static struct resource ltq_wdt_resource = { + .name = "watchdog", + .start = LTQ_WDT_BASE_ADDR, + .end = LTQ_WDT_BASE_ADDR + LTQ_WDT_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +void __init ltq_register_wdt(void) +{ + platform_device_register_simple("ltq_wdt", 0, <q_wdt_resource, 1); +} + +/* asc ports */ +static struct resource ltq_asc0_resources[] = { + { + .name = "asc0", + .start = LTQ_ASC0_BASE_ADDR, + .end = LTQ_ASC0_BASE_ADDR + LTQ_ASC_SIZE - 1, + .flags = IORESOURCE_MEM, + }, + IRQ_RES(tx, LTQ_ASC_TIR(0)), + IRQ_RES(rx, LTQ_ASC_RIR(0)), + IRQ_RES(err, LTQ_ASC_EIR(0)), +}; + +static struct resource ltq_asc1_resources[] = { + { + .name = "asc1", + .start = LTQ_ASC1_BASE_ADDR, + .end = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1, + .flags = IORESOURCE_MEM, + }, + IRQ_RES(tx, LTQ_ASC_TIR(1)), + IRQ_RES(rx, LTQ_ASC_RIR(1)), + IRQ_RES(err, LTQ_ASC_EIR(1)), +}; + +void __init ltq_register_asc(int port) +{ + switch (port) { + case 0: + platform_device_register_simple("ltq_asc", 0, + ltq_asc0_resources, ARRAY_SIZE(ltq_asc0_resources)); + break; + case 1: + platform_device_register_simple("ltq_asc", 1, + ltq_asc1_resources, ARRAY_SIZE(ltq_asc1_resources)); + break; + default: + break; + } +} + +#ifdef CONFIG_PCI +/* pci */ +static struct platform_device ltq_pci = { + .name = "ltq_pci", + .num_resources = 0, +}; + +void __init ltq_register_pci(struct ltq_pci_data *data) +{ + ltq_pci.dev.platform_data = data; + platform_device_register(<q_pci); +} +#else +void __init ltq_register_pci(struct ltq_pci_data *data) +{ + pr_err("kernel is compiled without PCI support\n"); +} +#endif diff --git a/arch/mips/lantiq/devices.h b/arch/mips/lantiq/devices.h new file mode 100644 index 000000000000..2947bb19a528 --- /dev/null +++ b/arch/mips/lantiq/devices.h @@ -0,0 +1,23 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LTQ_DEVICES_H__ +#define _LTQ_DEVICES_H__ + +#include <lantiq_platform.h> +#include <linux/mtd/physmap.h> + +#define IRQ_RES(resname, irq) \ + {.name = #resname, .start = (irq), .flags = IORESOURCE_IRQ} + +extern void ltq_register_nor(struct physmap_flash_data *data); +extern void ltq_register_wdt(void); +extern void ltq_register_asc(int port); +extern void ltq_register_pci(struct ltq_pci_data *data); + +#endif diff --git a/arch/mips/lantiq/early_printk.c b/arch/mips/lantiq/early_printk.c new file mode 100644 index 000000000000..972e05f87631 --- /dev/null +++ b/arch/mips/lantiq/early_printk.c @@ -0,0 +1,33 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/cpu.h> + +#include <lantiq.h> +#include <lantiq_soc.h> + +/* no ioremap possible at this early stage, lets use KSEG1 instead */ +#define LTQ_ASC_BASE KSEG1ADDR(LTQ_ASC1_BASE_ADDR) +#define ASC_BUF 1024 +#define LTQ_ASC_FSTAT ((u32 *)(LTQ_ASC_BASE + 0x0048)) +#define LTQ_ASC_TBUF ((u32 *)(LTQ_ASC_BASE + 0x0020)) +#define TXMASK 0x3F00 +#define TXOFFSET 8 + +void prom_putchar(char c) +{ + unsigned long flags; + + local_irq_save(flags); + do { } while ((ltq_r32(LTQ_ASC_FSTAT) & TXMASK) >> TXOFFSET); + if (c == '\n') + ltq_w32('\r', LTQ_ASC_TBUF); + ltq_w32(c, LTQ_ASC_TBUF); + local_irq_restore(flags); +} diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c new file mode 100644 index 000000000000..fc89795cafdb --- /dev/null +++ b/arch/mips/lantiq/irq.c @@ -0,0 +1,326 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com> + */ + +#include <linux/interrupt.h> +#include <linux/ioport.h> + +#include <asm/bootinfo.h> +#include <asm/irq_cpu.h> + +#include <lantiq_soc.h> +#include <irq.h> + +/* register definitions */ +#define LTQ_ICU_IM0_ISR 0x0000 +#define LTQ_ICU_IM0_IER 0x0008 +#define LTQ_ICU_IM0_IOSR 0x0010 +#define LTQ_ICU_IM0_IRSR 0x0018 +#define LTQ_ICU_IM0_IMR 0x0020 +#define LTQ_ICU_IM1_ISR 0x0028 +#define LTQ_ICU_OFFSET (LTQ_ICU_IM1_ISR - LTQ_ICU_IM0_ISR) + +#define LTQ_EIU_EXIN_C 0x0000 +#define LTQ_EIU_EXIN_INIC 0x0004 +#define LTQ_EIU_EXIN_INEN 0x000C + +/* irq numbers used by the external interrupt unit (EIU) */ +#define LTQ_EIU_IR0 (INT_NUM_IM4_IRL0 + 30) +#define LTQ_EIU_IR1 (INT_NUM_IM3_IRL0 + 31) +#define LTQ_EIU_IR2 (INT_NUM_IM1_IRL0 + 26) +#define LTQ_EIU_IR3 INT_NUM_IM1_IRL0 +#define LTQ_EIU_IR4 (INT_NUM_IM1_IRL0 + 1) +#define LTQ_EIU_IR5 (INT_NUM_IM1_IRL0 + 2) +#define LTQ_EIU_IR6 (INT_NUM_IM2_IRL0 + 30) + +#define MAX_EIU 6 + +/* irqs generated by device attached to the EBU need to be acked in + * a special manner + */ +#define LTQ_ICU_EBU_IRQ 22 + +#define ltq_icu_w32(x, y) ltq_w32((x), ltq_icu_membase + (y)) +#define ltq_icu_r32(x) ltq_r32(ltq_icu_membase + (x)) + +#define ltq_eiu_w32(x, y) ltq_w32((x), ltq_eiu_membase + (y)) +#define ltq_eiu_r32(x) ltq_r32(ltq_eiu_membase + (x)) + +static unsigned short ltq_eiu_irq[MAX_EIU] = { + LTQ_EIU_IR0, + LTQ_EIU_IR1, + LTQ_EIU_IR2, + LTQ_EIU_IR3, + LTQ_EIU_IR4, + LTQ_EIU_IR5, +}; + +static struct resource ltq_icu_resource = { + .name = "icu", + .start = LTQ_ICU_BASE_ADDR, + .end = LTQ_ICU_BASE_ADDR + LTQ_ICU_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static struct resource ltq_eiu_resource = { + .name = "eiu", + .start = LTQ_EIU_BASE_ADDR, + .end = LTQ_EIU_BASE_ADDR + LTQ_ICU_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static void __iomem *ltq_icu_membase; +static void __iomem *ltq_eiu_membase; + +void ltq_disable_irq(struct irq_data *d) +{ + u32 ier = LTQ_ICU_IM0_IER; + int irq_nr = d->irq - INT_NUM_IRQ0; + + ier += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET); + irq_nr %= INT_NUM_IM_OFFSET; + ltq_icu_w32(ltq_icu_r32(ier) & ~(1 << irq_nr), ier); +} + +void ltq_mask_and_ack_irq(struct irq_data *d) +{ + u32 ier = LTQ_ICU_IM0_IER; + u32 isr = LTQ_ICU_IM0_ISR; + int irq_nr = d->irq - INT_NUM_IRQ0; + + ier += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET); + isr += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET); + irq_nr %= INT_NUM_IM_OFFSET; + ltq_icu_w32(ltq_icu_r32(ier) & ~(1 << irq_nr), ier); + ltq_icu_w32((1 << irq_nr), isr); +} + +static void ltq_ack_irq(struct irq_data *d) +{ + u32 isr = LTQ_ICU_IM0_ISR; + int irq_nr = d->irq - INT_NUM_IRQ0; + + isr += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET); + irq_nr %= INT_NUM_IM_OFFSET; + ltq_icu_w32((1 << irq_nr), isr); +} + +void ltq_enable_irq(struct irq_data *d) +{ + u32 ier = LTQ_ICU_IM0_IER; + int irq_nr = d->irq - INT_NUM_IRQ0; + + ier += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET); + irq_nr %= INT_NUM_IM_OFFSET; + ltq_icu_w32(ltq_icu_r32(ier) | (1 << irq_nr), ier); +} + +static unsigned int ltq_startup_eiu_irq(struct irq_data *d) +{ + int i; + int irq_nr = d->irq - INT_NUM_IRQ0; + + ltq_enable_irq(d); + for (i = 0; i < MAX_EIU; i++) { + if (irq_nr == ltq_eiu_irq[i]) { + /* low level - we should really handle set_type */ + ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) | + (0x6 << (i * 4)), LTQ_EIU_EXIN_C); + /* clear all pending */ + ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INIC) & ~(1 << i), + LTQ_EIU_EXIN_INIC); + /* enable */ + ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) | (1 << i), + LTQ_EIU_EXIN_INEN); + break; + } + } + + return 0; +} + +static void ltq_shutdown_eiu_irq(struct irq_data *d) +{ + int i; + int irq_nr = d->irq - INT_NUM_IRQ0; + + ltq_disable_irq(d); + for (i = 0; i < MAX_EIU; i++) { + if (irq_nr == ltq_eiu_irq[i]) { + /* disable */ + ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~(1 << i), + LTQ_EIU_EXIN_INEN); + break; + } + } +} + +static struct irq_chip ltq_irq_type = { + "icu", + .irq_enable = ltq_enable_irq, + .irq_disable = ltq_disable_irq, + .irq_unmask = ltq_enable_irq, + .irq_ack = ltq_ack_irq, + .irq_mask = ltq_disable_irq, + .irq_mask_ack = ltq_mask_and_ack_irq, +}; + +static struct irq_chip ltq_eiu_type = { + "eiu", + .irq_startup = ltq_startup_eiu_irq, + .irq_shutdown = ltq_shutdown_eiu_irq, + .irq_enable = ltq_enable_irq, + .irq_disable = ltq_disable_irq, + .irq_unmask = ltq_enable_irq, + .irq_ack = ltq_ack_irq, + .irq_mask = ltq_disable_irq, + .irq_mask_ack = ltq_mask_and_ack_irq, +}; + +static void ltq_hw_irqdispatch(int module) +{ + u32 irq; + + irq = ltq_icu_r32(LTQ_ICU_IM0_IOSR + (module * LTQ_ICU_OFFSET)); + if (irq == 0) + return; + + /* silicon bug causes only the msb set to 1 to be valid. all + * other bits might be bogus + */ + irq = __fls(irq); + do_IRQ((int)irq + INT_NUM_IM0_IRL0 + (INT_NUM_IM_OFFSET * module)); + + /* if this is a EBU irq, we need to ack it or get a deadlock */ + if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0)) + ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_ISTAT) | 0x10, + LTQ_EBU_PCC_ISTAT); +} + +#define DEFINE_HWx_IRQDISPATCH(x) \ + static void ltq_hw ## x ## _irqdispatch(void) \ + { \ + ltq_hw_irqdispatch(x); \ + } +DEFINE_HWx_IRQDISPATCH(0) +DEFINE_HWx_IRQDISPATCH(1) +DEFINE_HWx_IRQDISPATCH(2) +DEFINE_HWx_IRQDISPATCH(3) +DEFINE_HWx_IRQDISPATCH(4) + +static void ltq_hw5_irqdispatch(void) +{ + do_IRQ(MIPS_CPU_TIMER_IRQ); +} + +asmlinkage void plat_irq_dispatch(void) +{ + unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; + unsigned int i; + + if (pending & CAUSEF_IP7) { + do_IRQ(MIPS_CPU_TIMER_IRQ); + goto out; + } else { + for (i = 0; i < 5; i++) { + if (pending & (CAUSEF_IP2 << i)) { + ltq_hw_irqdispatch(i); + goto out; + } + } + } + pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status()); + +out: + return; +} + +static struct irqaction cascade = { + .handler = no_action, + .flags = IRQF_DISABLED, + .name = "cascade", +}; + +void __init arch_init_irq(void) +{ + int i; + + if (insert_resource(&iomem_resource, <q_icu_resource) < 0) + panic("Failed to insert icu memory\n"); + + if (request_mem_region(ltq_icu_resource.start, + resource_size(<q_icu_resource), "icu") < 0) + panic("Failed to request icu memory\n"); + + ltq_icu_membase = ioremap_nocache(ltq_icu_resource.start, + resource_size(<q_icu_resource)); + if (!ltq_icu_membase) + panic("Failed to remap icu memory\n"); + + if (insert_resource(&iomem_resource, <q_eiu_resource) < 0) + panic("Failed to insert eiu memory\n"); + + if (request_mem_region(ltq_eiu_resource.start, + resource_size(<q_eiu_resource), "eiu") < 0) + panic("Failed to request eiu memory\n"); + + ltq_eiu_membase = ioremap_nocache(ltq_eiu_resource.start, + resource_size(<q_eiu_resource)); + if (!ltq_eiu_membase) + panic("Failed to remap eiu memory\n"); + + /* make sure all irqs are turned off by default */ + for (i = 0; i < 5; i++) + ltq_icu_w32(0, LTQ_ICU_IM0_IER + (i * LTQ_ICU_OFFSET)); + + /* clear all possibly pending interrupts */ + ltq_icu_w32(~0, LTQ_ICU_IM0_ISR + (i * LTQ_ICU_OFFSET)); + + mips_cpu_irq_init(); + + for (i = 2; i <= 6; i++) + setup_irq(i, &cascade); + + if (cpu_has_vint) { + pr_info("Setting up vectored interrupts\n"); + set_vi_handler(2, ltq_hw0_irqdispatch); + set_vi_handler(3, ltq_hw1_irqdispatch); + set_vi_handler(4, ltq_hw2_irqdispatch); + set_vi_handler(5, ltq_hw3_irqdispatch); + set_vi_handler(6, ltq_hw4_irqdispatch); + set_vi_handler(7, ltq_hw5_irqdispatch); + } + + for (i = INT_NUM_IRQ0; + i <= (INT_NUM_IRQ0 + (5 * INT_NUM_IM_OFFSET)); i++) + if ((i == LTQ_EIU_IR0) || (i == LTQ_EIU_IR1) || + (i == LTQ_EIU_IR2)) + irq_set_chip_and_handler(i, <q_eiu_type, + handle_level_irq); + /* EIU3-5 only exist on ar9 and vr9 */ + else if (((i == LTQ_EIU_IR3) || (i == LTQ_EIU_IR4) || + (i == LTQ_EIU_IR5)) && (ltq_is_ar9() || ltq_is_vr9())) + irq_set_chip_and_handler(i, <q_eiu_type, + handle_level_irq); + else + irq_set_chip_and_handler(i, <q_irq_type, + handle_level_irq); + +#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC) + set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | + IE_IRQ3 | IE_IRQ4 | IE_IRQ5); +#else + set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 | + IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5); +#endif +} + +unsigned int __cpuinit get_c0_compare_int(void) +{ + return CP0_LEGACY_COMPARE_IRQ; +} diff --git a/arch/mips/lantiq/machtypes.h b/arch/mips/lantiq/machtypes.h new file mode 100644 index 000000000000..7e01b8c484eb --- /dev/null +++ b/arch/mips/lantiq/machtypes.h @@ -0,0 +1,20 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LANTIQ_MACH_H__ +#define _LANTIQ_MACH_H__ + +#include <asm/mips_machine.h> + +enum lantiq_mach_type { + LTQ_MACH_GENERIC = 0, + LTQ_MACH_EASY50712, /* Danube evaluation board */ + LTQ_MACH_EASY50601, /* Amazon SE evaluation board */ +}; + +#endif diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c new file mode 100644 index 000000000000..56ba007bf1e5 --- /dev/null +++ b/arch/mips/lantiq/prom.c @@ -0,0 +1,71 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/module.h> +#include <linux/clk.h> +#include <asm/bootinfo.h> +#include <asm/time.h> + +#include <lantiq.h> + +#include "prom.h" +#include "clk.h" + +static struct ltq_soc_info soc_info; + +unsigned int ltq_get_cpu_ver(void) +{ + return soc_info.rev; +} +EXPORT_SYMBOL(ltq_get_cpu_ver); + +unsigned int ltq_get_soc_type(void) +{ + return soc_info.type; +} +EXPORT_SYMBOL(ltq_get_soc_type); + +const char *get_system_type(void) +{ + return soc_info.sys_type; +} + +void prom_free_prom_memory(void) +{ +} + +static void __init prom_init_cmdline(void) +{ + int argc = fw_arg0; + char **argv = (char **) KSEG1ADDR(fw_arg1); + int i; + + for (i = 0; i < argc; i++) { + char *p = (char *) KSEG1ADDR(argv[i]); + + if (p && *p) { + strlcat(arcs_cmdline, p, sizeof(arcs_cmdline)); + strlcat(arcs_cmdline, " ", sizeof(arcs_cmdline)); + } + } +} + +void __init prom_init(void) +{ + struct clk *clk; + + ltq_soc_detect(&soc_info); + clk_init(); + clk = clk_get(0, "cpu"); + snprintf(soc_info.sys_type, LTQ_SYS_TYPE_LEN - 1, "%s rev1.%d", + soc_info.name, soc_info.rev); + clk_put(clk); + soc_info.sys_type[LTQ_SYS_TYPE_LEN - 1] = '\0'; + pr_info("SoC: %s\n", soc_info.sys_type); + prom_init_cmdline(); +} diff --git a/arch/mips/lantiq/prom.h b/arch/mips/lantiq/prom.h new file mode 100644 index 000000000000..b4229d94280f --- /dev/null +++ b/arch/mips/lantiq/prom.h @@ -0,0 +1,25 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LTQ_PROM_H__ +#define _LTQ_PROM_H__ + +#define LTQ_SYS_TYPE_LEN 0x100 + +struct ltq_soc_info { + unsigned char *name; + unsigned int rev; + unsigned int partnum; + unsigned int type; + unsigned char sys_type[LTQ_SYS_TYPE_LEN]; +}; + +extern void ltq_soc_detect(struct ltq_soc_info *i); +extern void ltq_soc_setup(void); + +#endif diff --git a/arch/mips/lantiq/setup.c b/arch/mips/lantiq/setup.c new file mode 100644 index 000000000000..9b8af77ed0f9 --- /dev/null +++ b/arch/mips/lantiq/setup.c @@ -0,0 +1,66 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <asm/bootinfo.h> + +#include <lantiq_soc.h> + +#include "machtypes.h" +#include "devices.h" +#include "prom.h" + +void __init plat_mem_setup(void) +{ + /* assume 16M as default incase uboot fails to pass proper ramsize */ + unsigned long memsize = 16; + char **envp = (char **) KSEG1ADDR(fw_arg2); + + ioport_resource.start = IOPORT_RESOURCE_START; + ioport_resource.end = IOPORT_RESOURCE_END; + iomem_resource.start = IOMEM_RESOURCE_START; + iomem_resource.end = IOMEM_RESOURCE_END; + + set_io_port_base((unsigned long) KSEG1); + + while (*envp) { + char *e = (char *)KSEG1ADDR(*envp); + if (!strncmp(e, "memsize=", 8)) { + e += 8; + if (strict_strtoul(e, 0, &memsize)) + pr_warn("bad memsize specified\n"); + } + envp++; + } + memsize *= 1024 * 1024; + add_memory_region(0x00000000, memsize, BOOT_MEM_RAM); +} + +static int __init +lantiq_setup(void) +{ + ltq_soc_setup(); + mips_machine_setup(); + return 0; +} + +arch_initcall(lantiq_setup); + +static void __init +lantiq_generic_init(void) +{ + /* Nothing to do */ +} + +MIPS_MACHINE(LTQ_MACH_GENERIC, + "Generic", + "Generic Lantiq based board", + lantiq_generic_init); diff --git a/arch/mips/lantiq/xway/Kconfig b/arch/mips/lantiq/xway/Kconfig new file mode 100644 index 000000000000..2b857de36620 --- /dev/null +++ b/arch/mips/lantiq/xway/Kconfig @@ -0,0 +1,23 @@ +if SOC_XWAY + +menu "MIPS Machine" + +config LANTIQ_MACH_EASY50712 + bool "Easy50712 - Danube" + default y + +endmenu + +endif + +if SOC_AMAZON_SE + +menu "MIPS Machine" + +config LANTIQ_MACH_EASY50601 + bool "Easy50601 - Amazon SE" + default y + +endmenu + +endif diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile new file mode 100644 index 000000000000..c517f2e77563 --- /dev/null +++ b/arch/mips/lantiq/xway/Makefile @@ -0,0 +1,7 @@ +obj-y := pmu.o ebu.o reset.o gpio.o gpio_stp.o gpio_ebu.o devices.o dma.o + +obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o setup-xway.o +obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o setup-ase.o + +obj-$(CONFIG_LANTIQ_MACH_EASY50712) += mach-easy50712.o +obj-$(CONFIG_LANTIQ_MACH_EASY50601) += mach-easy50601.o diff --git a/arch/mips/lantiq/xway/clk-ase.c b/arch/mips/lantiq/xway/clk-ase.c new file mode 100644 index 000000000000..22d823acd536 --- /dev/null +++ b/arch/mips/lantiq/xway/clk-ase.c @@ -0,0 +1,48 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2011 John Crispin <blogic@openwrt.org> + */ + +#include <linux/io.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/clk.h> + +#include <asm/time.h> +#include <asm/irq.h> +#include <asm/div64.h> + +#include <lantiq_soc.h> + +/* cgu registers */ +#define LTQ_CGU_SYS 0x0010 + +unsigned int ltq_get_io_region_clock(void) +{ + return CLOCK_133M; +} +EXPORT_SYMBOL(ltq_get_io_region_clock); + +unsigned int ltq_get_fpi_bus_clock(int fpi) +{ + return CLOCK_133M; +} +EXPORT_SYMBOL(ltq_get_fpi_bus_clock); + +unsigned int ltq_get_cpu_hz(void) +{ + if (ltq_cgu_r32(LTQ_CGU_SYS) & (1 << 5)) + return CLOCK_266M; + else + return CLOCK_133M; +} +EXPORT_SYMBOL(ltq_get_cpu_hz); + +unsigned int ltq_get_fpi_hz(void) +{ + return CLOCK_133M; +} +EXPORT_SYMBOL(ltq_get_fpi_hz); diff --git a/arch/mips/lantiq/xway/clk-xway.c b/arch/mips/lantiq/xway/clk-xway.c new file mode 100644 index 000000000000..ddd39593c581 --- /dev/null +++ b/arch/mips/lantiq/xway/clk-xway.c @@ -0,0 +1,223 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/io.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/clk.h> + +#include <asm/time.h> +#include <asm/irq.h> +#include <asm/div64.h> + +#include <lantiq_soc.h> + +static unsigned int ltq_ram_clocks[] = { + CLOCK_167M, CLOCK_133M, CLOCK_111M, CLOCK_83M }; +#define DDR_HZ ltq_ram_clocks[ltq_cgu_r32(LTQ_CGU_SYS) & 0x3] + +#define BASIC_FREQUENCY_1 35328000 +#define BASIC_FREQUENCY_2 36000000 +#define BASIS_REQUENCY_USB 12000000 + +#define GET_BITS(x, msb, lsb) \ + (((x) & ((1 << ((msb) + 1)) - 1)) >> (lsb)) + +#define LTQ_CGU_PLL0_CFG 0x0004 +#define LTQ_CGU_PLL1_CFG 0x0008 +#define LTQ_CGU_PLL2_CFG 0x000C +#define LTQ_CGU_SYS 0x0010 +#define LTQ_CGU_UPDATE 0x0014 +#define LTQ_CGU_IF_CLK 0x0018 +#define LTQ_CGU_OSC_CON 0x001C +#define LTQ_CGU_SMD 0x0020 +#define LTQ_CGU_CT1SR 0x0028 +#define LTQ_CGU_CT2SR 0x002C +#define LTQ_CGU_PCMCR 0x0030 +#define LTQ_CGU_PCI_CR 0x0034 +#define LTQ_CGU_PD_PC 0x0038 +#define LTQ_CGU_FMR 0x003C + +#define CGU_PLL0_PHASE_DIVIDER_ENABLE \ + (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 31)) +#define CGU_PLL0_BYPASS \ + (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 30)) +#define CGU_PLL0_CFG_DSMSEL \ + (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 28)) +#define CGU_PLL0_CFG_FRAC_EN \ + (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 27)) +#define CGU_PLL1_SRC \ + (ltq_cgu_r32(LTQ_CGU_PLL1_CFG) & (1 << 31)) +#define CGU_PLL2_PHASE_DIVIDER_ENABLE \ + (ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & (1 << 20)) +#define CGU_SYS_FPI_SEL (1 << 6) +#define CGU_SYS_DDR_SEL 0x3 +#define CGU_PLL0_SRC (1 << 29) + +#define CGU_PLL0_CFG_PLLK GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 26, 17) +#define CGU_PLL0_CFG_PLLN GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 12, 6) +#define CGU_PLL0_CFG_PLLM GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 5, 2) +#define CGU_PLL2_SRC GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 18, 17) +#define CGU_PLL2_CFG_INPUT_DIV GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 16, 13) + +static unsigned int ltq_get_pll0_fdiv(void); + +static inline unsigned int get_input_clock(int pll) +{ + switch (pll) { + case 0: + if (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & CGU_PLL0_SRC) + return BASIS_REQUENCY_USB; + else if (CGU_PLL0_PHASE_DIVIDER_ENABLE) + return BASIC_FREQUENCY_1; + else + return BASIC_FREQUENCY_2; + case 1: + if (CGU_PLL1_SRC) + return BASIS_REQUENCY_USB; + else if (CGU_PLL0_PHASE_DIVIDER_ENABLE) + return BASIC_FREQUENCY_1; + else + return BASIC_FREQUENCY_2; + case 2: + switch (CGU_PLL2_SRC) { + case 0: + return ltq_get_pll0_fdiv(); + case 1: + return CGU_PLL2_PHASE_DIVIDER_ENABLE ? + BASIC_FREQUENCY_1 : + BASIC_FREQUENCY_2; + case 2: + return BASIS_REQUENCY_USB; + } + default: + return 0; + } +} + +static inline unsigned int cal_dsm(int pll, unsigned int num, unsigned int den) +{ + u64 res, clock = get_input_clock(pll); + + res = num * clock; + do_div(res, den); + return res; +} + +static inline unsigned int mash_dsm(int pll, unsigned int M, unsigned int N, + unsigned int K) +{ + unsigned int num = ((N + 1) << 10) + K; + unsigned int den = (M + 1) << 10; + + return cal_dsm(pll, num, den); +} + +static inline unsigned int ssff_dsm_1(int pll, unsigned int M, unsigned int N, + unsigned int K) +{ + unsigned int num = ((N + 1) << 11) + K + 512; + unsigned int den = (M + 1) << 11; + + return cal_dsm(pll, num, den); +} + +static inline unsigned int ssff_dsm_2(int pll, unsigned int M, unsigned int N, + unsigned int K) +{ + unsigned int num = K >= 512 ? + ((N + 1) << 12) + K - 512 : ((N + 1) << 12) + K + 3584; + unsigned int den = (M + 1) << 12; + + return cal_dsm(pll, num, den); +} + +static inline unsigned int dsm(int pll, unsigned int M, unsigned int N, + unsigned int K, unsigned int dsmsel, unsigned int phase_div_en) +{ + if (!dsmsel) + return mash_dsm(pll, M, N, K); + else if (!phase_div_en) + return mash_dsm(pll, M, N, K); + else + return ssff_dsm_2(pll, M, N, K); +} + +static inline unsigned int ltq_get_pll0_fosc(void) +{ + if (CGU_PLL0_BYPASS) + return get_input_clock(0); + else + return !CGU_PLL0_CFG_FRAC_EN + ? dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN, 0, + CGU_PLL0_CFG_DSMSEL, + CGU_PLL0_PHASE_DIVIDER_ENABLE) + : dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN, + CGU_PLL0_CFG_PLLK, CGU_PLL0_CFG_DSMSEL, + CGU_PLL0_PHASE_DIVIDER_ENABLE); +} + +static unsigned int ltq_get_pll0_fdiv(void) +{ + unsigned int div = CGU_PLL2_CFG_INPUT_DIV + 1; + + return (ltq_get_pll0_fosc() + (div >> 1)) / div; +} + +unsigned int ltq_get_io_region_clock(void) +{ + unsigned int ret = ltq_get_pll0_fosc(); + + switch (ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & CGU_SYS_DDR_SEL) { + default: + case 0: + return (ret + 1) / 2; + case 1: + return (ret * 2 + 2) / 5; + case 2: + return (ret + 1) / 3; + case 3: + return (ret + 2) / 4; + } +} +EXPORT_SYMBOL(ltq_get_io_region_clock); + +unsigned int ltq_get_fpi_bus_clock(int fpi) +{ + unsigned int ret = ltq_get_io_region_clock(); + + if ((fpi == 2) && (ltq_cgu_r32(LTQ_CGU_SYS) & CGU_SYS_FPI_SEL)) + ret >>= 1; + return ret; +} +EXPORT_SYMBOL(ltq_get_fpi_bus_clock); + +unsigned int ltq_get_cpu_hz(void) +{ + switch (ltq_cgu_r32(LTQ_CGU_SYS) & 0xc) { + case 0: + return CLOCK_333M; + case 4: + return DDR_HZ; + case 8: + return DDR_HZ << 1; + default: + return DDR_HZ >> 1; + } +} +EXPORT_SYMBOL(ltq_get_cpu_hz); + +unsigned int ltq_get_fpi_hz(void) +{ + unsigned int ddr_clock = DDR_HZ; + + if (ltq_cgu_r32(LTQ_CGU_SYS) & 0x40) + return ddr_clock >> 1; + return ddr_clock; +} +EXPORT_SYMBOL(ltq_get_fpi_hz); diff --git a/arch/mips/lantiq/xway/devices.c b/arch/mips/lantiq/xway/devices.c new file mode 100644 index 000000000000..e09e789dfc27 --- /dev/null +++ b/arch/mips/lantiq/xway/devices.c @@ -0,0 +1,121 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/mtd/physmap.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/platform_device.h> +#include <linux/leds.h> +#include <linux/etherdevice.h> +#include <linux/reboot.h> +#include <linux/time.h> +#include <linux/io.h> +#include <linux/gpio.h> +#include <linux/leds.h> + +#include <asm/bootinfo.h> +#include <asm/irq.h> + +#include <lantiq_soc.h> +#include <lantiq_irq.h> +#include <lantiq_platform.h> + +#include "devices.h" + +/* gpio */ +static struct resource ltq_gpio_resource[] = { + { + .name = "gpio0", + .start = LTQ_GPIO0_BASE_ADDR, + .end = LTQ_GPIO0_BASE_ADDR + LTQ_GPIO_SIZE - 1, + .flags = IORESOURCE_MEM, + }, { + .name = "gpio1", + .start = LTQ_GPIO1_BASE_ADDR, + .end = LTQ_GPIO1_BASE_ADDR + LTQ_GPIO_SIZE - 1, + .flags = IORESOURCE_MEM, + }, { + .name = "gpio2", + .start = LTQ_GPIO2_BASE_ADDR, + .end = LTQ_GPIO2_BASE_ADDR + LTQ_GPIO_SIZE - 1, + .flags = IORESOURCE_MEM, + } +}; + +void __init ltq_register_gpio(void) +{ + platform_device_register_simple("ltq_gpio", 0, + <q_gpio_resource[0], 1); + platform_device_register_simple("ltq_gpio", 1, + <q_gpio_resource[1], 1); + + /* AR9 and VR9 have an extra gpio block */ + if (ltq_is_ar9() || ltq_is_vr9()) { + platform_device_register_simple("ltq_gpio", 2, + <q_gpio_resource[2], 1); + } +} + +/* serial to parallel conversion */ +static struct resource ltq_stp_resource = { + .name = "stp", + .start = LTQ_STP_BASE_ADDR, + .end = LTQ_STP_BASE_ADDR + LTQ_STP_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +void __init ltq_register_gpio_stp(void) +{ + platform_device_register_simple("ltq_stp", 0, <q_stp_resource, 1); +} + +/* asc ports - amazon se has its own serial mapping */ +static struct resource ltq_ase_asc_resources[] = { + { + .name = "asc0", + .start = LTQ_ASC1_BASE_ADDR, + .end = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1, + .flags = IORESOURCE_MEM, + }, + IRQ_RES(tx, LTQ_ASC_ASE_TIR), + IRQ_RES(rx, LTQ_ASC_ASE_RIR), + IRQ_RES(err, LTQ_ASC_ASE_EIR), +}; + +void __init ltq_register_ase_asc(void) +{ + platform_device_register_simple("ltq_asc", 0, + ltq_ase_asc_resources, ARRAY_SIZE(ltq_ase_asc_resources)); +} + +/* ethernet */ +static struct resource ltq_etop_resources = { + .name = "etop", + .start = LTQ_ETOP_BASE_ADDR, + .end = LTQ_ETOP_BASE_ADDR + LTQ_ETOP_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device ltq_etop = { + .name = "ltq_etop", + .resource = <q_etop_resources, + .num_resources = 1, +}; + +void __init +ltq_register_etop(struct ltq_eth_data *eth) +{ + if (eth) { + ltq_etop.dev.platform_data = eth; + platform_device_register(<q_etop); + } +} diff --git a/arch/mips/lantiq/xway/devices.h b/arch/mips/lantiq/xway/devices.h new file mode 100644 index 000000000000..e90493471bc1 --- /dev/null +++ b/arch/mips/lantiq/xway/devices.h @@ -0,0 +1,20 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LTQ_DEVICES_XWAY_H__ +#define _LTQ_DEVICES_XWAY_H__ + +#include "../devices.h" +#include <linux/phy.h> + +extern void ltq_register_gpio(void); +extern void ltq_register_gpio_stp(void); +extern void ltq_register_ase_asc(void); +extern void ltq_register_etop(struct ltq_eth_data *eth); + +#endif diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c new file mode 100644 index 000000000000..4278a459d6c4 --- /dev/null +++ b/arch/mips/lantiq/xway/dma.c @@ -0,0 +1,253 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2011 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/io.h> +#include <linux/dma-mapping.h> + +#include <lantiq_soc.h> +#include <xway_dma.h> + +#define LTQ_DMA_CTRL 0x10 +#define LTQ_DMA_CPOLL 0x14 +#define LTQ_DMA_CS 0x18 +#define LTQ_DMA_CCTRL 0x1C +#define LTQ_DMA_CDBA 0x20 +#define LTQ_DMA_CDLEN 0x24 +#define LTQ_DMA_CIS 0x28 +#define LTQ_DMA_CIE 0x2C +#define LTQ_DMA_PS 0x40 +#define LTQ_DMA_PCTRL 0x44 +#define LTQ_DMA_IRNEN 0xf4 + +#define DMA_DESCPT BIT(3) /* descriptor complete irq */ +#define DMA_TX BIT(8) /* TX channel direction */ +#define DMA_CHAN_ON BIT(0) /* channel on / off bit */ +#define DMA_PDEN BIT(6) /* enable packet drop */ +#define DMA_CHAN_RST BIT(1) /* channel on / off bit */ +#define DMA_RESET BIT(0) /* channel on / off bit */ +#define DMA_IRQ_ACK 0x7e /* IRQ status register */ +#define DMA_POLL BIT(31) /* turn on channel polling */ +#define DMA_CLK_DIV4 BIT(6) /* polling clock divider */ +#define DMA_2W_BURST BIT(1) /* 2 word burst length */ +#define DMA_MAX_CHANNEL 20 /* the soc has 20 channels */ +#define DMA_ETOP_ENDIANESS (0xf << 8) /* endianess swap etop channels */ +#define DMA_WEIGHT (BIT(17) | BIT(16)) /* default channel wheight */ + +#define ltq_dma_r32(x) ltq_r32(ltq_dma_membase + (x)) +#define ltq_dma_w32(x, y) ltq_w32(x, ltq_dma_membase + (y)) +#define ltq_dma_w32_mask(x, y, z) ltq_w32_mask(x, y, \ + ltq_dma_membase + (z)) + +static struct resource ltq_dma_resource = { + .name = "dma", + .start = LTQ_DMA_BASE_ADDR, + .end = LTQ_DMA_BASE_ADDR + LTQ_DMA_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static void __iomem *ltq_dma_membase; + +void +ltq_dma_enable_irq(struct ltq_dma_channel *ch) +{ + unsigned long flags; + + local_irq_save(flags); + ltq_dma_w32(ch->nr, LTQ_DMA_CS); + ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ltq_dma_enable_irq); + +void +ltq_dma_disable_irq(struct ltq_dma_channel *ch) +{ + unsigned long flags; + + local_irq_save(flags); + ltq_dma_w32(ch->nr, LTQ_DMA_CS); + ltq_dma_w32_mask(1 << ch->nr, 0, LTQ_DMA_IRNEN); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ltq_dma_disable_irq); + +void +ltq_dma_ack_irq(struct ltq_dma_channel *ch) +{ + unsigned long flags; + + local_irq_save(flags); + ltq_dma_w32(ch->nr, LTQ_DMA_CS); + ltq_dma_w32(DMA_IRQ_ACK, LTQ_DMA_CIS); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ltq_dma_ack_irq); + +void +ltq_dma_open(struct ltq_dma_channel *ch) +{ + unsigned long flag; + + local_irq_save(flag); + ltq_dma_w32(ch->nr, LTQ_DMA_CS); + ltq_dma_w32_mask(0, DMA_CHAN_ON, LTQ_DMA_CCTRL); + ltq_dma_enable_irq(ch); + local_irq_restore(flag); +} +EXPORT_SYMBOL_GPL(ltq_dma_open); + +void +ltq_dma_close(struct ltq_dma_channel *ch) +{ + unsigned long flag; + + local_irq_save(flag); + ltq_dma_w32(ch->nr, LTQ_DMA_CS); + ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL); + ltq_dma_disable_irq(ch); + local_irq_restore(flag); +} +EXPORT_SYMBOL_GPL(ltq_dma_close); + +static void +ltq_dma_alloc(struct ltq_dma_channel *ch) +{ + unsigned long flags; + + ch->desc = 0; + ch->desc_base = dma_alloc_coherent(NULL, + LTQ_DESC_NUM * LTQ_DESC_SIZE, + &ch->phys, GFP_ATOMIC); + memset(ch->desc_base, 0, LTQ_DESC_NUM * LTQ_DESC_SIZE); + + local_irq_save(flags); + ltq_dma_w32(ch->nr, LTQ_DMA_CS); + ltq_dma_w32(ch->phys, LTQ_DMA_CDBA); + ltq_dma_w32(LTQ_DESC_NUM, LTQ_DMA_CDLEN); + ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL); + wmb(); + ltq_dma_w32_mask(0, DMA_CHAN_RST, LTQ_DMA_CCTRL); + while (ltq_dma_r32(LTQ_DMA_CCTRL) & DMA_CHAN_RST) + ; + local_irq_restore(flags); +} + +void +ltq_dma_alloc_tx(struct ltq_dma_channel *ch) +{ + unsigned long flags; + + ltq_dma_alloc(ch); + + local_irq_save(flags); + ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE); + ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN); + ltq_dma_w32(DMA_WEIGHT | DMA_TX, LTQ_DMA_CCTRL); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ltq_dma_alloc_tx); + +void +ltq_dma_alloc_rx(struct ltq_dma_channel *ch) +{ + unsigned long flags; + + ltq_dma_alloc(ch); + + local_irq_save(flags); + ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE); + ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN); + ltq_dma_w32(DMA_WEIGHT, LTQ_DMA_CCTRL); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ltq_dma_alloc_rx); + +void +ltq_dma_free(struct ltq_dma_channel *ch) +{ + if (!ch->desc_base) + return; + ltq_dma_close(ch); + dma_free_coherent(NULL, LTQ_DESC_NUM * LTQ_DESC_SIZE, + ch->desc_base, ch->phys); +} +EXPORT_SYMBOL_GPL(ltq_dma_free); + +void +ltq_dma_init_port(int p) +{ + ltq_dma_w32(p, LTQ_DMA_PS); + switch (p) { + case DMA_PORT_ETOP: + /* + * Tell the DMA engine to swap the endianess of data frames and + * drop packets if the channel arbitration fails. + */ + ltq_dma_w32_mask(0, DMA_ETOP_ENDIANESS | DMA_PDEN, + LTQ_DMA_PCTRL); + break; + + case DMA_PORT_DEU: + ltq_dma_w32((DMA_2W_BURST << 4) | (DMA_2W_BURST << 2), + LTQ_DMA_PCTRL); + break; + + default: + break; + } +} +EXPORT_SYMBOL_GPL(ltq_dma_init_port); + +int __init +ltq_dma_init(void) +{ + int i; + + /* insert and request the memory region */ + if (insert_resource(&iomem_resource, <q_dma_resource) < 0) + panic("Failed to insert dma memory\n"); + + if (request_mem_region(ltq_dma_resource.start, + resource_size(<q_dma_resource), "dma") < 0) + panic("Failed to request dma memory\n"); + + /* remap dma register range */ + ltq_dma_membase = ioremap_nocache(ltq_dma_resource.start, + resource_size(<q_dma_resource)); + if (!ltq_dma_membase) + panic("Failed to remap dma memory\n"); + + /* power up and reset the dma engine */ + ltq_pmu_enable(PMU_DMA); + ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL); + + /* disable all interrupts */ + ltq_dma_w32(0, LTQ_DMA_IRNEN); + + /* reset/configure each channel */ + for (i = 0; i < DMA_MAX_CHANNEL; i++) { + ltq_dma_w32(i, LTQ_DMA_CS); + ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL); + ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL); + ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL); + } + return 0; +} + +postcore_initcall(ltq_dma_init); diff --git a/arch/mips/lantiq/xway/ebu.c b/arch/mips/lantiq/xway/ebu.c new file mode 100644 index 000000000000..66eb52fa50a1 --- /dev/null +++ b/arch/mips/lantiq/xway/ebu.c @@ -0,0 +1,53 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * EBU - the external bus unit attaches PCI, NOR and NAND + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/version.h> +#include <linux/ioport.h> + +#include <lantiq_soc.h> + +/* all access to the ebu must be locked */ +DEFINE_SPINLOCK(ebu_lock); +EXPORT_SYMBOL_GPL(ebu_lock); + +static struct resource ltq_ebu_resource = { + .name = "ebu", + .start = LTQ_EBU_BASE_ADDR, + .end = LTQ_EBU_BASE_ADDR + LTQ_EBU_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +/* remapped base addr of the clock unit and external bus unit */ +void __iomem *ltq_ebu_membase; + +static int __init lantiq_ebu_init(void) +{ + /* insert and request the memory region */ + if (insert_resource(&iomem_resource, <q_ebu_resource) < 0) + panic("Failed to insert ebu memory\n"); + + if (request_mem_region(ltq_ebu_resource.start, + resource_size(<q_ebu_resource), "ebu") < 0) + panic("Failed to request ebu memory\n"); + + /* remap ebu register range */ + ltq_ebu_membase = ioremap_nocache(ltq_ebu_resource.start, + resource_size(<q_ebu_resource)); + if (!ltq_ebu_membase) + panic("Failed to remap ebu memory\n"); + + /* make sure to unprotect the memory region where flash is located */ + ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_BUSCON0) & ~EBU_WRDIS, LTQ_EBU_BUSCON0); + return 0; +} + +postcore_initcall(lantiq_ebu_init); diff --git a/arch/mips/lantiq/xway/gpio.c b/arch/mips/lantiq/xway/gpio.c new file mode 100644 index 000000000000..a321451a5455 --- /dev/null +++ b/arch/mips/lantiq/xway/gpio.c @@ -0,0 +1,195 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/gpio.h> +#include <linux/ioport.h> +#include <linux/io.h> + +#include <lantiq_soc.h> + +#define LTQ_GPIO_OUT 0x00 +#define LTQ_GPIO_IN 0x04 +#define LTQ_GPIO_DIR 0x08 +#define LTQ_GPIO_ALTSEL0 0x0C +#define LTQ_GPIO_ALTSEL1 0x10 +#define LTQ_GPIO_OD 0x14 + +#define PINS_PER_PORT 16 +#define MAX_PORTS 3 + +#define ltq_gpio_getbit(m, r, p) (!!(ltq_r32(m + r) & (1 << p))) +#define ltq_gpio_setbit(m, r, p) ltq_w32_mask(0, (1 << p), m + r) +#define ltq_gpio_clearbit(m, r, p) ltq_w32_mask((1 << p), 0, m + r) + +struct ltq_gpio { + void __iomem *membase; + struct gpio_chip chip; +}; + +static struct ltq_gpio ltq_gpio_port[MAX_PORTS]; + +int gpio_to_irq(unsigned int gpio) +{ + return -EINVAL; +} +EXPORT_SYMBOL(gpio_to_irq); + +int irq_to_gpio(unsigned int gpio) +{ + return -EINVAL; +} +EXPORT_SYMBOL(irq_to_gpio); + +int ltq_gpio_request(unsigned int pin, unsigned int alt0, + unsigned int alt1, unsigned int dir, const char *name) +{ + int id = 0; + + if (pin >= (MAX_PORTS * PINS_PER_PORT)) + return -EINVAL; + if (gpio_request(pin, name)) { + pr_err("failed to setup lantiq gpio: %s\n", name); + return -EBUSY; + } + if (dir) + gpio_direction_output(pin, 1); + else + gpio_direction_input(pin); + while (pin >= PINS_PER_PORT) { + pin -= PINS_PER_PORT; + id++; + } + if (alt0) + ltq_gpio_setbit(ltq_gpio_port[id].membase, + LTQ_GPIO_ALTSEL0, pin); + else + ltq_gpio_clearbit(ltq_gpio_port[id].membase, + LTQ_GPIO_ALTSEL0, pin); + if (alt1) + ltq_gpio_setbit(ltq_gpio_port[id].membase, + LTQ_GPIO_ALTSEL1, pin); + else + ltq_gpio_clearbit(ltq_gpio_port[id].membase, + LTQ_GPIO_ALTSEL1, pin); + return 0; +} +EXPORT_SYMBOL(ltq_gpio_request); + +static void ltq_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) +{ + struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip); + + if (value) + ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_OUT, offset); + else + ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_OUT, offset); +} + +static int ltq_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip); + + return ltq_gpio_getbit(ltq_gpio->membase, LTQ_GPIO_IN, offset); +} + +static int ltq_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) +{ + struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip); + + ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_OD, offset); + ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset); + + return 0; +} + +static int ltq_gpio_direction_output(struct gpio_chip *chip, + unsigned int offset, int value) +{ + struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip); + + ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_OD, offset); + ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset); + ltq_gpio_set(chip, offset, value); + + return 0; +} + +static int ltq_gpio_req(struct gpio_chip *chip, unsigned offset) +{ + struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip); + + ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL0, offset); + ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL1, offset); + return 0; +} + +static int ltq_gpio_probe(struct platform_device *pdev) +{ + struct resource *res; + + if (pdev->id >= MAX_PORTS) { + dev_err(&pdev->dev, "invalid gpio port %d\n", + pdev->id); + return -EINVAL; + } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "failed to get memory for gpio port %d\n", + pdev->id); + return -ENOENT; + } + res = devm_request_mem_region(&pdev->dev, res->start, + resource_size(res), dev_name(&pdev->dev)); + if (!res) { + dev_err(&pdev->dev, + "failed to request memory for gpio port %d\n", + pdev->id); + return -EBUSY; + } + ltq_gpio_port[pdev->id].membase = devm_ioremap_nocache(&pdev->dev, + res->start, resource_size(res)); + if (!ltq_gpio_port[pdev->id].membase) { + dev_err(&pdev->dev, "failed to remap memory for gpio port %d\n", + pdev->id); + return -ENOMEM; + } + ltq_gpio_port[pdev->id].chip.label = "ltq_gpio"; + ltq_gpio_port[pdev->id].chip.direction_input = ltq_gpio_direction_input; + ltq_gpio_port[pdev->id].chip.direction_output = + ltq_gpio_direction_output; + ltq_gpio_port[pdev->id].chip.get = ltq_gpio_get; + ltq_gpio_port[pdev->id].chip.set = ltq_gpio_set; + ltq_gpio_port[pdev->id].chip.request = ltq_gpio_req; + ltq_gpio_port[pdev->id].chip.base = PINS_PER_PORT * pdev->id; + ltq_gpio_port[pdev->id].chip.ngpio = PINS_PER_PORT; + platform_set_drvdata(pdev, <q_gpio_port[pdev->id]); + return gpiochip_add(<q_gpio_port[pdev->id].chip); +} + +static struct platform_driver +ltq_gpio_driver = { + .probe = ltq_gpio_probe, + .driver = { + .name = "ltq_gpio", + .owner = THIS_MODULE, + }, +}; + +int __init ltq_gpio_init(void) +{ + int ret = platform_driver_register(<q_gpio_driver); + + if (ret) + pr_info("ltq_gpio : Error registering platfom driver!"); + return ret; +} + +postcore_initcall(ltq_gpio_init); diff --git a/arch/mips/lantiq/xway/gpio_ebu.c b/arch/mips/lantiq/xway/gpio_ebu.c new file mode 100644 index 000000000000..a479355abdb9 --- /dev/null +++ b/arch/mips/lantiq/xway/gpio_ebu.c @@ -0,0 +1,126 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/platform_device.h> +#include <linux/mutex.h> +#include <linux/gpio.h> +#include <linux/io.h> + +#include <lantiq_soc.h> + +/* + * By attaching hardware latches to the EBU it is possible to create output + * only gpios. This driver configures a special memory address, which when + * written to outputs 16 bit to the latches. + */ + +#define LTQ_EBU_BUSCON 0x1e7ff /* 16 bit access, slowest timing */ +#define LTQ_EBU_WP 0x80000000 /* write protect bit */ + +/* we keep a shadow value of the last value written to the ebu */ +static int ltq_ebu_gpio_shadow = 0x0; +static void __iomem *ltq_ebu_gpio_membase; + +static void ltq_ebu_apply(void) +{ + unsigned long flags; + + spin_lock_irqsave(&ebu_lock, flags); + ltq_ebu_w32(LTQ_EBU_BUSCON, LTQ_EBU_BUSCON1); + *((__u16 *)ltq_ebu_gpio_membase) = ltq_ebu_gpio_shadow; + ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1); + spin_unlock_irqrestore(&ebu_lock, flags); +} + +static void ltq_ebu_set(struct gpio_chip *chip, unsigned offset, int value) +{ + if (value) + ltq_ebu_gpio_shadow |= (1 << offset); + else + ltq_ebu_gpio_shadow &= ~(1 << offset); + ltq_ebu_apply(); +} + +static int ltq_ebu_direction_output(struct gpio_chip *chip, unsigned offset, + int value) +{ + ltq_ebu_set(chip, offset, value); + + return 0; +} + +static struct gpio_chip ltq_ebu_chip = { + .label = "ltq_ebu", + .direction_output = ltq_ebu_direction_output, + .set = ltq_ebu_set, + .base = 72, + .ngpio = 16, + .can_sleep = 1, + .owner = THIS_MODULE, +}; + +static int ltq_ebu_probe(struct platform_device *pdev) +{ + int ret = 0; + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + if (!res) { + dev_err(&pdev->dev, "failed to get memory resource\n"); + return -ENOENT; + } + + res = devm_request_mem_region(&pdev->dev, res->start, + resource_size(res), dev_name(&pdev->dev)); + if (!res) { + dev_err(&pdev->dev, "failed to request memory resource\n"); + return -EBUSY; + } + + ltq_ebu_gpio_membase = devm_ioremap_nocache(&pdev->dev, res->start, + resource_size(res)); + if (!ltq_ebu_gpio_membase) { + dev_err(&pdev->dev, "Failed to ioremap mem region\n"); + return -ENOMEM; + } + + /* grab the default shadow value passed form the platform code */ + ltq_ebu_gpio_shadow = (unsigned int) pdev->dev.platform_data; + + /* tell the ebu controller which memory address we will be using */ + ltq_ebu_w32(pdev->resource->start | 0x1, LTQ_EBU_ADDRSEL1); + + /* write protect the region */ + ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1); + + ret = gpiochip_add(<q_ebu_chip); + if (!ret) + ltq_ebu_apply(); + return ret; +} + +static struct platform_driver ltq_ebu_driver = { + .probe = ltq_ebu_probe, + .driver = { + .name = "ltq_ebu", + .owner = THIS_MODULE, + }, +}; + +static int __init ltq_ebu_init(void) +{ + int ret = platform_driver_register(<q_ebu_driver); + + if (ret) + pr_info("ltq_ebu : Error registering platfom driver!"); + return ret; +} + +postcore_initcall(ltq_ebu_init); diff --git a/arch/mips/lantiq/xway/gpio_stp.c b/arch/mips/lantiq/xway/gpio_stp.c new file mode 100644 index 000000000000..67d59d690340 --- /dev/null +++ b/arch/mips/lantiq/xway/gpio_stp.c @@ -0,0 +1,157 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2007 John Crispin <blogic@openwrt.org> + * + */ + +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/platform_device.h> +#include <linux/mutex.h> +#include <linux/io.h> +#include <linux/gpio.h> + +#include <lantiq_soc.h> + +#define LTQ_STP_CON0 0x00 +#define LTQ_STP_CON1 0x04 +#define LTQ_STP_CPU0 0x08 +#define LTQ_STP_CPU1 0x0C +#define LTQ_STP_AR 0x10 + +#define LTQ_STP_CON_SWU (1 << 31) +#define LTQ_STP_2HZ 0 +#define LTQ_STP_4HZ (1 << 23) +#define LTQ_STP_8HZ (2 << 23) +#define LTQ_STP_10HZ (3 << 23) +#define LTQ_STP_SPEED_MASK (0xf << 23) +#define LTQ_STP_UPD_FPI (1 << 31) +#define LTQ_STP_UPD_MASK (3 << 30) +#define LTQ_STP_ADSL_SRC (3 << 24) + +#define LTQ_STP_GROUP0 (1 << 0) + +#define LTQ_STP_RISING 0 +#define LTQ_STP_FALLING (1 << 26) +#define LTQ_STP_EDGE_MASK (1 << 26) + +#define ltq_stp_r32(reg) __raw_readl(ltq_stp_membase + reg) +#define ltq_stp_w32(val, reg) __raw_writel(val, ltq_stp_membase + reg) +#define ltq_stp_w32_mask(clear, set, reg) \ + ltq_w32((ltq_r32(ltq_stp_membase + reg) & ~(clear)) | (set), \ + ltq_stp_membase + (reg)) + +static int ltq_stp_shadow = 0xffff; +static void __iomem *ltq_stp_membase; + +static void ltq_stp_set(struct gpio_chip *chip, unsigned offset, int value) +{ + if (value) + ltq_stp_shadow |= (1 << offset); + else + ltq_stp_shadow &= ~(1 << offset); + ltq_stp_w32(ltq_stp_shadow, LTQ_STP_CPU0); +} + +static int ltq_stp_direction_output(struct gpio_chip *chip, unsigned offset, + int value) +{ + ltq_stp_set(chip, offset, value); + + return 0; +} + +static struct gpio_chip ltq_stp_chip = { + .label = "ltq_stp", + .direction_output = ltq_stp_direction_output, + .set = ltq_stp_set, + .base = 48, + .ngpio = 24, + .can_sleep = 1, + .owner = THIS_MODULE, +}; + +static int ltq_stp_hw_init(void) +{ + /* the 3 pins used to control the external stp */ + ltq_gpio_request(4, 1, 0, 1, "stp-st"); + ltq_gpio_request(5, 1, 0, 1, "stp-d"); + ltq_gpio_request(6, 1, 0, 1, "stp-sh"); + + /* sane defaults */ + ltq_stp_w32(0, LTQ_STP_AR); + ltq_stp_w32(0, LTQ_STP_CPU0); + ltq_stp_w32(0, LTQ_STP_CPU1); + ltq_stp_w32(LTQ_STP_CON_SWU, LTQ_STP_CON0); + ltq_stp_w32(0, LTQ_STP_CON1); + + /* rising or falling edge */ + ltq_stp_w32_mask(LTQ_STP_EDGE_MASK, LTQ_STP_FALLING, LTQ_STP_CON0); + + /* per default stp 15-0 are set */ + ltq_stp_w32_mask(0, LTQ_STP_GROUP0, LTQ_STP_CON1); + + /* stp are update periodically by the FPI bus */ + ltq_stp_w32_mask(LTQ_STP_UPD_MASK, LTQ_STP_UPD_FPI, LTQ_STP_CON1); + + /* set stp update speed */ + ltq_stp_w32_mask(LTQ_STP_SPEED_MASK, LTQ_STP_8HZ, LTQ_STP_CON1); + + /* tell the hardware that pin (led) 0 and 1 are controlled + * by the dsl arc + */ + ltq_stp_w32_mask(0, LTQ_STP_ADSL_SRC, LTQ_STP_CON0); + + ltq_pmu_enable(PMU_LED); + return 0; +} + +static int __devinit ltq_stp_probe(struct platform_device *pdev) +{ + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int ret = 0; + + if (!res) + return -ENOENT; + res = devm_request_mem_region(&pdev->dev, res->start, + resource_size(res), dev_name(&pdev->dev)); + if (!res) { + dev_err(&pdev->dev, "failed to request STP memory\n"); + return -EBUSY; + } + ltq_stp_membase = devm_ioremap_nocache(&pdev->dev, res->start, + resource_size(res)); + if (!ltq_stp_membase) { + dev_err(&pdev->dev, "failed to remap STP memory\n"); + return -ENOMEM; + } + ret = gpiochip_add(<q_stp_chip); + if (!ret) + ret = ltq_stp_hw_init(); + + return ret; +} + +static struct platform_driver ltq_stp_driver = { + .probe = ltq_stp_probe, + .driver = { + .name = "ltq_stp", + .owner = THIS_MODULE, + }, +}; + +int __init ltq_stp_init(void) +{ + int ret = platform_driver_register(<q_stp_driver); + + if (ret) + pr_info("ltq_stp: error registering platfom driver"); + return ret; +} + +postcore_initcall(ltq_stp_init); diff --git a/arch/mips/lantiq/xway/mach-easy50601.c b/arch/mips/lantiq/xway/mach-easy50601.c new file mode 100644 index 000000000000..d5aaf637ab19 --- /dev/null +++ b/arch/mips/lantiq/xway/mach-easy50601.c @@ -0,0 +1,57 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/physmap.h> +#include <linux/input.h> + +#include <lantiq.h> + +#include "../machtypes.h" +#include "devices.h" + +static struct mtd_partition easy50601_partitions[] = { + { + .name = "uboot", + .offset = 0x0, + .size = 0x10000, + }, + { + .name = "uboot_env", + .offset = 0x10000, + .size = 0x10000, + }, + { + .name = "linux", + .offset = 0x20000, + .size = 0xE0000, + }, + { + .name = "rootfs", + .offset = 0x100000, + .size = 0x300000, + }, +}; + +static struct physmap_flash_data easy50601_flash_data = { + .nr_parts = ARRAY_SIZE(easy50601_partitions), + .parts = easy50601_partitions, +}; + +static void __init easy50601_init(void) +{ + ltq_register_nor(&easy50601_flash_data); +} + +MIPS_MACHINE(LTQ_MACH_EASY50601, + "EASY50601", + "EASY50601 Eval Board", + easy50601_init); diff --git a/arch/mips/lantiq/xway/mach-easy50712.c b/arch/mips/lantiq/xway/mach-easy50712.c new file mode 100644 index 000000000000..ea5027b3239d --- /dev/null +++ b/arch/mips/lantiq/xway/mach-easy50712.c @@ -0,0 +1,74 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/physmap.h> +#include <linux/input.h> +#include <linux/phy.h> + +#include <lantiq_soc.h> +#include <irq.h> + +#include "../machtypes.h" +#include "devices.h" + +static struct mtd_partition easy50712_partitions[] = { + { + .name = "uboot", + .offset = 0x0, + .size = 0x10000, + }, + { + .name = "uboot_env", + .offset = 0x10000, + .size = 0x10000, + }, + { + .name = "linux", + .offset = 0x20000, + .size = 0xe0000, + }, + { + .name = "rootfs", + .offset = 0x100000, + .size = 0x300000, + }, +}; + +static struct physmap_flash_data easy50712_flash_data = { + .nr_parts = ARRAY_SIZE(easy50712_partitions), + .parts = easy50712_partitions, +}; + +static struct ltq_pci_data ltq_pci_data = { + .clock = PCI_CLOCK_INT, + .gpio = PCI_GNT1 | PCI_REQ1, + .irq = { + [14] = INT_NUM_IM0_IRL0 + 22, + }, +}; + +static struct ltq_eth_data ltq_eth_data = { + .mii_mode = PHY_INTERFACE_MODE_MII, +}; + +static void __init easy50712_init(void) +{ + ltq_register_gpio_stp(); + ltq_register_nor(&easy50712_flash_data); + ltq_register_pci(<q_pci_data); + ltq_register_etop(<q_eth_data); +} + +MIPS_MACHINE(LTQ_MACH_EASY50712, + "EASY50712", + "EASY50712 Eval Board", + easy50712_init); diff --git a/arch/mips/lantiq/xway/pmu.c b/arch/mips/lantiq/xway/pmu.c new file mode 100644 index 000000000000..9d69f01e352b --- /dev/null +++ b/arch/mips/lantiq/xway/pmu.c @@ -0,0 +1,70 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/version.h> +#include <linux/ioport.h> + +#include <lantiq_soc.h> + +/* PMU - the power management unit allows us to turn part of the core + * on and off + */ + +/* the enable / disable registers */ +#define LTQ_PMU_PWDCR 0x1C +#define LTQ_PMU_PWDSR 0x20 + +#define ltq_pmu_w32(x, y) ltq_w32((x), ltq_pmu_membase + (y)) +#define ltq_pmu_r32(x) ltq_r32(ltq_pmu_membase + (x)) + +static struct resource ltq_pmu_resource = { + .name = "pmu", + .start = LTQ_PMU_BASE_ADDR, + .end = LTQ_PMU_BASE_ADDR + LTQ_PMU_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static void __iomem *ltq_pmu_membase; + +void ltq_pmu_enable(unsigned int module) +{ + int err = 1000000; + + ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) & ~module, LTQ_PMU_PWDCR); + do {} while (--err && (ltq_pmu_r32(LTQ_PMU_PWDSR) & module)); + + if (!err) + panic("activating PMU module failed!\n"); +} +EXPORT_SYMBOL(ltq_pmu_enable); + +void ltq_pmu_disable(unsigned int module) +{ + ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) | module, LTQ_PMU_PWDCR); +} +EXPORT_SYMBOL(ltq_pmu_disable); + +int __init ltq_pmu_init(void) +{ + if (insert_resource(&iomem_resource, <q_pmu_resource) < 0) + panic("Failed to insert pmu memory\n"); + + if (request_mem_region(ltq_pmu_resource.start, + resource_size(<q_pmu_resource), "pmu") < 0) + panic("Failed to request pmu memory\n"); + + ltq_pmu_membase = ioremap_nocache(ltq_pmu_resource.start, + resource_size(<q_pmu_resource)); + if (!ltq_pmu_membase) + panic("Failed to remap pmu memory\n"); + return 0; +} + +core_initcall(ltq_pmu_init); diff --git a/arch/mips/lantiq/xway/prom-ase.c b/arch/mips/lantiq/xway/prom-ase.c new file mode 100644 index 000000000000..abe49f4db57f --- /dev/null +++ b/arch/mips/lantiq/xway/prom-ase.c @@ -0,0 +1,39 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/module.h> +#include <linux/clk.h> +#include <asm/bootinfo.h> +#include <asm/time.h> + +#include <lantiq_soc.h> + +#include "../prom.h" + +#define SOC_AMAZON_SE "Amazon_SE" + +#define PART_SHIFT 12 +#define PART_MASK 0x0FFFFFFF +#define REV_SHIFT 28 +#define REV_MASK 0xF0000000 + +void __init ltq_soc_detect(struct ltq_soc_info *i) +{ + i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT; + i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT; + switch (i->partnum) { + case SOC_ID_AMAZON_SE: + i->name = SOC_AMAZON_SE; + i->type = SOC_TYPE_AMAZON_SE; + break; + + default: + unreachable(); + break; + } +} diff --git a/arch/mips/lantiq/xway/prom-xway.c b/arch/mips/lantiq/xway/prom-xway.c new file mode 100644 index 000000000000..1686692ac24d --- /dev/null +++ b/arch/mips/lantiq/xway/prom-xway.c @@ -0,0 +1,54 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/module.h> +#include <linux/clk.h> +#include <asm/bootinfo.h> +#include <asm/time.h> + +#include <lantiq_soc.h> + +#include "../prom.h" + +#define SOC_DANUBE "Danube" +#define SOC_TWINPASS "Twinpass" +#define SOC_AR9 "AR9" + +#define PART_SHIFT 12 +#define PART_MASK 0x0FFFFFFF +#define REV_SHIFT 28 +#define REV_MASK 0xF0000000 + +void __init ltq_soc_detect(struct ltq_soc_info *i) +{ + i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT; + i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT; + switch (i->partnum) { + case SOC_ID_DANUBE1: + case SOC_ID_DANUBE2: + i->name = SOC_DANUBE; + i->type = SOC_TYPE_DANUBE; + break; + + case SOC_ID_TWINPASS: + i->name = SOC_TWINPASS; + i->type = SOC_TYPE_DANUBE; + break; + + case SOC_ID_ARX188: + case SOC_ID_ARX168: + case SOC_ID_ARX182: + i->name = SOC_AR9; + i->type = SOC_TYPE_AR9; + break; + + default: + unreachable(); + break; + } +} diff --git a/arch/mips/lantiq/xway/reset.c b/arch/mips/lantiq/xway/reset.c new file mode 100644 index 000000000000..a1be36d0e490 --- /dev/null +++ b/arch/mips/lantiq/xway/reset.c @@ -0,0 +1,91 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/pm.h> +#include <linux/module.h> +#include <asm/reboot.h> + +#include <lantiq_soc.h> + +#define ltq_rcu_w32(x, y) ltq_w32((x), ltq_rcu_membase + (y)) +#define ltq_rcu_r32(x) ltq_r32(ltq_rcu_membase + (x)) + +/* register definitions */ +#define LTQ_RCU_RST 0x0010 +#define LTQ_RCU_RST_ALL 0x40000000 + +#define LTQ_RCU_RST_STAT 0x0014 +#define LTQ_RCU_STAT_SHIFT 26 + +static struct resource ltq_rcu_resource = { + .name = "rcu", + .start = LTQ_RCU_BASE_ADDR, + .end = LTQ_RCU_BASE_ADDR + LTQ_RCU_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +/* remapped base addr of the reset control unit */ +static void __iomem *ltq_rcu_membase; + +/* This function is used by the watchdog driver */ +int ltq_reset_cause(void) +{ + u32 val = ltq_rcu_r32(LTQ_RCU_RST_STAT); + return val >> LTQ_RCU_STAT_SHIFT; +} +EXPORT_SYMBOL_GPL(ltq_reset_cause); + +static void ltq_machine_restart(char *command) +{ + pr_notice("System restart\n"); + local_irq_disable(); + ltq_rcu_w32(ltq_rcu_r32(LTQ_RCU_RST) | LTQ_RCU_RST_ALL, LTQ_RCU_RST); + unreachable(); +} + +static void ltq_machine_halt(void) +{ + pr_notice("System halted.\n"); + local_irq_disable(); + unreachable(); +} + +static void ltq_machine_power_off(void) +{ + pr_notice("Please turn off the power now.\n"); + local_irq_disable(); + unreachable(); +} + +static int __init mips_reboot_setup(void) +{ + /* insert and request the memory region */ + if (insert_resource(&iomem_resource, <q_rcu_resource) < 0) + panic("Failed to insert rcu memory\n"); + + if (request_mem_region(ltq_rcu_resource.start, + resource_size(<q_rcu_resource), "rcu") < 0) + panic("Failed to request rcu memory\n"); + + /* remap rcu register range */ + ltq_rcu_membase = ioremap_nocache(ltq_rcu_resource.start, + resource_size(<q_rcu_resource)); + if (!ltq_rcu_membase) + panic("Failed to remap rcu memory\n"); + + _machine_restart = ltq_machine_restart; + _machine_halt = ltq_machine_halt; + pm_power_off = ltq_machine_power_off; + + return 0; +} + +arch_initcall(mips_reboot_setup); diff --git a/arch/mips/lantiq/xway/setup-ase.c b/arch/mips/lantiq/xway/setup-ase.c new file mode 100644 index 000000000000..f6f326798a39 --- /dev/null +++ b/arch/mips/lantiq/xway/setup-ase.c @@ -0,0 +1,19 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2011 John Crispin <blogic@openwrt.org> + */ + +#include <lantiq_soc.h> + +#include "../prom.h" +#include "devices.h" + +void __init ltq_soc_setup(void) +{ + ltq_register_ase_asc(); + ltq_register_gpio(); + ltq_register_wdt(); +} diff --git a/arch/mips/lantiq/xway/setup-xway.c b/arch/mips/lantiq/xway/setup-xway.c new file mode 100644 index 000000000000..c292f643a858 --- /dev/null +++ b/arch/mips/lantiq/xway/setup-xway.c @@ -0,0 +1,20 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2011 John Crispin <blogic@openwrt.org> + */ + +#include <lantiq_soc.h> + +#include "../prom.h" +#include "devices.h" + +void __init ltq_soc_setup(void) +{ + ltq_register_asc(0); + ltq_register_asc(1); + ltq_register_gpio(); + ltq_register_wdt(); +} diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 2adead5a8a37..b2cad4fd5fc4 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o obj-$(CONFIG_CPU_TX49XX) += dump_tlb.o obj-$(CONFIG_CPU_VR41XX) += dump_tlb.o obj-$(CONFIG_CPU_CAVIUM_OCTEON) += dump_tlb.o +obj-$(CONFIG_CPU_XLR) += dump_tlb.o # libgcc-style stuff needed in the kernel obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o diff --git a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c index 8c807c965199..0cb1b9760e34 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c +++ b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c @@ -201,8 +201,6 @@ static struct clocksource clocksource_mfgpt = { .rating = 120, /* Functional for real use, but not desired */ .read = mfgpt_read, .mask = CLOCKSOURCE_MASK(32), - .mult = 0, - .shift = 22, }; int __init init_mfgpt_clocksource(void) @@ -210,8 +208,7 @@ int __init init_mfgpt_clocksource(void) if (num_possible_cpus() > 1) /* MFGPT does not scale! */ return 0; - clocksource_mfgpt.mult = clocksource_hz2mult(MFGPT_TICK_RATE, 22); - return clocksource_register(&clocksource_mfgpt); + return clocksource_register_hz(&clocksource_mfgpt, MFGPT_TICK_RATE); } arch_initcall(init_mfgpt_clocksource); diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index d679c772d082..4d8c1623eee2 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -3,7 +3,8 @@ # obj-y += cache.o dma-default.o extable.o fault.o \ - init.o tlbex.o tlbex-fault.o uasm.o page.o + init.o mmap.o tlbex.o tlbex-fault.o uasm.o \ + page.o obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o obj-$(CONFIG_64BIT) += pgtable-64.o @@ -29,6 +30,7 @@ obj-$(CONFIG_CPU_TX39XX) += c-tx39.o tlb-r3k.o obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o tlb-r4k.o obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o tlb-r4k.o obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o +obj-$(CONFIG_CPU_XLR) += c-r4k.o tlb-r4k.o cex-gen.o obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 71bddf8f7d25..d9bc5d3593b6 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1006,6 +1006,7 @@ static void __cpuinit probe_pcache(void) case CPU_25KF: case CPU_SB1: case CPU_SB1A: + case CPU_XLR: c->dcache.flags |= MIPS_CACHE_PINDEX; break; diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c new file mode 100644 index 000000000000..ae3c20a9556e --- /dev/null +++ b/arch/mips/mm/mmap.c @@ -0,0 +1,122 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/sched.h> + +unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ + +EXPORT_SYMBOL(shm_align_mask); + +#define COLOUR_ALIGN(addr,pgoff) \ + ((((addr) + shm_align_mask) & ~shm_align_mask) + \ + (((pgoff) << PAGE_SHIFT) & shm_align_mask)) + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct vm_area_struct * vmm; + int do_color_align; + + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + /* Even MAP_FIXED mappings must reside within TASK_SIZE. */ + if (TASK_SIZE - len < addr) + return -EINVAL; + + /* + * We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + return -EINVAL; + return addr; + } + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + vmm = find_vma(current->mm, addr); + if (TASK_SIZE - len >= addr && + (!vmm || addr + len <= vmm->vm_start)) + return addr; + } + addr = current->mm->mmap_base; + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { + /* At this point: (!vmm || addr < vmm->vm_end). */ + if (TASK_SIZE - len < addr) + return -ENOMEM; + if (!vmm || addr + len <= vmm->vm_start) + return addr; + addr = vmm->vm_end; + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + } +} + +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) { + random_factor = get_random_int(); + random_factor = random_factor << PAGE_SHIFT; + if (TASK_IS_32BIT_ADDR) + random_factor &= 0xfffffful; + else + random_factor &= 0xffffffful; + } + + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; +} + +static inline unsigned long brk_rnd(void) +{ + unsigned long rnd = get_random_int(); + + rnd = rnd << PAGE_SHIFT; + /* 8MB for 32bit, 256MB for 64bit */ + if (TASK_IS_32BIT_ADDR) + rnd = rnd & 0x7ffffful; + else + rnd = rnd & 0xffffffful; + + return rnd; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long base = mm->brk; + unsigned long ret; + + ret = PAGE_ALIGN(base + brk_rnd()); + + if (ret < mm->brk) + return mm->brk; + + return ret; +} diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index f5734c2c8097..424ed4b92e6d 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -404,6 +404,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_5KC: case CPU_TX49XX: case CPU_PR4450: + case CPU_XLR: uasm_i_nop(p); tlbw(p); break; diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index e85c977328da..1d36c511a7a5 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -308,6 +308,8 @@ static void ipi_call_dispatch(void) static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) { + scheduler_ipi(); + return IRQ_HANDLED; } diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig new file mode 100644 index 000000000000..a5ca743613f2 --- /dev/null +++ b/arch/mips/netlogic/Kconfig @@ -0,0 +1,5 @@ +config NLM_COMMON + bool + +config NLM_XLR + bool diff --git a/arch/mips/netlogic/xlr/Makefile b/arch/mips/netlogic/xlr/Makefile new file mode 100644 index 000000000000..9bd3f731f62e --- /dev/null +++ b/arch/mips/netlogic/xlr/Makefile @@ -0,0 +1,5 @@ +obj-y += setup.o platform.o irq.o setup.o time.o +obj-$(CONFIG_SMP) += smp.o smpboot.o +obj-$(CONFIG_EARLY_PRINTK) += xlr_console.o + +EXTRA_CFLAGS += -Werror diff --git a/arch/mips/netlogic/xlr/irq.c b/arch/mips/netlogic/xlr/irq.c new file mode 100644 index 000000000000..1446d58e364c --- /dev/null +++ b/arch/mips/netlogic/xlr/irq.c @@ -0,0 +1,300 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/mm.h> + +#include <asm/mipsregs.h> + +#include <asm/netlogic/xlr/iomap.h> +#include <asm/netlogic/xlr/pic.h> +#include <asm/netlogic/xlr/xlr.h> + +#include <asm/netlogic/interrupt.h> +#include <asm/netlogic/mips-extns.h> + +static u64 nlm_irq_mask; +static DEFINE_SPINLOCK(nlm_pic_lock); + +static void xlr_pic_enable(struct irq_data *d) +{ + nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + unsigned long flags; + nlm_reg_t reg; + int irq = d->irq; + + WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq); + + spin_lock_irqsave(&nlm_pic_lock, flags); + reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE); + netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE, + reg | (1 << 6) | (1 << 30) | (1 << 31)); + spin_unlock_irqrestore(&nlm_pic_lock, flags); +} + +static void xlr_pic_mask(struct irq_data *d) +{ + nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + unsigned long flags; + nlm_reg_t reg; + int irq = d->irq; + + WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq); + + spin_lock_irqsave(&nlm_pic_lock, flags); + reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE); + netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE, + reg | (1 << 6) | (1 << 30) | (0 << 31)); + spin_unlock_irqrestore(&nlm_pic_lock, flags); +} + +#ifdef CONFIG_PCI +/* Extra ACK needed for XLR on chip PCI controller */ +static void xlr_pci_ack(struct irq_data *d) +{ + nlm_reg_t *pci_mmio = netlogic_io_mmio(NETLOGIC_IO_PCIX_OFFSET); + + netlogic_read_reg(pci_mmio, (0x140 >> 2)); +} + +/* Extra ACK needed for XLS on chip PCIe controller */ +static void xls_pcie_ack(struct irq_data *d) +{ + nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET); + + switch (d->irq) { + case PIC_PCIE_LINK0_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff); + break; + case PIC_PCIE_LINK1_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff); + break; + case PIC_PCIE_LINK2_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff); + break; + case PIC_PCIE_LINK3_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff); + break; + } +} + +/* For XLS B silicon, the 3,4 PCI interrupts are different */ +static void xls_pcie_ack_b(struct irq_data *d) +{ + nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET); + + switch (d->irq) { + case PIC_PCIE_LINK0_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff); + break; + case PIC_PCIE_LINK1_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff); + break; + case PIC_PCIE_XLSB0_LINK2_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff); + break; + case PIC_PCIE_XLSB0_LINK3_IRQ: + netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff); + break; + } +} +#endif + +static void xlr_pic_ack(struct irq_data *d) +{ + unsigned long flags; + nlm_reg_t *mmio; + int irq = d->irq; + void *hd = irq_data_get_irq_handler_data(d); + + WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq); + + if (hd) { + void (*extra_ack)(void *) = hd; + extra_ack(d); + } + mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + spin_lock_irqsave(&nlm_pic_lock, flags); + netlogic_write_reg(mmio, PIC_INT_ACK, (1 << (irq - PIC_IRQ_BASE))); + spin_unlock_irqrestore(&nlm_pic_lock, flags); +} + +/* + * This chip definition handles interrupts routed thru the XLR + * hardware PIC, currently IRQs 8-39 are mapped to hardware intr + * 0-31 wired the XLR PIC + */ +static struct irq_chip xlr_pic = { + .name = "XLR-PIC", + .irq_enable = xlr_pic_enable, + .irq_mask = xlr_pic_mask, + .irq_ack = xlr_pic_ack, +}; + +static void rsvd_irq_handler(struct irq_data *d) +{ + WARN(d->irq >= PIC_IRQ_BASE, "Bad irq %d", d->irq); +} + +/* + * Chip definition for CPU originated interrupts(timer, msg) and + * IPIs + */ +struct irq_chip nlm_cpu_intr = { + .name = "XLR-CPU-INTR", + .irq_enable = rsvd_irq_handler, + .irq_mask = rsvd_irq_handler, + .irq_ack = rsvd_irq_handler, +}; + +void __init init_xlr_irqs(void) +{ + nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET); + uint32_t thread_mask = 1; + int level, i; + + pr_info("Interrupt thread mask [%x]\n", thread_mask); + for (i = 0; i < PIC_NUM_IRTS; i++) { + level = PIC_IRQ_IS_EDGE_TRIGGERED(i); + + /* Bind all PIC irqs to boot cpu */ + netlogic_write_reg(mmio, PIC_IRT_0_BASE + i, thread_mask); + + /* + * Use local scheduling and high polarity for all IRTs + * Invalidate all IRTs, by default + */ + netlogic_write_reg(mmio, PIC_IRT_1_BASE + i, + (level << 30) | (1 << 6) | (PIC_IRQ_BASE + i)); + } + + /* Make all IRQs as level triggered by default */ + for (i = 0; i < NR_IRQS; i++) { + if (PIC_IRQ_IS_IRT(i)) + irq_set_chip_and_handler(i, &xlr_pic, handle_level_irq); + else + irq_set_chip_and_handler(i, &nlm_cpu_intr, + handle_level_irq); + } +#ifdef CONFIG_SMP + irq_set_chip_and_handler(IRQ_IPI_SMP_FUNCTION, &nlm_cpu_intr, + nlm_smp_function_ipi_handler); + irq_set_chip_and_handler(IRQ_IPI_SMP_RESCHEDULE, &nlm_cpu_intr, + nlm_smp_resched_ipi_handler); + nlm_irq_mask |= + ((1ULL << IRQ_IPI_SMP_FUNCTION) | (1ULL << IRQ_IPI_SMP_RESCHEDULE)); +#endif + +#ifdef CONFIG_PCI + /* + * For PCI interrupts, we need to ack the PIC controller too, overload + * irq handler data to do this + */ + if (nlm_chip_is_xls()) { + if (nlm_chip_is_xls_b()) { + irq_set_handler_data(PIC_PCIE_LINK0_IRQ, + xls_pcie_ack_b); + irq_set_handler_data(PIC_PCIE_LINK1_IRQ, + xls_pcie_ack_b); + irq_set_handler_data(PIC_PCIE_XLSB0_LINK2_IRQ, + xls_pcie_ack_b); + irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, + xls_pcie_ack_b); + } else { + irq_set_handler_data(PIC_PCIE_LINK0_IRQ, xls_pcie_ack); + irq_set_handler_data(PIC_PCIE_LINK1_IRQ, xls_pcie_ack); + irq_set_handler_data(PIC_PCIE_LINK2_IRQ, xls_pcie_ack); + irq_set_handler_data(PIC_PCIE_LINK3_IRQ, xls_pcie_ack); + } + } else { + /* XLR PCI controller ACK */ + irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack); + } +#endif + /* unmask all PIC related interrupts. If no handler is installed by the + * drivers, it'll just ack the interrupt and return + */ + for (i = PIC_IRT_FIRST_IRQ; i <= PIC_IRT_LAST_IRQ; i++) + nlm_irq_mask |= (1ULL << i); + + nlm_irq_mask |= (1ULL << IRQ_TIMER); +} + +void __init arch_init_irq(void) +{ + /* Initialize the irq descriptors */ + init_xlr_irqs(); + write_c0_eimr(nlm_irq_mask); +} + +void __cpuinit nlm_smp_irq_init(void) +{ + /* set interrupt mask for non-zero cpus */ + write_c0_eimr(nlm_irq_mask); +} + +asmlinkage void plat_irq_dispatch(void) +{ + uint64_t eirr; + int i; + + eirr = read_c0_eirr() & read_c0_eimr(); + if (!eirr) + return; + + /* no need of EIRR here, writing compare clears interrupt */ + if (eirr & (1 << IRQ_TIMER)) { + do_IRQ(IRQ_TIMER); + return; + } + + /* use dcltz: optimize below code */ + for (i = 63; i != -1; i--) { + if (eirr & (1ULL << i)) + break; + } + if (i == -1) { + pr_err("no interrupt !!\n"); + return; + } + + /* Ack eirr */ + write_c0_eirr(1ULL << i); + + do_IRQ(i); +} diff --git a/arch/mips/netlogic/xlr/platform.c b/arch/mips/netlogic/xlr/platform.c new file mode 100644 index 000000000000..609ec2534642 --- /dev/null +++ b/arch/mips/netlogic/xlr/platform.c @@ -0,0 +1,98 @@ +/* + * Copyright 2011, Netlogic Microsystems. + * Copyright 2004, Matt Porter <mporter@kernel.crashing.org> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/resource.h> +#include <linux/serial_8250.h> +#include <linux/serial_reg.h> + +#include <asm/netlogic/xlr/iomap.h> +#include <asm/netlogic/xlr/pic.h> +#include <asm/netlogic/xlr/xlr.h> + +unsigned int nlm_xlr_uart_in(struct uart_port *p, int offset) +{ + nlm_reg_t *mmio; + unsigned int value; + + /* XLR uart does not need any mapping of regs */ + mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift)); + value = netlogic_read_reg(mmio, 0); + + /* See XLR/XLS errata */ + if (offset == UART_MSR) + value ^= 0xF0; + else if (offset == UART_MCR) + value ^= 0x3; + + return value; +} + +void nlm_xlr_uart_out(struct uart_port *p, int offset, int value) +{ + nlm_reg_t *mmio; + + /* XLR uart does not need any mapping of regs */ + mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift)); + + /* See XLR/XLS errata */ + if (offset == UART_MSR) + value ^= 0xF0; + else if (offset == UART_MCR) + value ^= 0x3; + + netlogic_write_reg(mmio, 0, value); +} + +#define PORT(_irq) \ + { \ + .irq = _irq, \ + .regshift = 2, \ + .iotype = UPIO_MEM32, \ + .flags = (UPF_SKIP_TEST | \ + UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF),\ + .uartclk = PIC_CLKS_PER_SEC, \ + .type = PORT_16550A, \ + .serial_in = nlm_xlr_uart_in, \ + .serial_out = nlm_xlr_uart_out, \ + } + +static struct plat_serial8250_port xlr_uart_data[] = { + PORT(PIC_UART_0_IRQ), + PORT(PIC_UART_1_IRQ), + {}, +}; + +static struct platform_device uart_device = { + .name = "serial8250", + .id = PLAT8250_DEV_PLATFORM, + .dev = { + .platform_data = xlr_uart_data, + }, +}; + +static int __init nlm_uart_init(void) +{ + nlm_reg_t *mmio; + + mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET); + xlr_uart_data[0].membase = (void __iomem *)mmio; + xlr_uart_data[0].mapbase = CPHYSADDR((unsigned long)mmio); + + mmio = netlogic_io_mmio(NETLOGIC_IO_UART_1_OFFSET); + xlr_uart_data[1].membase = (void __iomem *)mmio; + xlr_uart_data[1].mapbase = CPHYSADDR((unsigned long)mmio); + + return platform_device_register(&uart_device); +} + +arch_initcall(nlm_uart_init); diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c new file mode 100644 index 000000000000..482802569e74 --- /dev/null +++ b/arch/mips/netlogic/xlr/setup.c @@ -0,0 +1,188 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/serial_8250.h> +#include <linux/pm.h> + +#include <asm/reboot.h> +#include <asm/time.h> +#include <asm/bootinfo.h> +#include <asm/smp-ops.h> + +#include <asm/netlogic/interrupt.h> +#include <asm/netlogic/psb-bootinfo.h> + +#include <asm/netlogic/xlr/xlr.h> +#include <asm/netlogic/xlr/iomap.h> +#include <asm/netlogic/xlr/pic.h> +#include <asm/netlogic/xlr/gpio.h> + +unsigned long netlogic_io_base = (unsigned long)(DEFAULT_NETLOGIC_IO_BASE); +unsigned long nlm_common_ebase = 0x0; +struct psb_info nlm_prom_info; + +static void nlm_early_serial_setup(void) +{ + struct uart_port s; + nlm_reg_t *uart_base; + + uart_base = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET); + memset(&s, 0, sizeof(s)); + s.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST; + s.iotype = UPIO_MEM32; + s.regshift = 2; + s.irq = PIC_UART_0_IRQ; + s.uartclk = PIC_CLKS_PER_SEC; + s.serial_in = nlm_xlr_uart_in; + s.serial_out = nlm_xlr_uart_out; + s.mapbase = (unsigned long)uart_base; + s.membase = (unsigned char __iomem *)uart_base; + early_serial_setup(&s); +} + +static void nlm_linux_exit(void) +{ + nlm_reg_t *mmio; + + mmio = netlogic_io_mmio(NETLOGIC_IO_GPIO_OFFSET); + /* trigger a chip reset by writing 1 to GPIO_SWRESET_REG */ + netlogic_write_reg(mmio, NETLOGIC_GPIO_SWRESET_REG, 1); + for ( ; ; ) + cpu_wait(); +} + +void __init plat_mem_setup(void) +{ + panic_timeout = 5; + _machine_restart = (void (*)(char *))nlm_linux_exit; + _machine_halt = nlm_linux_exit; + pm_power_off = nlm_linux_exit; +} + +const char *get_system_type(void) +{ + return "Netlogic XLR/XLS Series"; +} + +void __init prom_free_prom_memory(void) +{ + /* Nothing yet */ +} + +static void build_arcs_cmdline(int *argv) +{ + int i, remain, len; + char *arg; + + remain = sizeof(arcs_cmdline) - 1; + arcs_cmdline[0] = '\0'; + for (i = 0; argv[i] != 0; i++) { + arg = (char *)(long)argv[i]; + len = strlen(arg); + if (len + 1 > remain) + break; + strcat(arcs_cmdline, arg); + strcat(arcs_cmdline, " "); + remain -= len + 1; + } + + /* Add the default options here */ + if ((strstr(arcs_cmdline, "console=")) == NULL) { + arg = "console=ttyS0,38400 "; + len = strlen(arg); + if (len > remain) + goto fail; + strcat(arcs_cmdline, arg); + remain -= len; + } +#ifdef CONFIG_BLK_DEV_INITRD + if ((strstr(arcs_cmdline, "rdinit=")) == NULL) { + arg = "rdinit=/sbin/init "; + len = strlen(arg); + if (len > remain) + goto fail; + strcat(arcs_cmdline, arg); + remain -= len; + } +#endif + return; +fail: + panic("Cannot add %s, command line too big!", arg); +} + +static void prom_add_memory(void) +{ + struct nlm_boot_mem_map *bootm; + u64 start, size; + u64 pref_backup = 512; /* avoid pref walking beyond end */ + int i; + + bootm = (void *)(long)nlm_prom_info.psb_mem_map; + for (i = 0; i < bootm->nr_map; i++) { + if (bootm->map[i].type != BOOT_MEM_RAM) + continue; + start = bootm->map[i].addr; + size = bootm->map[i].size; + + /* Work around for using bootloader mem */ + if (i == 0 && start == 0 && size == 0x0c000000) + size = 0x0ff00000; + + add_memory_region(start, size - pref_backup, BOOT_MEM_RAM); + } +} + +void __init prom_init(void) +{ + int *argv, *envp; /* passed as 32 bit ptrs */ + struct psb_info *prom_infop; + + /* truncate to 32 bit and sign extend all args */ + argv = (int *)(long)(int)fw_arg1; + envp = (int *)(long)(int)fw_arg2; + prom_infop = (struct psb_info *)(long)(int)fw_arg3; + + nlm_prom_info = *prom_infop; + + nlm_early_serial_setup(); + build_arcs_cmdline(argv); + nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1)); + prom_add_memory(); + +#ifdef CONFIG_SMP + nlm_wakeup_secondary_cpus(nlm_prom_info.online_cpu_map); + register_smp_ops(&nlm_smp_ops); +#endif +} diff --git a/arch/mips/netlogic/xlr/smp.c b/arch/mips/netlogic/xlr/smp.c new file mode 100644 index 000000000000..b495a7f1433b --- /dev/null +++ b/arch/mips/netlogic/xlr/smp.c @@ -0,0 +1,225 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/irq.h> + +#include <asm/mmu_context.h> + +#include <asm/netlogic/interrupt.h> +#include <asm/netlogic/mips-extns.h> + +#include <asm/netlogic/xlr/iomap.h> +#include <asm/netlogic/xlr/pic.h> +#include <asm/netlogic/xlr/xlr.h> + +void core_send_ipi(int logical_cpu, unsigned int action) +{ + int cpu = cpu_logical_map(logical_cpu); + u32 tid = cpu & 0x3; + u32 pid = (cpu >> 2) & 0x07; + u32 ipi = (tid << 16) | (pid << 20); + + if (action & SMP_CALL_FUNCTION) + ipi |= IRQ_IPI_SMP_FUNCTION; + else if (action & SMP_RESCHEDULE_YOURSELF) + ipi |= IRQ_IPI_SMP_RESCHEDULE; + else + return; + + pic_send_ipi(ipi); +} + +void nlm_send_ipi_single(int cpu, unsigned int action) +{ + core_send_ipi(cpu, action); +} + +void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + int cpu; + + for_each_cpu(cpu, mask) { + core_send_ipi(cpu, action); + } +} + +/* IRQ_IPI_SMP_FUNCTION Handler */ +void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc) +{ + smp_call_function_interrupt(); +} + +/* IRQ_IPI_SMP_RESCHEDULE handler */ +void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc) +{ + set_need_resched(); +} + +void nlm_common_ipi_handler(int irq, struct pt_regs *regs) +{ + if (irq == IRQ_IPI_SMP_FUNCTION) { + smp_call_function_interrupt(); + } else { + /* Announce that we are for reschduling */ + set_need_resched(); + } +} + +/* + * Called before going into mips code, early cpu init + */ +void nlm_early_init_secondary(void) +{ + write_c0_ebase((uint32_t)nlm_common_ebase); + /* TLB partition here later */ +} + +/* + * Code to run on secondary just after probing the CPU + */ +static void __cpuinit nlm_init_secondary(void) +{ + nlm_smp_irq_init(); +} + +void nlm_smp_finish(void) +{ +#ifdef notyet + nlm_common_msgring_cpu_init(); +#endif +} + +void nlm_cpus_done(void) +{ +} + +/* + * Boot all other cpus in the system, initialize them, and bring them into + * the boot function + */ +int nlm_cpu_unblock[NR_CPUS]; +int nlm_cpu_ready[NR_CPUS]; +unsigned long nlm_next_gp; +unsigned long nlm_next_sp; +cpumask_t phys_cpu_present_map; + +void nlm_boot_secondary(int logical_cpu, struct task_struct *idle) +{ + unsigned long gp = (unsigned long)task_thread_info(idle); + unsigned long sp = (unsigned long)__KSTK_TOS(idle); + int cpu = cpu_logical_map(logical_cpu); + + nlm_next_sp = sp; + nlm_next_gp = gp; + + /* barrier */ + __sync(); + nlm_cpu_unblock[cpu] = 1; +} + +void __init nlm_smp_setup(void) +{ + unsigned int boot_cpu; + int num_cpus, i; + + boot_cpu = hard_smp_processor_id(); + cpus_clear(phys_cpu_present_map); + + cpu_set(boot_cpu, phys_cpu_present_map); + __cpu_number_map[boot_cpu] = 0; + __cpu_logical_map[0] = boot_cpu; + cpu_set(0, cpu_possible_map); + + num_cpus = 1; + for (i = 0; i < NR_CPUS; i++) { + if (nlm_cpu_ready[i]) { + cpu_set(i, phys_cpu_present_map); + __cpu_number_map[i] = num_cpus; + __cpu_logical_map[num_cpus] = i; + cpu_set(num_cpus, cpu_possible_map); + ++num_cpus; + } + } + + pr_info("Phys CPU present map: %lx, possible map %lx\n", + (unsigned long)phys_cpu_present_map.bits[0], + (unsigned long)cpu_possible_map.bits[0]); + + pr_info("Detected %i Slave CPU(s)\n", num_cpus); +} + +void nlm_prepare_cpus(unsigned int max_cpus) +{ +} + +struct plat_smp_ops nlm_smp_ops = { + .send_ipi_single = nlm_send_ipi_single, + .send_ipi_mask = nlm_send_ipi_mask, + .init_secondary = nlm_init_secondary, + .smp_finish = nlm_smp_finish, + .cpus_done = nlm_cpus_done, + .boot_secondary = nlm_boot_secondary, + .smp_setup = nlm_smp_setup, + .prepare_cpus = nlm_prepare_cpus, +}; + +unsigned long secondary_entry_point; + +int nlm_wakeup_secondary_cpus(u32 wakeup_mask) +{ + unsigned int tid, pid, ipi, i, boot_cpu; + void *reset_vec; + + secondary_entry_point = (unsigned long)prom_pre_boot_secondary_cpus; + reset_vec = (void *)CKSEG1ADDR(0x1fc00000); + memcpy(reset_vec, nlm_boot_smp_nmi, 0x80); + boot_cpu = hard_smp_processor_id(); + + for (i = 0; i < NR_CPUS; i++) { + if (i == boot_cpu) + continue; + if (wakeup_mask & (1u << i)) { + tid = i & 0x3; + pid = (i >> 2) & 0x7; + ipi = (tid << 16) | (pid << 20) | (1 << 8); + pic_send_ipi(ipi); + } + } + + return 0; +} diff --git a/arch/mips/netlogic/xlr/smpboot.S b/arch/mips/netlogic/xlr/smpboot.S new file mode 100644 index 000000000000..b8e074402c99 --- /dev/null +++ b/arch/mips/netlogic/xlr/smpboot.S @@ -0,0 +1,94 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/regdef.h> +#include <asm/mipsregs.h> + + +/* Don't jump to linux function from Bootloader stack. Change it + * here. Kernel might allocate bootloader memory before all the CPUs are + * brought up (eg: Inode cache region) and we better don't overwrite this + * memory + */ +NESTED(prom_pre_boot_secondary_cpus, 16, sp) + .set mips64 + mfc0 t0, $15, 1 # read ebase + andi t0, 0x1f # t0 has the processor_id() + sll t0, 2 # offset in cpu array + + PTR_LA t1, nlm_cpu_ready # mark CPU ready + PTR_ADDU t1, t0 + li t2, 1 + sw t2, 0(t1) + + PTR_LA t1, nlm_cpu_unblock + PTR_ADDU t1, t0 +1: lw t2, 0(t1) # wait till unblocked + beqz t2, 1b + nop + + PTR_LA t1, nlm_next_sp + PTR_L sp, 0(t1) + PTR_LA t1, nlm_next_gp + PTR_L gp, 0(t1) + + PTR_LA t0, nlm_early_init_secondary + jalr t0 + nop + + PTR_LA t0, smp_bootstrap + jr t0 + nop +END(prom_pre_boot_secondary_cpus) + +NESTED(nlm_boot_smp_nmi, 0, sp) + .set push + .set noat + .set mips64 + .set noreorder + + /* Clear the NMI and BEV bits */ + MFC0 k0, CP0_STATUS + li k1, 0xffb7ffff + and k0, k0, k1 + MTC0 k0, CP0_STATUS + + PTR_LA k1, secondary_entry_point + PTR_L k0, 0(k1) + jr k0 + nop + .set pop +END(nlm_boot_smp_nmi) diff --git a/arch/mips/netlogic/xlr/time.c b/arch/mips/netlogic/xlr/time.c new file mode 100644 index 000000000000..0d81b262593c --- /dev/null +++ b/arch/mips/netlogic/xlr/time.c @@ -0,0 +1,51 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/init.h> + +#include <asm/time.h> +#include <asm/netlogic/interrupt.h> +#include <asm/netlogic/psb-bootinfo.h> + +unsigned int __cpuinit get_c0_compare_int(void) +{ + return IRQ_TIMER; +} + +void __init plat_time_init(void) +{ + mips_hpt_frequency = nlm_prom_info.cpu_frequency; + pr_info("MIPS counter frequency [%ld]\n", + (unsigned long)mips_hpt_frequency); +} diff --git a/arch/mips/netlogic/xlr/xlr_console.c b/arch/mips/netlogic/xlr/xlr_console.c new file mode 100644 index 000000000000..759df0692201 --- /dev/null +++ b/arch/mips/netlogic/xlr/xlr_console.c @@ -0,0 +1,46 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/types.h> +#include <asm/netlogic/xlr/iomap.h> + +void prom_putchar(char c) +{ + nlm_reg_t *mmio; + + mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET); + while (netlogic_read_reg(mmio, 0x5) == 0) + ; + netlogic_write_reg(mmio, 0x0, c); +} diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index c9209ca6c8e7..4df879937446 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_SIBYTE_SB1250) += fixup-sb1250.o pci-sb1250.o obj-$(CONFIG_SIBYTE_BCM112X) += fixup-sb1250.o pci-sb1250.o obj-$(CONFIG_SIBYTE_BCM1x80) += pci-bcm1480.o pci-bcm1480ht.o obj-$(CONFIG_SNI_RM) += fixup-sni.o ops-sni.o +obj-$(CONFIG_SOC_XWAY) += pci-lantiq.o ops-lantiq.o obj-$(CONFIG_TANBAC_TB0219) += fixup-tb0219.o obj-$(CONFIG_TANBAC_TB0226) += fixup-tb0226.o obj-$(CONFIG_TANBAC_TB0287) += fixup-tb0287.o @@ -55,6 +56,7 @@ obj-$(CONFIG_ZAO_CAPCELLA) += fixup-capcella.o obj-$(CONFIG_WR_PPMC) += fixup-wrppmc.o obj-$(CONFIG_MIKROTIK_RB532) += pci-rc32434.o ops-rc32434.o fixup-rc32434.o obj-$(CONFIG_CPU_CAVIUM_OCTEON) += pci-octeon.o pcie-octeon.o +obj-$(CONFIG_NLM_XLR) += pci-xlr.o ifdef CONFIG_PCI_MSI obj-$(CONFIG_CPU_CAVIUM_OCTEON) += msi-octeon.o diff --git a/arch/mips/pci/ops-lantiq.c b/arch/mips/pci/ops-lantiq.c new file mode 100644 index 000000000000..1f2afb55cc71 --- /dev/null +++ b/arch/mips/pci/ops-lantiq.c @@ -0,0 +1,116 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <asm/addrspace.h> +#include <linux/vmalloc.h> + +#include <lantiq_soc.h> + +#include "pci-lantiq.h" + +#define LTQ_PCI_CFG_BUSNUM_SHF 16 +#define LTQ_PCI_CFG_DEVNUM_SHF 11 +#define LTQ_PCI_CFG_FUNNUM_SHF 8 + +#define PCI_ACCESS_READ 0 +#define PCI_ACCESS_WRITE 1 + +static int ltq_pci_config_access(unsigned char access_type, struct pci_bus *bus, + unsigned int devfn, unsigned int where, u32 *data) +{ + unsigned long cfg_base; + unsigned long flags; + u32 temp; + + /* we support slot from 0 to 15 dev_fn & 0x68 (AD29) is the + SoC itself */ + if ((bus->number != 0) || ((devfn & 0xf8) > 0x78) + || ((devfn & 0xf8) == 0) || ((devfn & 0xf8) == 0x68)) + return 1; + + spin_lock_irqsave(&ebu_lock, flags); + + cfg_base = (unsigned long) ltq_pci_mapped_cfg; + cfg_base |= (bus->number << LTQ_PCI_CFG_BUSNUM_SHF) | (devfn << + LTQ_PCI_CFG_FUNNUM_SHF) | (where & ~0x3); + + /* Perform access */ + if (access_type == PCI_ACCESS_WRITE) { + ltq_w32(swab32(*data), ((u32 *)cfg_base)); + } else { + *data = ltq_r32(((u32 *)(cfg_base))); + *data = swab32(*data); + } + wmb(); + + /* clean possible Master abort */ + cfg_base = (unsigned long) ltq_pci_mapped_cfg; + cfg_base |= (0x0 << LTQ_PCI_CFG_FUNNUM_SHF) + 4; + temp = ltq_r32(((u32 *)(cfg_base))); + temp = swab32(temp); + cfg_base = (unsigned long) ltq_pci_mapped_cfg; + cfg_base |= (0x68 << LTQ_PCI_CFG_FUNNUM_SHF) + 4; + ltq_w32(temp, ((u32 *)cfg_base)); + + spin_unlock_irqrestore(&ebu_lock, flags); + + if (((*data) == 0xffffffff) && (access_type == PCI_ACCESS_READ)) + return 1; + + return 0; +} + +int ltq_pci_read_config_dword(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + u32 data = 0; + + if (ltq_pci_config_access(PCI_ACCESS_READ, bus, devfn, where, &data)) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (size == 1) + *val = (data >> ((where & 3) << 3)) & 0xff; + else if (size == 2) + *val = (data >> ((where & 3) << 3)) & 0xffff; + else + *val = data; + + return PCIBIOS_SUCCESSFUL; +} + +int ltq_pci_write_config_dword(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + u32 data = 0; + + if (size == 4) { + data = val; + } else { + if (ltq_pci_config_access(PCI_ACCESS_READ, bus, + devfn, where, &data)) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (size == 1) + data = (data & ~(0xff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + else if (size == 2) + data = (data & ~(0xffff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + } + + if (ltq_pci_config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data)) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c new file mode 100644 index 000000000000..603d7493e966 --- /dev/null +++ b/arch/mips/pci/pci-lantiq.c @@ -0,0 +1,297 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/platform_device.h> + +#include <asm/pci.h> +#include <asm/gpio.h> +#include <asm/addrspace.h> + +#include <lantiq_soc.h> +#include <lantiq_irq.h> +#include <lantiq_platform.h> + +#include "pci-lantiq.h" + +#define LTQ_PCI_CFG_BASE 0x17000000 +#define LTQ_PCI_CFG_SIZE 0x00008000 +#define LTQ_PCI_MEM_BASE 0x18000000 +#define LTQ_PCI_MEM_SIZE 0x02000000 +#define LTQ_PCI_IO_BASE 0x1AE00000 +#define LTQ_PCI_IO_SIZE 0x00200000 + +#define PCI_CR_FCI_ADDR_MAP0 0x00C0 +#define PCI_CR_FCI_ADDR_MAP1 0x00C4 +#define PCI_CR_FCI_ADDR_MAP2 0x00C8 +#define PCI_CR_FCI_ADDR_MAP3 0x00CC +#define PCI_CR_FCI_ADDR_MAP4 0x00D0 +#define PCI_CR_FCI_ADDR_MAP5 0x00D4 +#define PCI_CR_FCI_ADDR_MAP6 0x00D8 +#define PCI_CR_FCI_ADDR_MAP7 0x00DC +#define PCI_CR_CLK_CTRL 0x0000 +#define PCI_CR_PCI_MOD 0x0030 +#define PCI_CR_PC_ARB 0x0080 +#define PCI_CR_FCI_ADDR_MAP11hg 0x00E4 +#define PCI_CR_BAR11MASK 0x0044 +#define PCI_CR_BAR12MASK 0x0048 +#define PCI_CR_BAR13MASK 0x004C +#define PCI_CS_BASE_ADDR1 0x0010 +#define PCI_CR_PCI_ADDR_MAP11 0x0064 +#define PCI_CR_FCI_BURST_LENGTH 0x00E8 +#define PCI_CR_PCI_EOI 0x002C +#define PCI_CS_STS_CMD 0x0004 + +#define PCI_MASTER0_REQ_MASK_2BITS 8 +#define PCI_MASTER1_REQ_MASK_2BITS 10 +#define PCI_MASTER2_REQ_MASK_2BITS 12 +#define INTERNAL_ARB_ENABLE_BIT 0 + +#define LTQ_CGU_IFCCR 0x0018 +#define LTQ_CGU_PCICR 0x0034 + +#define ltq_pci_w32(x, y) ltq_w32((x), ltq_pci_membase + (y)) +#define ltq_pci_r32(x) ltq_r32(ltq_pci_membase + (x)) + +#define ltq_pci_cfg_w32(x, y) ltq_w32((x), ltq_pci_mapped_cfg + (y)) +#define ltq_pci_cfg_r32(x) ltq_r32(ltq_pci_mapped_cfg + (x)) + +struct ltq_pci_gpio_map { + int pin; + int alt0; + int alt1; + int dir; + char *name; +}; + +/* the pci core can make use of the following gpios */ +static struct ltq_pci_gpio_map ltq_pci_gpio_map[] = { + { 0, 1, 0, 0, "pci-exin0" }, + { 1, 1, 0, 0, "pci-exin1" }, + { 2, 1, 0, 0, "pci-exin2" }, + { 39, 1, 0, 0, "pci-exin3" }, + { 10, 1, 0, 0, "pci-exin4" }, + { 9, 1, 0, 0, "pci-exin5" }, + { 30, 1, 0, 1, "pci-gnt1" }, + { 23, 1, 0, 1, "pci-gnt2" }, + { 19, 1, 0, 1, "pci-gnt3" }, + { 38, 1, 0, 1, "pci-gnt4" }, + { 29, 1, 0, 0, "pci-req1" }, + { 31, 1, 0, 0, "pci-req2" }, + { 3, 1, 0, 0, "pci-req3" }, + { 37, 1, 0, 0, "pci-req4" }, +}; + +__iomem void *ltq_pci_mapped_cfg; +static __iomem void *ltq_pci_membase; + +int (*ltqpci_plat_dev_init)(struct pci_dev *dev) = NULL; + +/* Since the PCI REQ pins can be reused for other functionality, make it + possible to exclude those from interpretation by the PCI controller */ +static int ltq_pci_req_mask = 0xf; + +static int *ltq_pci_irq_map; + +struct pci_ops ltq_pci_ops = { + .read = ltq_pci_read_config_dword, + .write = ltq_pci_write_config_dword +}; + +static struct resource pci_io_resource = { + .name = "pci io space", + .start = LTQ_PCI_IO_BASE, + .end = LTQ_PCI_IO_BASE + LTQ_PCI_IO_SIZE - 1, + .flags = IORESOURCE_IO +}; + +static struct resource pci_mem_resource = { + .name = "pci memory space", + .start = LTQ_PCI_MEM_BASE, + .end = LTQ_PCI_MEM_BASE + LTQ_PCI_MEM_SIZE - 1, + .flags = IORESOURCE_MEM +}; + +static struct pci_controller ltq_pci_controller = { + .pci_ops = <q_pci_ops, + .mem_resource = &pci_mem_resource, + .mem_offset = 0x00000000UL, + .io_resource = &pci_io_resource, + .io_offset = 0x00000000UL, +}; + +int pcibios_plat_dev_init(struct pci_dev *dev) +{ + if (ltqpci_plat_dev_init) + return ltqpci_plat_dev_init(dev); + + return 0; +} + +static u32 ltq_calc_bar11mask(void) +{ + u32 mem, bar11mask; + + /* BAR11MASK value depends on available memory on system. */ + mem = num_physpages * PAGE_SIZE; + bar11mask = (0x0ffffff0 & ~((1 << (fls(mem) - 1)) - 1)) | 8; + + return bar11mask; +} + +static void ltq_pci_setup_gpio(int gpio) +{ + int i; + for (i = 0; i < ARRAY_SIZE(ltq_pci_gpio_map); i++) { + if (gpio & (1 << i)) { + ltq_gpio_request(ltq_pci_gpio_map[i].pin, + ltq_pci_gpio_map[i].alt0, + ltq_pci_gpio_map[i].alt1, + ltq_pci_gpio_map[i].dir, + ltq_pci_gpio_map[i].name); + } + } + ltq_gpio_request(21, 0, 0, 1, "pci-reset"); + ltq_pci_req_mask = (gpio >> PCI_REQ_SHIFT) & PCI_REQ_MASK; +} + +static int __devinit ltq_pci_startup(struct ltq_pci_data *conf) +{ + u32 temp_buffer; + + /* set clock to 33Mhz */ + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR); + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR); + + /* external or internal clock ? */ + if (conf->clock) { + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~(1 << 16), + LTQ_CGU_IFCCR); + ltq_cgu_w32((1 << 30), LTQ_CGU_PCICR); + } else { + ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | (1 << 16), + LTQ_CGU_IFCCR); + ltq_cgu_w32((1 << 31) | (1 << 30), LTQ_CGU_PCICR); + } + + /* setup pci clock and gpis used by pci */ + ltq_pci_setup_gpio(conf->gpio); + + /* enable auto-switching between PCI and EBU */ + ltq_pci_w32(0xa, PCI_CR_CLK_CTRL); + + /* busy, i.e. configuration is not done, PCI access has to be retried */ + ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) & ~(1 << 24), PCI_CR_PCI_MOD); + wmb(); + /* BUS Master/IO/MEM access */ + ltq_pci_cfg_w32(ltq_pci_cfg_r32(PCI_CS_STS_CMD) | 7, PCI_CS_STS_CMD); + + /* enable external 2 PCI masters */ + temp_buffer = ltq_pci_r32(PCI_CR_PC_ARB); + temp_buffer &= (~(ltq_pci_req_mask << 16)); + /* enable internal arbiter */ + temp_buffer |= (1 << INTERNAL_ARB_ENABLE_BIT); + /* enable internal PCI master reqest */ + temp_buffer &= (~(3 << PCI_MASTER0_REQ_MASK_2BITS)); + + /* enable EBU request */ + temp_buffer &= (~(3 << PCI_MASTER1_REQ_MASK_2BITS)); + + /* enable all external masters request */ + temp_buffer &= (~(3 << PCI_MASTER2_REQ_MASK_2BITS)); + ltq_pci_w32(temp_buffer, PCI_CR_PC_ARB); + wmb(); + + /* setup BAR memory regions */ + ltq_pci_w32(0x18000000, PCI_CR_FCI_ADDR_MAP0); + ltq_pci_w32(0x18400000, PCI_CR_FCI_ADDR_MAP1); + ltq_pci_w32(0x18800000, PCI_CR_FCI_ADDR_MAP2); + ltq_pci_w32(0x18c00000, PCI_CR_FCI_ADDR_MAP3); + ltq_pci_w32(0x19000000, PCI_CR_FCI_ADDR_MAP4); + ltq_pci_w32(0x19400000, PCI_CR_FCI_ADDR_MAP5); + ltq_pci_w32(0x19800000, PCI_CR_FCI_ADDR_MAP6); + ltq_pci_w32(0x19c00000, PCI_CR_FCI_ADDR_MAP7); + ltq_pci_w32(0x1ae00000, PCI_CR_FCI_ADDR_MAP11hg); + ltq_pci_w32(ltq_calc_bar11mask(), PCI_CR_BAR11MASK); + ltq_pci_w32(0, PCI_CR_PCI_ADDR_MAP11); + ltq_pci_w32(0, PCI_CS_BASE_ADDR1); + /* both TX and RX endian swap are enabled */ + ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_EOI) | 3, PCI_CR_PCI_EOI); + wmb(); + ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR12MASK) | 0x80000000, + PCI_CR_BAR12MASK); + ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR13MASK) | 0x80000000, + PCI_CR_BAR13MASK); + /*use 8 dw burst length */ + ltq_pci_w32(0x303, PCI_CR_FCI_BURST_LENGTH); + ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) | (1 << 24), PCI_CR_PCI_MOD); + wmb(); + + /* setup irq line */ + ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_CON) | 0xc, LTQ_EBU_PCC_CON); + ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_IEN) | 0x10, LTQ_EBU_PCC_IEN); + + /* toggle reset pin */ + __gpio_set_value(21, 0); + wmb(); + mdelay(1); + __gpio_set_value(21, 1); + return 0; +} + +int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + if (ltq_pci_irq_map[slot]) + return ltq_pci_irq_map[slot]; + printk(KERN_ERR "lq_pci: trying to map irq for unknown slot %d\n", + slot); + + return 0; +} + +static int __devinit ltq_pci_probe(struct platform_device *pdev) +{ + struct ltq_pci_data *ltq_pci_data = + (struct ltq_pci_data *) pdev->dev.platform_data; + pci_probe_only = 0; + ltq_pci_irq_map = ltq_pci_data->irq; + ltq_pci_membase = ioremap_nocache(PCI_CR_BASE_ADDR, PCI_CR_SIZE); + ltq_pci_mapped_cfg = + ioremap_nocache(LTQ_PCI_CFG_BASE, LTQ_PCI_CFG_BASE); + ltq_pci_controller.io_map_base = + (unsigned long)ioremap(LTQ_PCI_IO_BASE, LTQ_PCI_IO_SIZE - 1); + ltq_pci_startup(ltq_pci_data); + register_pci_controller(<q_pci_controller); + + return 0; +} + +static struct platform_driver +ltq_pci_driver = { + .probe = ltq_pci_probe, + .driver = { + .name = "ltq_pci", + .owner = THIS_MODULE, + }, +}; + +int __init pcibios_init(void) +{ + int ret = platform_driver_register(<q_pci_driver); + if (ret) + printk(KERN_INFO "ltq_pci: Error registering platfom driver!"); + return ret; +} + +arch_initcall(pcibios_init); diff --git a/arch/mips/pci/pci-lantiq.h b/arch/mips/pci/pci-lantiq.h new file mode 100644 index 000000000000..66bf6cd6be3c --- /dev/null +++ b/arch/mips/pci/pci-lantiq.h @@ -0,0 +1,18 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2010 John Crispin <blogic@openwrt.org> + */ + +#ifndef _LTQ_PCI_H__ +#define _LTQ_PCI_H__ + +extern __iomem void *ltq_pci_mapped_cfg; +extern int ltq_pci_read_config_dword(struct pci_bus *bus, + unsigned int devfn, int where, int size, u32 *val); +extern int ltq_pci_write_config_dword(struct pci_bus *bus, + unsigned int devfn, int where, int size, u32 val); + +#endif diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c new file mode 100644 index 000000000000..38fece16c435 --- /dev/null +++ b/arch/mips/pci/pci-xlr.c @@ -0,0 +1,214 @@ +/* + * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights + * reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the NetLogic + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/console.h> + +#include <asm/io.h> + +#include <asm/netlogic/interrupt.h> +#include <asm/netlogic/xlr/iomap.h> +#include <asm/netlogic/xlr/pic.h> +#include <asm/netlogic/xlr/xlr.h> + +static void *pci_config_base; + +#define pci_cfg_addr(bus, devfn, off) (((bus) << 16) | ((devfn) << 8) | (off)) + +/* PCI ops */ +static inline u32 pci_cfg_read_32bit(struct pci_bus *bus, unsigned int devfn, + int where) +{ + u32 data; + u32 *cfgaddr; + + cfgaddr = (u32 *)(pci_config_base + + pci_cfg_addr(bus->number, devfn, where & ~3)); + data = *cfgaddr; + return cpu_to_le32(data); +} + +static inline void pci_cfg_write_32bit(struct pci_bus *bus, unsigned int devfn, + int where, u32 data) +{ + u32 *cfgaddr; + + cfgaddr = (u32 *)(pci_config_base + + pci_cfg_addr(bus->number, devfn, where & ~3)); + *cfgaddr = cpu_to_le32(data); +} + +static int nlm_pcibios_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + u32 data; + + if ((size == 2) && (where & 1)) + return PCIBIOS_BAD_REGISTER_NUMBER; + else if ((size == 4) && (where & 3)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + data = pci_cfg_read_32bit(bus, devfn, where); + + if (size == 1) + *val = (data >> ((where & 3) << 3)) & 0xff; + else if (size == 2) + *val = (data >> ((where & 3) << 3)) & 0xffff; + else + *val = data; + + return PCIBIOS_SUCCESSFUL; +} + + +static int nlm_pcibios_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + u32 data; + + if ((size == 2) && (where & 1)) + return PCIBIOS_BAD_REGISTER_NUMBER; + else if ((size == 4) && (where & 3)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + data = pci_cfg_read_32bit(bus, devfn, where); + + if (size == 1) + data = (data & ~(0xff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + else if (size == 2) + data = (data & ~(0xffff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + else + data = val; + + pci_cfg_write_32bit(bus, devfn, where, data); + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops nlm_pci_ops = { + .read = nlm_pcibios_read, + .write = nlm_pcibios_write +}; + +static struct resource nlm_pci_mem_resource = { + .name = "XLR PCI MEM", + .start = 0xd0000000UL, /* 256MB PCI mem @ 0xd000_0000 */ + .end = 0xdfffffffUL, + .flags = IORESOURCE_MEM, +}; + +static struct resource nlm_pci_io_resource = { + .name = "XLR IO MEM", + .start = 0x10000000UL, /* 16MB PCI IO @ 0x1000_0000 */ + .end = 0x100fffffUL, + .flags = IORESOURCE_IO, +}; + +struct pci_controller nlm_pci_controller = { + .index = 0, + .pci_ops = &nlm_pci_ops, + .mem_resource = &nlm_pci_mem_resource, + .mem_offset = 0x00000000UL, + .io_resource = &nlm_pci_io_resource, + .io_offset = 0x00000000UL, +}; + +int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + if (!nlm_chip_is_xls()) + return PIC_PCIX_IRQ; /* for XLR just one IRQ*/ + + /* + * For XLS PCIe, there is an IRQ per Link, find out which + * link the device is on to assign interrupts + */ + if (dev->bus->self == NULL) + return 0; + + switch (dev->bus->self->devfn) { + case 0x0: + return PIC_PCIE_LINK0_IRQ; + case 0x8: + return PIC_PCIE_LINK1_IRQ; + case 0x10: + if (nlm_chip_is_xls_b()) + return PIC_PCIE_XLSB0_LINK2_IRQ; + else + return PIC_PCIE_LINK2_IRQ; + case 0x18: + if (nlm_chip_is_xls_b()) + return PIC_PCIE_XLSB0_LINK3_IRQ; + else + return PIC_PCIE_LINK3_IRQ; + } + WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn); + return 0; +} + +/* Do platform specific device initialization at pci_enable_device() time */ +int pcibios_plat_dev_init(struct pci_dev *dev) +{ + return 0; +} + +static int __init pcibios_init(void) +{ + /* PSB assigns PCI resources */ + pci_probe_only = 1; + pci_config_base = ioremap(DEFAULT_PCI_CONFIG_BASE, 16 << 20); + + /* Extend IO port for memory mapped io */ + ioport_resource.start = 0; + ioport_resource.end = ~0; + + set_io_port_base(CKSEG1); + nlm_pci_controller.io_map_base = CKSEG1; + + pr_info("Registering XLR/XLS PCIX/PCIE Controller.\n"); + register_pci_controller(&nlm_pci_controller); + + return 0; +} + +arch_initcall(pcibios_init); + +struct pci_fixup pcibios_fixups[] = { + {0} +}; diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index efc9e889b349..2608752898c0 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -55,6 +55,8 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); + if (status & 0x4) + scheduler_ipi(); break; case 1: @@ -63,6 +65,8 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); + if (status & 0x4) + scheduler_ipi(); break; } } diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index 37de05d595e7..6c47dfeb7be3 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -185,7 +185,7 @@ int __init rb532_gpio_init(void) struct resource *r; r = rb532_gpio_reg0_res; - rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start); + rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r)); if (!rb532_gpio_chip->regbase) { printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 0a04603d577c..b18b04e48577 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void) #ifdef CONFIG_SMP if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ); + scheduler_ipi(); } else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ); + scheduler_ipi(); } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ); smp_call_function_interrupt(); diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index a152538d3c97..ef74f3267f91 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -66,18 +66,7 @@ static int rt_next_event(unsigned long delta, struct clock_event_device *evt) static void rt_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { - switch (mode) { - case CLOCK_EVT_MODE_ONESHOT: - /* The only mode supported */ - break; - - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do */ - break; - } + /* Nothing to do ... */ } int rt_timer_irq; @@ -174,8 +163,7 @@ static void __init hub_rt_clocksource_init(void) { struct clocksource *cs = &hub_rt_clocksource; - clocksource_set_clock(cs, CYCLES_PER_SEC); - clocksource_register(cs); + clocksource_register_hz(cs, CYCLES_PER_SEC); } void __init plat_time_init(void) diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 47b347c992ea..d667875be564 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -20,6 +20,7 @@ #include <linux/delay.h> #include <linux/smp.h> #include <linux/kernel_stat.h> +#include <linux/sched.h> #include <asm/mmu_context.h> #include <asm/io.h> @@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ __raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]); - /* - * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the - * interrupt will do the reschedule for us - */ + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index c00a5cb1128d..38e7f6bd7922 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -21,6 +21,7 @@ #include <linux/interrupt.h> #include <linux/smp.h> #include <linux/kernel_stat.h> +#include <linux/sched.h> #include <asm/mmu_context.h> #include <asm/io.h> @@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ ____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]); - /* - * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the - * interrupt will do the reschedule for us - */ + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index 3dc19f482959..e9f95dcde379 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -318,19 +318,15 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask) } #if defined(CONFIG_TC35815) || defined(CONFIG_TC35815_MODULE) -static int tx4939_get_eth_speed(struct net_device *dev) +static u32 tx4939_get_eth_speed(struct net_device *dev) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - int speed = 100; /* default 100Mbps */ - int err; - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return speed; - err = dev->ethtool_ops->get_settings(dev, &cmd); - if (err < 0) - return speed; - speed = cmd.speed == SPEED_100 ? 100 : 10; - return speed; + struct ethtool_cmd cmd; + if (dev_ethtool_get_settings(dev, &cmd)) + return 100; /* default 100Mbps */ + + return ethtool_cmd_speed(&cmd); } + static int tx4939_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -343,8 +339,7 @@ static int tx4939_netdev_event(struct notifier_block *this, else if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(1)) bit = TX4939_PCFG_SPEED1; if (bit) { - int speed = tx4939_get_eth_speed(dev); - if (speed == 100) + if (tx4939_get_eth_speed(dev) == 100) txx9_set64(&tx4939_ccfgptr->pcfg, bit); else txx9_clear64(&tx4939_ccfgptr->pcfg, bit); diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 226c826a2194..83fb27912231 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -494,14 +494,11 @@ void smp_send_stop(void) * @irq: The interrupt number. * @dev_id: The device ID. * - * We need do nothing here, since the scheduling will be effected on our way - * back through entry.S. - * * Returns IRQ_HANDLED to indicate we handled the interrupt successfully. */ static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) { - /* do nothing */ + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index d18328b3f938..da601dd34c05 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -3,6 +3,7 @@ #include <linux/mm.h> #include <linux/uaccess.h> +#include <asm/tlbflush.h> /* The usual comment is "Caches aren't brain-dead on the <architecture>". * Unfortunately, that doesn't apply to PA-RISC. */ @@ -112,8 +113,10 @@ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { - if (PageAnon(page)) + if (PageAnon(page)) { + flush_tlb_page(vma, vmaddr); flush_dcache_page_asm(page_to_phys(page), vmaddr); + } } #ifdef CONFIG_DEBUG_RODATA diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 5d7b8ce9fdf3..22dadeb58695 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -177,7 +177,10 @@ struct vm_area_struct; #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _PAGE_KERNEL (_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) +#define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED) +#define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC) +#define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE) +#define _PAGE_KERNEL (_PAGE_KERNEL_RO | _PAGE_WRITE) /* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds * are page-aligned, we don't care about the PAGE_OFFSET bits, except @@ -208,7 +211,9 @@ struct vm_area_struct; #define PAGE_COPY PAGE_EXECREAD #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) -#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RWX __pgprot(_PAGE_KERNEL_RWX) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) #define PAGE_GATEWAY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ) diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 3eb82c2a5ec3..9cbc2c3bf630 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -814,8 +814,14 @@ #define __NR_recvmmsg (__NR_Linux + 319) #define __NR_accept4 (__NR_Linux + 320) #define __NR_prlimit64 (__NR_Linux + 321) - -#define __NR_Linux_syscalls (__NR_prlimit64 + 1) +#define __NR_fanotify_init (__NR_Linux + 322) +#define __NR_fanotify_mark (__NR_Linux + 323) +#define __NR_clock_adjtime (__NR_Linux + 324) +#define __NR_name_to_handle_at (__NR_Linux + 325) +#define __NR_open_by_handle_at (__NR_Linux + 326) +#define __NR_syncfs (__NR_Linux + 327) + +#define __NR_Linux_syscalls (__NR_syncfs + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 3f11331c2775..83335f3da5fc 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -304,10 +304,20 @@ void flush_dcache_page(struct page *page) offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; addr = mpnt->vm_start + offset; + /* The TLB is the engine of coherence on parisc: The + * CPU is entitled to speculate any page with a TLB + * mapping, so here we kill the mapping then flush the + * page along a special flush only alias mapping. + * This guarantees that the page is no-longer in the + * cache for any process and nor may it be + * speculatively read in (until the user or kernel + * specifically accesses it, of course) */ + + flush_tlb_page(mpnt, addr); if (old_addr == 0 || (old_addr & (SHMLBA - 1)) != (addr & (SHMLBA - 1))) { __flush_cache_page(mpnt, addr, page_to_phys(page)); if (old_addr) - printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? mpnt->vm_file->f_path.dentry->d_name.name : "(null)"); + printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? (char *)mpnt->vm_file->f_path.dentry->d_name.name : "(null)"); old_addr = addr; } } @@ -499,6 +509,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long { BUG_ON(!vma->vm_mm->context); + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, page_to_phys(pfn_to_page(pfn))); } diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index ead8d2a1034c..6f0594439143 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -692,6 +692,9 @@ ENTRY(fault_vector_11) END(fault_vector_11) #endif + /* Fault vector is separately protected and *must* be on its own page */ + .align PAGE_SIZE +ENTRY(end_fault_vector) .import handle_interruption,code .import do_cpu_irq_mask,code diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 145c5e4caaa0..37aabd772fbb 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -106,8 +106,9 @@ $bss_loop: #endif - /* Now initialize the PTEs themselves */ - ldo 0+_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */ + /* Now initialize the PTEs themselves. We use RWX for + * everything ... it will get remapped correctly later */ + ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */ ldi (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */ load32 PA(pg0),%r1 diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 6e81bb596e5b..cedbbb8b18d9 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -61,8 +61,10 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/bug.h> +#include <linux/mm.h> #include <linux/slab.h> +#include <asm/pgtable.h> #include <asm/unwind.h> #if 0 @@ -214,7 +216,13 @@ void *module_alloc(unsigned long size) { if (size == 0) return NULL; - return vmalloc(size); + /* using RWX means less protection for modules, but it's + * easier than trying to map the text, data, init_text and + * init_data correctly */ + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_HIGHMEM, + PAGE_KERNEL_RWX, -1, + __builtin_return_address(0)); } #ifndef CONFIG_64BIT diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index a85823668cba..93ff3d90edd1 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -817,10 +817,7 @@ ENTRY(purge_kernel_dcache_page) .procend ENDPROC(purge_kernel_dcache_page) - - .export flush_user_dcache_range_asm - -flush_user_dcache_range_asm: +ENTRY(flush_user_dcache_range_asm) .proc .callinfo NO_CALLS .entry @@ -839,6 +836,7 @@ flush_user_dcache_range_asm: .exit .procend +ENDPROC(flush_user_dcache_range_asm) ENTRY(flush_kernel_dcache_range_asm) .proc diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 69d63d354ef0..828305f19cff 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id) case IPI_RESCHEDULE: smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu); - /* - * Reschedule callback. Everything to be - * done is done by the interrupt return path. - */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 88a0ad14a9c9..dc9a62462323 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -228,3 +228,11 @@ asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, ((loff_t)lenhi << 32) | lenlo); } + +asmlinkage long compat_sys_fanotify_mark(int fan_fd, int flags, u32 mask_hi, + u32 mask_lo, int fd, + const char __user *pathname) +{ + return sys_fanotify_mark(fan_fd, flags, ((u64)mask_hi << 32) | mask_lo, + fd, pathname); +} diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 4be85ee10b85..a5b02ce4d41e 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -420,6 +420,12 @@ ENTRY_COMP(recvmmsg) ENTRY_SAME(accept4) /* 320 */ ENTRY_SAME(prlimit64) + ENTRY_SAME(fanotify_init) + ENTRY_COMP(fanotify_mark) + ENTRY_COMP(clock_adjtime) + ENTRY_SAME(name_to_handle_at) /* 325 */ + ENTRY_COMP(open_by_handle_at) + ENTRY_SAME(syncfs) /* Nothing yet */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 8f1e4efd143e..e1a55849bfa7 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -69,6 +69,9 @@ SECTIONS /* End of text section */ _etext = .; + /* Start of data section */ + _sdata = .; + RODATA /* writeable */ @@ -134,6 +137,7 @@ SECTIONS . = ALIGN(16384); __init_begin = .; INIT_TEXT_SECTION(16384) + . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(16) /* we have to discard exit text and such at runtime, not link time */ .exit.text : diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index b1d126258dee..5fa1e273006e 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -371,24 +371,158 @@ static void __init setup_bootmem(void) request_resource(&sysram_resources[0], &pdcdata_resource); } +static void __init map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) +{ + pgd_t *pg_dir; + pmd_t *pmd; + pte_t *pg_table; + unsigned long end_paddr; + unsigned long start_pmd; + unsigned long start_pte; + unsigned long tmp1; + unsigned long tmp2; + unsigned long address; + unsigned long vaddr; + unsigned long ro_start; + unsigned long ro_end; + unsigned long fv_addr; + unsigned long gw_addr; + extern const unsigned long fault_vector_20; + extern void * const linux_gateway_page; + + ro_start = __pa((unsigned long)_text); + ro_end = __pa((unsigned long)&data_start); + fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; + gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; + + end_paddr = start_paddr + size; + + pg_dir = pgd_offset_k(start_vaddr); + +#if PTRS_PER_PMD == 1 + start_pmd = 0; +#else + start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); +#endif + start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); + + address = start_paddr; + vaddr = start_vaddr; + while (address < end_paddr) { +#if PTRS_PER_PMD == 1 + pmd = (pmd_t *)__pa(pg_dir); +#else + pmd = (pmd_t *)pgd_address(*pg_dir); + + /* + * pmd is physical at this point + */ + + if (!pmd) { + pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE << PMD_ORDER); + pmd = (pmd_t *) __pa(pmd); + } + + pgd_populate(NULL, pg_dir, __va(pmd)); +#endif + pg_dir++; + + /* now change pmd to kernel virtual addresses */ + + pmd = (pmd_t *)__va(pmd) + start_pmd; + for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++, pmd++) { + + /* + * pg_table is physical at this point + */ + + pg_table = (pte_t *)pmd_address(*pmd); + if (!pg_table) { + pg_table = (pte_t *) + alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE); + pg_table = (pte_t *) __pa(pg_table); + } + + pmd_populate_kernel(NULL, pmd, __va(pg_table)); + + /* now change pg_table to kernel virtual addresses */ + + pg_table = (pte_t *) __va(pg_table) + start_pte; + for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) { + pte_t pte; + + /* + * Map the fault vector writable so we can + * write the HPMC checksum. + */ + if (force) + pte = __mk_pte(address, pgprot); + else if (core_kernel_text(vaddr) && + address != fv_addr) + pte = __mk_pte(address, PAGE_KERNEL_EXEC); + else +#if defined(CONFIG_PARISC_PAGE_SIZE_4KB) + if (address >= ro_start && address < ro_end + && address != fv_addr + && address != gw_addr) + pte = __mk_pte(address, PAGE_KERNEL_RO); + else +#endif + pte = __mk_pte(address, pgprot); + + if (address >= end_paddr) { + if (force) + break; + else + pte_val(pte) = 0; + } + + set_pte(pg_table, pte); + + address += PAGE_SIZE; + vaddr += PAGE_SIZE; + } + start_pte = 0; + + if (address >= end_paddr) + break; + } + start_pmd = 0; + } +} + void free_initmem(void) { unsigned long addr; unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; -#ifdef CONFIG_DEBUG_KERNEL + /* The init text pages are marked R-X. We have to + * flush the icache and mark them RW- + * + * This is tricky, because map_pages is in the init section. + * Do a dummy remap of the data section first (the data + * section is already PAGE_KERNEL) to pull in the TLB entries + * for map_kernel */ + map_pages(init_begin, __pa(init_begin), init_end - init_begin, + PAGE_KERNEL_RWX, 1); + /* now remap at PAGE_KERNEL since the TLB is pre-primed to execute + * map_pages */ + map_pages(init_begin, __pa(init_begin), init_end - init_begin, + PAGE_KERNEL, 1); + + /* force the kernel to see the new TLB entries */ + __flush_tlb_range(0, init_begin, init_end); /* Attempt to catch anyone trying to execute code here * by filling the page with BRK insns. */ memset((void *)init_begin, 0x00, init_end - init_begin); + /* finally dump all the instructions which were cached, since the + * pages are no-longer executable */ flush_icache_range(init_begin, init_end); -#endif - /* align __init_begin and __init_end to page size, - ignoring linker script where we might have tried to save RAM */ - init_begin = PAGE_ALIGN(init_begin); - init_end = PAGE_ALIGN(init_end); for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); @@ -618,114 +752,6 @@ void show_mem(unsigned int filter) #endif } - -static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot) -{ - pgd_t *pg_dir; - pmd_t *pmd; - pte_t *pg_table; - unsigned long end_paddr; - unsigned long start_pmd; - unsigned long start_pte; - unsigned long tmp1; - unsigned long tmp2; - unsigned long address; - unsigned long ro_start; - unsigned long ro_end; - unsigned long fv_addr; - unsigned long gw_addr; - extern const unsigned long fault_vector_20; - extern void * const linux_gateway_page; - - ro_start = __pa((unsigned long)_text); - ro_end = __pa((unsigned long)&data_start); - fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; - gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; - - end_paddr = start_paddr + size; - - pg_dir = pgd_offset_k(start_vaddr); - -#if PTRS_PER_PMD == 1 - start_pmd = 0; -#else - start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); -#endif - start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); - - address = start_paddr; - while (address < end_paddr) { -#if PTRS_PER_PMD == 1 - pmd = (pmd_t *)__pa(pg_dir); -#else - pmd = (pmd_t *)pgd_address(*pg_dir); - - /* - * pmd is physical at this point - */ - - if (!pmd) { - pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER); - pmd = (pmd_t *) __pa(pmd); - } - - pgd_populate(NULL, pg_dir, __va(pmd)); -#endif - pg_dir++; - - /* now change pmd to kernel virtual addresses */ - - pmd = (pmd_t *)__va(pmd) + start_pmd; - for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) { - - /* - * pg_table is physical at this point - */ - - pg_table = (pte_t *)pmd_address(*pmd); - if (!pg_table) { - pg_table = (pte_t *) - alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE); - pg_table = (pte_t *) __pa(pg_table); - } - - pmd_populate_kernel(NULL, pmd, __va(pg_table)); - - /* now change pg_table to kernel virtual addresses */ - - pg_table = (pte_t *) __va(pg_table) + start_pte; - for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) { - pte_t pte; - - /* - * Map the fault vector writable so we can - * write the HPMC checksum. - */ -#if defined(CONFIG_PARISC_PAGE_SIZE_4KB) - if (address >= ro_start && address < ro_end - && address != fv_addr - && address != gw_addr) - pte = __mk_pte(address, PAGE_KERNEL_RO); - else -#endif - pte = __mk_pte(address, pgprot); - - if (address >= end_paddr) - pte_val(pte) = 0; - - set_pte(pg_table, pte); - - address += PAGE_SIZE; - } - start_pte = 0; - - if (address >= end_paddr) - break; - } - start_pmd = 0; - } -} - /* * pagetable_init() sets up the page tables * @@ -750,14 +776,14 @@ static void __init pagetable_init(void) size = pmem_ranges[range].pages << PAGE_SHIFT; map_pages((unsigned long)__va(start_paddr), start_paddr, - size, PAGE_KERNEL); + size, PAGE_KERNEL, 0); } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_end && initrd_end > mem_limit) { printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end); map_pages(initrd_start, __pa(initrd_start), - initrd_end - initrd_start, PAGE_KERNEL); + initrd_end - initrd_start, PAGE_KERNEL, 0); } #endif @@ -782,7 +808,7 @@ static void __init gateway_init(void) */ map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page), - PAGE_SIZE, PAGE_GATEWAY); + PAGE_SIZE, PAGE_GATEWAY, 1); } #ifdef CONFIG_HPUX diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8f4d50b0adfa..a3128ca0fe11 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -193,6 +193,12 @@ config SYS_SUPPORTS_APM_EMULATION default y if PMAC_APM_EMU bool +config EPAPR_BOOT + bool + help + Used to allow a board to specify it wants an ePAPR compliant wrapper. + default n + config DEFAULT_UIMAGE bool help diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 2d38a50e66ba..a597dd77b903 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -267,6 +267,11 @@ config PPC_EARLY_DEBUG_USBGECKO Select this to enable early debugging for Nintendo GameCube/Wii consoles via an external USB Gecko adapter. +config PPC_EARLY_DEBUG_WSP + bool "Early debugging via WSP's internal UART" + depends on PPC_WSP + select PPC_UDBG_16550 + endchoice config PPC_EARLY_DEBUG_44x_PHYSLOW diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 89178164af5e..c26200b40a47 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -69,7 +69,8 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \ cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \ fsl-soc.c mpc8xx.c pq2.c ugecon.c src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \ - cuboot-ebony.c cuboot-hotfoot.c treeboot-ebony.c prpmc2800.c \ + cuboot-ebony.c cuboot-hotfoot.c epapr.c treeboot-ebony.c \ + prpmc2800.c \ ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \ cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \ cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \ @@ -127,7 +128,7 @@ quiet_cmd_bootas = BOOTAS $@ cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< quiet_cmd_bootar = BOOTAR $@ - cmd_bootar = $(CROSS32AR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ + cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE $(call if_changed_dep,bootcc) @@ -182,6 +183,7 @@ image-$(CONFIG_PPC_HOLLY) += dtbImage.holly image-$(CONFIG_PPC_PRPMC2800) += dtbImage.prpmc2800 image-$(CONFIG_PPC_ISERIES) += zImage.iseries image-$(CONFIG_DEFAULT_UIMAGE) += uImage +image-$(CONFIG_EPAPR_BOOT) += zImage.epapr # # Targets which embed a device tree blob diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index f1c4dfc635be..0f7428a37efb 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -6,16 +6,28 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + * NOTE: this code runs in 32 bit mode, is position-independent, + * and is packaged as ELF32. */ #include "ppc_asm.h" .text - /* a procedure descriptor used when booting this as a COFF file */ + /* A procedure descriptor used when booting this as a COFF file. + * When making COFF, this comes first in the link and we're + * linked at 0x500000. + */ .globl _zimage_start_opd _zimage_start_opd: - .long _zimage_start, 0, 0, 0 + .long 0x500000, 0, 0, 0 + +p_start: .long _start +p_etext: .long _etext +p_bss_start: .long __bss_start +p_end: .long _end + + .weak _platform_stack_top +p_pstack: .long _platform_stack_top .weak _zimage_start .globl _zimage_start @@ -24,37 +36,65 @@ _zimage_start: _zimage_start_lib: /* Work out the offset between the address we were linked at and the address where we're running. */ - bl 1f -1: mflr r0 - lis r9,1b@ha - addi r9,r9,1b@l - subf. r0,r9,r0 - beq 3f /* if running at same address as linked */ + bl .+4 +p_base: mflr r10 /* r10 now points to runtime addr of p_base */ + /* grab the link address of the dynamic section in r11 */ + addis r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha + lwz r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11) + cmpwi r11,0 + beq 3f /* if not linked -pie */ + /* get the runtime address of the dynamic section in r12 */ + .weak __dynamic_start + addis r12,r10,(__dynamic_start-p_base)@ha + addi r12,r12,(__dynamic_start-p_base)@l + subf r11,r11,r12 /* runtime - linktime offset */ + + /* The dynamic section contains a series of tagged entries. + * We need the RELA and RELACOUNT entries. */ +RELA = 7 +RELACOUNT = 0x6ffffff9 + li r9,0 + li r0,0 +9: lwz r8,0(r12) /* get tag */ + cmpwi r8,0 + beq 10f /* end of list */ + cmpwi r8,RELA + bne 11f + lwz r9,4(r12) /* get RELA pointer in r9 */ + b 12f +11: addis r8,r8,(-RELACOUNT)@ha + cmpwi r8,RELACOUNT@l + bne 12f + lwz r0,4(r12) /* get RELACOUNT value in r0 */ +12: addi r12,r12,8 + b 9b - /* The .got2 section contains a list of addresses, so add - the address offset onto each entry. */ - lis r9,__got2_start@ha - addi r9,r9,__got2_start@l - lis r8,__got2_end@ha - addi r8,r8,__got2_end@l - subf. r8,r9,r8 + /* The relocation section contains a list of relocations. + * We now do the R_PPC_RELATIVE ones, which point to words + * which need to be initialized with addend + offset. + * The R_PPC_RELATIVE ones come first and there are RELACOUNT + * of them. */ +10: /* skip relocation if we don't have both */ + cmpwi r0,0 beq 3f - srwi. r8,r8,2 - mtctr r8 - add r9,r0,r9 -2: lwz r8,0(r9) - add r8,r8,r0 - stw r8,0(r9) - addi r9,r9,4 + cmpwi r9,0 + beq 3f + + add r9,r9,r11 /* Relocate RELA pointer */ + mtctr r0 +2: lbz r0,4+3(r9) /* ELF32_R_INFO(reloc->r_info) */ + cmpwi r0,22 /* R_PPC_RELATIVE */ + bne 3f + lwz r12,0(r9) /* reloc->r_offset */ + lwz r0,8(r9) /* reloc->r_addend */ + add r0,r0,r11 + stwx r0,r11,r12 + addi r9,r9,12 bdnz 2b /* Do a cache flush for our text, in case the loader didn't */ -3: lis r9,_start@ha - addi r9,r9,_start@l - add r9,r0,r9 - lis r8,_etext@ha - addi r8,r8,_etext@l - add r8,r0,r8 +3: lwz r9,p_start-p_base(r10) /* note: these are relocated now */ + lwz r8,p_etext-p_base(r10) 4: dcbf r0,r9 icbi r0,r9 addi r9,r9,0x20 @@ -64,27 +104,19 @@ _zimage_start_lib: isync /* Clear the BSS */ - lis r9,__bss_start@ha - addi r9,r9,__bss_start@l - add r9,r0,r9 - lis r8,_end@ha - addi r8,r8,_end@l - add r8,r0,r8 - li r10,0 -5: stw r10,0(r9) + lwz r9,p_bss_start-p_base(r10) + lwz r8,p_end-p_base(r10) + li r0,0 +5: stw r0,0(r9) addi r9,r9,4 cmplw cr0,r9,r8 blt 5b /* Possibly set up a custom stack */ -.weak _platform_stack_top - lis r8,_platform_stack_top@ha - addi r8,r8,_platform_stack_top@l + lwz r8,p_pstack-p_base(r10) cmpwi r8,0 beq 6f - add r8,r0,r8 lwz r1,0(r8) - add r1,r0,r1 li r0,0 stwu r0,-16(r1) /* establish a stack frame */ 6: diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts index e0668f877794..d6a8ae458137 100644 --- a/arch/powerpc/boot/dts/p1020rdb.dts +++ b/arch/powerpc/boot/dts/p1020rdb.dts @@ -9,12 +9,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p1020si.dtsi" + / { - model = "fsl,P1020"; + model = "fsl,P1020RDB"; compatible = "fsl,P1020RDB"; - #address-cells = <2>; - #size-cells = <2>; aliases { serial0 = &serial0; @@ -26,34 +25,11 @@ pci1 = &pci1; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P1020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; - }; - - PowerPC,P1020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; - }; - }; - memory { device_type = "memory"; }; localbus@ffe05000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus"; - reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; - interrupt-parent = <&mpic>; /* NOR, NAND Flashes and Vitesse 5 port L2 switch */ ranges = <0x0 0x0 0x0 0xef000000 0x01000000 @@ -165,88 +141,14 @@ }; soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p1020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p1020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <16 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p1020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <16 2>; - }; - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; }; }; - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - spi@7000 { - cell-index = <0>; - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; - mode = "cpu"; fsl_m25p80@0 { #address-cells = <1>; @@ -294,66 +196,7 @@ }; }; - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,p1020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x40000>; // L2,256K - interrupt-parent = <&mpic>; - interrupts = <16 2>; - }; - - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; - }; - mdio@24000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,etsec2-mdio"; - reg = <0x24000 0x1000 0xb0030 0x4>; phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; @@ -369,10 +212,6 @@ }; mdio@25000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,etsec2-tbi"; - reg = <0x25000 0x1000 0xb1030 0x4>; tbi0: tbi-phy@11 { reg = <0x11>; @@ -381,97 +220,25 @@ }; enet0: ethernet@b0000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "fsl,etsec2"; - fsl,num_rx_queues = <0x8>; - fsl,num_tx_queues = <0x8>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = <&mpic>; fixed-link = <1 1 1000 0 0>; phy-connection-type = "rgmii-id"; - queue-group@0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb0000 0x1000>; - interrupts = <29 2 30 2 34 2>; - }; - - queue-group@1 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb4000 0x1000>; - interrupts = <17 2 18 2 24 2>; - }; }; enet1: ethernet@b1000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "fsl,etsec2"; - fsl,num_rx_queues = <0x8>; - fsl,num_tx_queues = <0x8>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = <&mpic>; phy-handle = <&phy0>; tbi-handle = <&tbi0>; phy-connection-type = "sgmii"; - queue-group@0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb1000 0x1000>; - interrupts = <35 2 36 2 40 2>; - }; - - queue-group@1 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb5000 0x1000>; - interrupts = <51 2 52 2 67 2>; - }; }; enet2: ethernet@b2000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "fsl,etsec2"; - fsl,num_rx_queues = <0x8>; - fsl,num_tx_queues = <0x8>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = <&mpic>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; - queue-group@0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb2000 0x1000>; - interrupts = <31 2 32 2 33 2>; - }; - - queue-group@1 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0xb6000 0x1000>; - interrupts = <25 2 26 2 27 2>; - }; }; usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; phy_type = "ulpi"; }; @@ -481,82 +248,23 @@ it enables USB2. OTOH, U-Boot does create a new node when there isn't any. So, just comment it out. usb@23000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x23000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <46 0x2>; phy_type = "ulpi"; }; */ - sdhci@2e000 { - compatible = "fsl,p1020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; - }; - - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; - - msi@41600 { - compatible = "fsl,p1020-msi", "fsl,mpic-msi"; - reg = <0x41600 0x80>; - msi-available-ranges = <0 0x100>; - interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; - interrupt-parent = <&mpic>; - }; - - global-utilities@e0000 { //global utilities block - compatible = "fsl,p1020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; - }; }; pci0: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <16 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; @@ -573,18 +281,16 @@ }; pci1: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <16 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts new file mode 100644 index 000000000000..f0bf7f42f097 --- /dev/null +++ b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts @@ -0,0 +1,213 @@ +/* + * P1020 RDB Core0 Device Tree Source in CAMP mode. + * + * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache + * can be shared, all the other devices must be assigned to one core only. + * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb, + * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi. + * + * Please note to add "-b 0" for core0's dts compiling. + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "p1020si.dtsi" + +/ { + model = "fsl,P1020RDB"; + compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP"; + + aliases { + ethernet1 = &enet1; + ethernet2 = &enet2; + serial0 = &serial0; + pci0 = &pci0; + pci1 = &pci1; + }; + + cpus { + PowerPC,P1020@1 { + status = "disabled"; + }; + }; + + memory { + device_type = "memory"; + }; + + localbus@ffe05000 { + status = "disabled"; + }; + + soc@ffe00000 { + i2c@3000 { + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + }; + }; + + serial1: serial@4600 { + status = "disabled"; + }; + + spi@7000 { + fsl_m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,espi-flash"; + reg = <0>; + linux,modalias = "fsl_m25p80"; + spi-max-frequency = <40000000>; + + partition@0 { + /* 512KB for u-boot Bootloader Image */ + reg = <0x0 0x00080000>; + label = "SPI (RO) U-Boot Image"; + read-only; + }; + + partition@80000 { + /* 512KB for DTB Image */ + reg = <0x00080000 0x00080000>; + label = "SPI (RO) DTB Image"; + read-only; + }; + + partition@100000 { + /* 4MB for Linux Kernel Image */ + reg = <0x00100000 0x00400000>; + label = "SPI (RO) Linux Kernel Image"; + read-only; + }; + + partition@500000 { + /* 4MB for Compressed RFS Image */ + reg = <0x00500000 0x00400000>; + label = "SPI (RO) Compressed RFS Image"; + read-only; + }; + + partition@900000 { + /* 7MB for JFFS2 based RFS */ + reg = <0x00900000 0x00700000>; + label = "SPI (RW) JFFS2 RFS"; + }; + }; + }; + + mdio@24000 { + phy0: ethernet-phy@0 { + interrupt-parent = <&mpic>; + interrupts = <3 1>; + reg = <0x0>; + }; + phy1: ethernet-phy@1 { + interrupt-parent = <&mpic>; + interrupts = <2 1>; + reg = <0x1>; + }; + }; + + mdio@25000 { + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + enet0: ethernet@b0000 { + status = "disabled"; + }; + + enet1: ethernet@b1000 { + phy-handle = <&phy0>; + tbi-handle = <&tbi0>; + phy-connection-type = "sgmii"; + }; + + enet2: ethernet@b2000 { + phy-handle = <&phy1>; + phy-connection-type = "rgmii-id"; + }; + + usb@22000 { + phy_type = "ulpi"; + }; + + /* USB2 is shared with localbus, so it must be disabled + by default. We can't put 'status = "disabled";' here + since U-Boot doesn't clear the status property when + it enables USB2. OTOH, U-Boot does create a new node + when there isn't any. So, just comment it out. + usb@23000 { + phy_type = "ulpi"; + }; + */ + + mpic: pic@40000 { + protected-sources = < + 42 29 30 34 /* serial1, enet0-queue-group0 */ + 17 18 24 45 /* enet0-queue-group1, crypto */ + >; + }; + + }; + + pci0: pcie@ffe09000 { + ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; + pcie@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + ranges = <0x2000000 0x0 0xa0000000 + 0x2000000 0x0 0xa0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; + + pci1: pcie@ffe0a000 { + ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; + pcie@0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + ranges = <0x2000000 0x0 0x80000000 + 0x2000000 0x0 0x80000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts new file mode 100644 index 000000000000..6ec02204a44e --- /dev/null +++ b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts @@ -0,0 +1,148 @@ +/* + * P1020 RDB Core1 Device Tree Source in CAMP mode. + * + * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache + * can be shared, all the other devices must be assigned to one core only. + * This dts allows core1 to have l2, eth0, crypto. + * + * Please note to add "-b 1" for core1's dts compiling. + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "p1020si.dtsi" + +/ { + model = "fsl,P1020RDB"; + compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP"; + + aliases { + ethernet0 = &enet0; + serial0 = &serial1; + }; + + cpus { + PowerPC,P1020@0 { + status = "disabled"; + }; + }; + + memory { + device_type = "memory"; + }; + + localbus@ffe05000 { + status = "disabled"; + }; + + soc@ffe00000 { + ecm-law@0 { + status = "disabled"; + }; + + ecm@1000 { + status = "disabled"; + }; + + memory-controller@2000 { + status = "disabled"; + }; + + i2c@3000 { + status = "disabled"; + }; + + i2c@3100 { + status = "disabled"; + }; + + serial0: serial@4500 { + status = "disabled"; + }; + + spi@7000 { + status = "disabled"; + }; + + gpio: gpio-controller@f000 { + status = "disabled"; + }; + + dma@21300 { + status = "disabled"; + }; + + mdio@24000 { + status = "disabled"; + }; + + mdio@25000 { + status = "disabled"; + }; + + enet0: ethernet@b0000 { + fixed-link = <1 1 1000 0 0>; + phy-connection-type = "rgmii-id"; + + }; + + enet1: ethernet@b1000 { + status = "disabled"; + }; + + enet2: ethernet@b2000 { + status = "disabled"; + }; + + usb@22000 { + status = "disabled"; + }; + + sdhci@2e000 { + status = "disabled"; + }; + + mpic: pic@40000 { + protected-sources = < + 16 /* ecm, mem, L2, pci0, pci1 */ + 43 42 59 /* i2c, serial0, spi */ + 47 63 62 /* gpio, tdm */ + 20 21 22 23 /* dma */ + 03 02 /* mdio */ + 35 36 40 /* enet1-queue-group0 */ + 51 52 67 /* enet1-queue-group1 */ + 31 32 33 /* enet2-queue-group0 */ + 25 26 27 /* enet2-queue-group1 */ + 28 72 58 /* usb, sdhci, crypto */ + 0xb0 0xb1 0xb2 /* message */ + 0xb3 0xb4 0xb5 + 0xb6 0xb7 + 0xe0 0xe1 0xe2 /* msi */ + 0xe3 0xe4 0xe5 + 0xe6 0xe7 /* sdhci, crypto , pci */ + >; + }; + + msi@41600 { + status = "disabled"; + }; + + global-utilities@e0000 { //global utilities block + status = "disabled"; + }; + + }; + + pci0: pcie@ffe09000 { + status = "disabled"; + }; + + pci1: pcie@ffe0a000 { + status = "disabled"; + }; +}; diff --git a/arch/powerpc/boot/dts/p1020si.dtsi b/arch/powerpc/boot/dts/p1020si.dtsi new file mode 100644 index 000000000000..5c5acb66c3fc --- /dev/null +++ b/arch/powerpc/boot/dts/p1020si.dtsi @@ -0,0 +1,377 @@ +/* + * P1020si Device Tree Source + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/dts-v1/; +/ { + compatible = "fsl,P1020"; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,P1020@0 { + device_type = "cpu"; + reg = <0x0>; + next-level-cache = <&L2>; + }; + + PowerPC,P1020@1 { + device_type = "cpu"; + reg = <0x1>; + next-level-cache = <&L2>; + }; + }; + + localbus@ffe05000 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus"; + reg = <0 0xffe05000 0 0x1000>; + interrupts = <19 2>; + interrupt-parent = <&mpic>; + }; + + soc@ffe00000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "fsl,p1020-immr", "simple-bus"; + ranges = <0x0 0x0 0xffe00000 0x100000>; + bus-frequency = <0>; // Filled out by uboot. + + ecm-law@0 { + compatible = "fsl,ecm-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <12>; + }; + + ecm@1000 { + compatible = "fsl,p1020-ecm", "fsl,ecm"; + reg = <0x1000 0x1000>; + interrupts = <16 2>; + interrupt-parent = <&mpic>; + }; + + memory-controller@2000 { + compatible = "fsl,p1020-memory-controller"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + i2c@3000 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + compatible = "fsl-i2c"; + reg = <0x3000 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + compatible = "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + serial0: serial@4500 { + cell-index = <0>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4500 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + serial1: serial@4600 { + cell-index = <1>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4600 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + spi@7000 { + cell-index = <0>; + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,espi"; + reg = <0x7000 0x1000>; + interrupts = <59 0x2>; + interrupt-parent = <&mpic>; + mode = "cpu"; + }; + + gpio: gpio-controller@f000 { + #gpio-cells = <2>; + compatible = "fsl,mpc8572-gpio"; + reg = <0xf000 0x100>; + interrupts = <47 0x2>; + interrupt-parent = <&mpic>; + gpio-controller; + }; + + L2: l2-cache-controller@20000 { + compatible = "fsl,p1020-l2-cache-controller"; + reg = <0x20000 0x1000>; + cache-line-size = <32>; // 32 bytes + cache-size = <0x40000>; // L2,256K + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + dma@21300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0x21300 0x4>; + ranges = <0x0 0x21100 0x200>; + cell-index = <0>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <20 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <21 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <22 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <23 2>; + }; + }; + + mdio@24000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,etsec2-mdio"; + reg = <0x24000 0x1000 0xb0030 0x4>; + + }; + + mdio@25000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,etsec2-tbi"; + reg = <0x25000 0x1000 0xb1030 0x4>; + + }; + + enet0: ethernet@b0000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "fsl,etsec2"; + fsl,num_rx_queues = <0x8>; + fsl,num_tx_queues = <0x8>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupt-parent = <&mpic>; + + queue-group@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb0000 0x1000>; + interrupts = <29 2 30 2 34 2>; + }; + + queue-group@1 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb4000 0x1000>; + interrupts = <17 2 18 2 24 2>; + }; + }; + + enet1: ethernet@b1000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "fsl,etsec2"; + fsl,num_rx_queues = <0x8>; + fsl,num_tx_queues = <0x8>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupt-parent = <&mpic>; + + queue-group@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb1000 0x1000>; + interrupts = <35 2 36 2 40 2>; + }; + + queue-group@1 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb5000 0x1000>; + interrupts = <51 2 52 2 67 2>; + }; + }; + + enet2: ethernet@b2000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "fsl,etsec2"; + fsl,num_rx_queues = <0x8>; + fsl,num_tx_queues = <0x8>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupt-parent = <&mpic>; + + queue-group@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb2000 0x1000>; + interrupts = <31 2 32 2 33 2>; + }; + + queue-group@1 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0xb6000 0x1000>; + interrupts = <25 2 26 2 27 2>; + }; + }; + + usb@22000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-usb2-dr"; + reg = <0x22000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <28 0x2>; + }; + + /* USB2 is shared with localbus, so it must be disabled + by default. We can't put 'status = "disabled";' here + since U-Boot doesn't clear the status property when + it enables USB2. OTOH, U-Boot does create a new node + when there isn't any. So, just comment it out. + usb@23000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-usb2-dr"; + reg = <0x23000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <46 0x2>; + phy_type = "ulpi"; + }; + */ + + sdhci@2e000 { + compatible = "fsl,p1020-esdhc", "fsl,esdhc"; + reg = <0x2e000 0x1000>; + interrupts = <72 0x2>; + interrupt-parent = <&mpic>; + /* Filled in by U-Boot */ + clock-frequency = <0>; + }; + + crypto@30000 { + compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", + "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; + reg = <0x30000 0x10000>; + interrupts = <45 2 58 2>; + interrupt-parent = <&mpic>; + fsl,num-channels = <4>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0xbfe>; + fsl,descriptor-types-mask = <0x3ab0ebf>; + }; + + mpic: pic@40000 { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <0x40000 0x40000>; + compatible = "chrp,open-pic"; + device_type = "open-pic"; + }; + + msi@41600 { + compatible = "fsl,p1020-msi", "fsl,mpic-msi"; + reg = <0x41600 0x80>; + msi-available-ranges = <0 0x100>; + interrupts = < + 0xe0 0 + 0xe1 0 + 0xe2 0 + 0xe3 0 + 0xe4 0 + 0xe5 0 + 0xe6 0 + 0xe7 0>; + interrupt-parent = <&mpic>; + }; + + global-utilities@e0000 { //global utilities block + compatible = "fsl,p1020-guts","fsl,p2020-guts"; + reg = <0xe0000 0x1000>; + fsl,has-rstcr; + }; + }; + + pci0: pcie@ffe09000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe09000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + pci1: pcie@ffe0a000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe0a000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; +}; diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts index 59ef405c1c91..4f685a779f4c 100644 --- a/arch/powerpc/boot/dts/p1022ds.dts +++ b/arch/powerpc/boot/dts/p1022ds.dts @@ -52,7 +52,7 @@ #size-cells = <1>; compatible = "fsl,p1022-elbc", "fsl,elbc", "simple-bus"; reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; + interrupts = <19 2 0 0>; ranges = <0x0 0x0 0xf 0xe8000000 0x08000000 0x1 0x0 0xf 0xe0000000 0x08000000 @@ -157,7 +157,7 @@ * IRQ8 is generated if the "EVENT" switch is pressed * and PX_CTL[EVESEL] is set to 00. */ - interrupts = <8 8>; + interrupts = <8 8 0 0>; }; }; @@ -178,13 +178,13 @@ ecm@1000 { compatible = "fsl,p1022-ecm", "fsl,ecm"; reg = <0x1000 0x1000>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; }; memory-controller@2000 { compatible = "fsl,p1022-memory-controller"; reg = <0x2000 0x1000>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; }; i2c@3000 { @@ -193,7 +193,7 @@ cell-index = <0>; compatible = "fsl-i2c"; reg = <0x3000 0x100>; - interrupts = <43 2>; + interrupts = <43 2 0 0>; dfsrr; }; @@ -203,7 +203,7 @@ cell-index = <1>; compatible = "fsl-i2c"; reg = <0x3100 0x100>; - interrupts = <43 2>; + interrupts = <43 2 0 0>; dfsrr; wm8776:codec@1a { @@ -220,7 +220,7 @@ compatible = "ns16550"; reg = <0x4500 0x100>; clock-frequency = <0>; - interrupts = <42 2>; + interrupts = <42 2 0 0>; }; serial1: serial@4600 { @@ -229,7 +229,7 @@ compatible = "ns16550"; reg = <0x4600 0x100>; clock-frequency = <0>; - interrupts = <42 2>; + interrupts = <42 2 0 0>; }; spi@7000 { @@ -238,7 +238,7 @@ #size-cells = <0>; compatible = "fsl,espi"; reg = <0x7000 0x1000>; - interrupts = <59 0x2>; + interrupts = <59 0x2 0 0>; espi,num-ss-bits = <4>; mode = "cpu"; @@ -275,7 +275,7 @@ compatible = "fsl,mpc8610-ssi"; cell-index = <0>; reg = <0x15000 0x100>; - interrupts = <75 2>; + interrupts = <75 2 0 0>; fsl,mode = "i2s-slave"; codec-handle = <&wm8776>; fsl,playback-dma = <&dma00>; @@ -294,25 +294,25 @@ compatible = "fsl,ssi-dma-channel"; reg = <0x0 0x80>; cell-index = <0>; - interrupts = <76 2>; + interrupts = <76 2 0 0>; }; dma01: dma-channel@80 { compatible = "fsl,ssi-dma-channel"; reg = <0x80 0x80>; cell-index = <1>; - interrupts = <77 2>; + interrupts = <77 2 0 0>; }; dma-channel@100 { compatible = "fsl,eloplus-dma-channel"; reg = <0x100 0x80>; cell-index = <2>; - interrupts = <78 2>; + interrupts = <78 2 0 0>; }; dma-channel@180 { compatible = "fsl,eloplus-dma-channel"; reg = <0x180 0x80>; cell-index = <3>; - interrupts = <79 2>; + interrupts = <79 2 0 0>; }; }; @@ -320,7 +320,7 @@ #gpio-cells = <2>; compatible = "fsl,mpc8572-gpio"; reg = <0xf000 0x100>; - interrupts = <47 0x2>; + interrupts = <47 0x2 0 0>; gpio-controller; }; @@ -329,7 +329,7 @@ reg = <0x20000 0x1000>; cache-line-size = <32>; // 32 bytes cache-size = <0x40000>; // L2, 256K - interrupts = <16 2>; + interrupts = <16 2 0 0>; }; dma@21300 { @@ -343,25 +343,25 @@ compatible = "fsl,eloplus-dma-channel"; reg = <0x0 0x80>; cell-index = <0>; - interrupts = <20 2>; + interrupts = <20 2 0 0>; }; dma-channel@80 { compatible = "fsl,eloplus-dma-channel"; reg = <0x80 0x80>; cell-index = <1>; - interrupts = <21 2>; + interrupts = <21 2 0 0>; }; dma-channel@100 { compatible = "fsl,eloplus-dma-channel"; reg = <0x100 0x80>; cell-index = <2>; - interrupts = <22 2>; + interrupts = <22 2 0 0>; }; dma-channel@180 { compatible = "fsl,eloplus-dma-channel"; reg = <0x180 0x80>; cell-index = <3>; - interrupts = <23 2>; + interrupts = <23 2 0 0>; }; }; @@ -370,7 +370,7 @@ #size-cells = <0>; compatible = "fsl-usb2-dr"; reg = <0x22000 0x1000>; - interrupts = <28 0x2>; + interrupts = <28 0x2 0 0>; phy_type = "ulpi"; }; @@ -381,11 +381,11 @@ reg = <0x24000 0x1000 0xb0030 0x4>; phy0: ethernet-phy@0 { - interrupts = <3 1>; + interrupts = <3 1 0 0>; reg = <0x1>; }; phy1: ethernet-phy@1 { - interrupts = <9 1>; + interrupts = <9 1 0 0>; reg = <0x2>; }; }; @@ -416,13 +416,13 @@ #address-cells = <1>; #size-cells = <1>; reg = <0xB0000 0x1000>; - interrupts = <29 2 30 2 34 2>; + interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>; }; queue-group@1{ #address-cells = <1>; #size-cells = <1>; reg = <0xB4000 0x1000>; - interrupts = <17 2 18 2 24 2>; + interrupts = <17 2 0 0 18 2 0 0 24 2 0 0>; }; }; @@ -443,20 +443,20 @@ #address-cells = <1>; #size-cells = <1>; reg = <0xB1000 0x1000>; - interrupts = <35 2 36 2 40 2>; + interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>; }; queue-group@1{ #address-cells = <1>; #size-cells = <1>; reg = <0xB5000 0x1000>; - interrupts = <51 2 52 2 67 2>; + interrupts = <51 2 0 0 52 2 0 0 67 2 0 0>; }; }; sdhci@2e000 { compatible = "fsl,p1022-esdhc", "fsl,esdhc"; reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; + interrupts = <72 0x2 0 0>; fsl,sdhci-auto-cmd12; /* Filled in by U-Boot */ clock-frequency = <0>; @@ -467,7 +467,7 @@ "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; + interrupts = <45 2 0 0 58 2 0 0>; fsl,num-channels = <4>; fsl,channel-fifo-len = <24>; fsl,exec-units-mask = <0x97c>; @@ -478,14 +478,14 @@ compatible = "fsl,p1022-sata", "fsl,pq-sata-v2"; reg = <0x18000 0x1000>; cell-index = <1>; - interrupts = <74 0x2>; + interrupts = <74 0x2 0 0>; }; sata@19000 { compatible = "fsl,p1022-sata", "fsl,pq-sata-v2"; reg = <0x19000 0x1000>; cell-index = <2>; - interrupts = <41 0x2>; + interrupts = <41 0x2 0 0>; }; power@e0070{ @@ -496,21 +496,33 @@ display@10000 { compatible = "fsl,diu", "fsl,p1022-diu"; reg = <0x10000 1000>; - interrupts = <64 2>; + interrupts = <64 2 0 0>; }; timer@41100 { compatible = "fsl,mpic-global-timer"; - reg = <0x41100 0x204>; - interrupts = <0xf7 0x2>; + reg = <0x41100 0x100 0x41300 4>; + interrupts = <0 0 3 0 + 1 0 3 0 + 2 0 3 0 + 3 0 3 0>; + }; + + timer@42100 { + compatible = "fsl,mpic-global-timer"; + reg = <0x42100 0x100 0x42300 4>; + interrupts = <4 0 3 0 + 5 0 3 0 + 6 0 3 0 + 7 0 3 0>; }; mpic: pic@40000 { interrupt-controller; #address-cells = <0>; - #interrupt-cells = <2>; + #interrupt-cells = <4>; reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; + compatible = "fsl,mpic"; device_type = "open-pic"; }; @@ -519,14 +531,14 @@ reg = <0x41600 0x80>; msi-available-ranges = <0 0x100>; interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; + 0xe0 0 0 0 + 0xe1 0 0 0 + 0xe2 0 0 0 + 0xe3 0 0 0 + 0xe4 0 0 0 + 0xe5 0 0 0 + 0xe6 0 0 0 + 0xe7 0 0 0>; }; global-utilities@e0000 { //global utilities block @@ -547,7 +559,7 @@ ranges = <0x2000000 0x0 0xa0000000 0xc 0x20000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>; clock-frequency = <33333333>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = < /* IDSEL 0x0 */ @@ -582,7 +594,7 @@ ranges = <0x2000000 0x0 0xc0000000 0xc 0x40000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x10000>; clock-frequency = <33333333>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = < /* IDSEL 0x0 */ @@ -618,7 +630,7 @@ ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>; clock-frequency = <33333333>; - interrupts = <16 2>; + interrupts = <16 2 0 0>; interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = < /* IDSEL 0x0 */ diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts index 11019142813c..2bcf3683d223 100644 --- a/arch/powerpc/boot/dts/p2020ds.dts +++ b/arch/powerpc/boot/dts/p2020ds.dts @@ -1,7 +1,7 @@ /* * P2020 DS Device Tree Source * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -9,12 +9,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020DS"; compatible = "fsl,P2020DS"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet0 = &enet0; @@ -27,35 +26,13 @@ pci2 = &pci2; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; - }; - - PowerPC,P2020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; - }; - }; memory { device_type = "memory"; }; localbus@ffe05000 { - #address-cells = <2>; - #size-cells = <1>; compatible = "fsl,elbc", "simple-bus"; - reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; - interrupt-parent = <&mpic>; - ranges = <0x0 0x0 0x0 0xe8000000 0x08000000 0x1 0x0 0x0 0xe0000000 0x08000000 0x2 0x0 0x0 0xffa00000 0x00040000 @@ -158,352 +135,77 @@ }; soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p2020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p2020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <18 2>; - }; - - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - spi@7000 { - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; + usb@22000 { + phy_type = "ulpi"; }; - dma@c300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0xc300 0x4>; - ranges = <0x0 0xc100 0x200>; - cell-index = <1>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; + mdio@24520 { + phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; - interrupts = <76 2>; + interrupts = <3 1>; + reg = <0x0>; }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; + phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; - interrupts = <77 2>; + interrupts = <3 1>; + reg = <0x1>; }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; + phy2: ethernet-phy@2 { interrupt-parent = <&mpic>; - interrupts = <78 2>; + interrupts = <3 1>; + reg = <0x2>; }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <79 2>; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; }; - }; - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; }; - L2: l2-cache-controller@20000 { - compatible = "fsl,p2020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x80000>; // L2, 512k - interrupt-parent = <&mpic>; - interrupts = <16 2>; + mdio@25520 { + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; + mdio@26520 { + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; - }; - usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; - phy_type = "ulpi"; }; enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <29 2 30 2 34 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "rgmii-id"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x520 0x20>; - - phy0: ethernet-phy@0 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x0>; - }; - phy1: ethernet-phy@1 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x1>; - }; - phy2: ethernet-phy@2 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x2>; - }; - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; }; enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 2 36 2 40 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi1>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi1: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; }; enet2: ethernet@26000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <2>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x26000 0x1000>; - ranges = <0x0 0x26000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <31 2 32 2 33 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi2>; phy-handle = <&phy2>; phy-connection-type = "rgmii-id"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi2: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - sdhci@2e000 { - compatible = "fsl,p2020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; }; - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; msi@41600 { compatible = "fsl,mpic-msi"; - reg = <0x41600 0x80>; - msi-available-ranges = <0 0x100>; - interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; - interrupt-parent = <&mpic>; - }; - - global-utilities@e0000 { //global utilities block - compatible = "fsl,p2020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; }; }; pci0: pcie@ffe08000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe08000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <24 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ @@ -528,18 +230,8 @@ }; pci1: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <25 2>; interrupt-map-mask = <0xff00 0x0 0x0 0x7>; interrupt-map = < @@ -667,18 +359,8 @@ }; pci2: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <26 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts index e2d48fd4416e..3782a58f13be 100644 --- a/arch/powerpc/boot/dts/p2020rdb.dts +++ b/arch/powerpc/boot/dts/p2020rdb.dts @@ -9,12 +9,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020RDB"; compatible = "fsl,P2020RDB"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet0 = &enet0; @@ -26,34 +25,11 @@ pci1 = &pci1; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; - }; - - PowerPC,P2020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; - }; - }; - memory { device_type = "memory"; }; localbus@ffe05000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus"; - reg = <0 0xffe05000 0 0x1000>; - interrupts = <19 2>; - interrupt-parent = <&mpic>; /* NOR and NAND Flashes */ ranges = <0x0 0x0 0x0 0xef000000 0x01000000 @@ -165,90 +141,16 @@ }; soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p2020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p2020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <18 2>; - }; - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; }; }; - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; - interrupts = <42 2>; - interrupt-parent = <&mpic>; - }; + spi@7000 { - spi@7000 { - cell-index = <0>; - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; - mode = "cpu"; - - fsl_m25p80@0 { + fsl_m25p80@0 { #address-cells = <1>; #size-cells = <1>; compatible = "fsl,espi-flash"; @@ -294,254 +196,68 @@ }; }; - dma@c300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0xc300 0x4>; - ranges = <0x0 0xc100 0x200>; - cell-index = <1>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <76 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <77 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <78 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <79 2>; - }; - }; - - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,p2020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x80000>; // L2,512K - interrupt-parent = <&mpic>; - interrupts = <16 2>; + usb@22000 { + phy_type = "ulpi"; }; - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; + mdio@24520 { + phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; + interrupts = <3 1>; + reg = <0x0>; + }; + phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; - interrupts = <23 2>; + interrupts = <3 1>; + reg = <0x1>; + }; + }; + + mdio@25520 { + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; }; }; - usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; - phy_type = "ulpi"; + mdio@26520 { + status = "disabled"; }; enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <29 2 30 2 34 2>; - interrupt-parent = <&mpic>; fixed-link = <1 1 1000 0 0>; phy-connection-type = "rgmii-id"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x520 0x20>; - - phy0: ethernet-phy@0 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x0>; - }; - phy1: ethernet-phy@1 { - interrupt-parent = <&mpic>; - interrupts = <3 1>; - reg = <0x1>; - }; - }; }; enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 2 36 2 40 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "sgmii"; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; }; enet2: ethernet@26000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <2>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x26000 0x1000>; - ranges = <0x0 0x26000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <31 2 32 2 33 2>; - interrupt-parent = <&mpic>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; - sdhci@2e000 { - compatible = "fsl,p2020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; - }; - - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; - - msi@41600 { - compatible = "fsl,p2020-msi", "fsl,mpic-msi"; - reg = <0x41600 0x80>; - msi-available-ranges = <0 0x100>; - interrupts = < - 0xe0 0 - 0xe1 0 - 0xe2 0 - 0xe3 0 - 0xe4 0 - 0xe5 0 - 0xe6 0 - 0xe7 0>; - interrupt-parent = <&mpic>; - }; + }; - global-utilities@e0000 { //global utilities block - compatible = "fsl,p2020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; - }; + pci0: pcie@ffe08000 { + status = "disabled"; }; - pci0: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; + pci1: pcie@ffe09000 { ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <25 2>; - pcie@0 { + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; + pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; #address-cells = <3>; @@ -556,19 +272,17 @@ }; }; - pci1: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; + pci2: pcie@ffe0a000 { ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <26 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts index b69c3a5dc858..fc8ddddfccb6 100644 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts +++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts @@ -14,12 +14,11 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020RDB"; compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet1 = &enet1; @@ -29,91 +28,33 @@ }; cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@0 { - device_type = "cpu"; - reg = <0x0>; - next-level-cache = <&L2>; + PowerPC,P2020@1 { + status = "disabled"; }; + }; memory { device_type = "memory"; }; - soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <12>; - }; - - ecm@1000 { - compatible = "fsl,p2020-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,p2020-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <18 2>; - }; + localbus@ffe05000 { + status = "disabled"; + }; + soc@ffe00000 { i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; rtc@68 { compatible = "dallas,ds1339"; reg = <0x68>; }; }; - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <43 2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4500 0x100>; - clock-frequency = <0>; + serial1: serial@4600 { + status = "disabled"; }; spi@7000 { - cell-index = <0>; - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,espi"; - reg = <0x7000 0x1000>; - interrupts = <59 0x2>; - interrupt-parent = <&mpic>; - mode = "cpu"; fsl_m25p80@0 { #address-cells = <1>; @@ -161,76 +102,15 @@ }; }; - gpio: gpio-controller@f000 { - #gpio-cells = <2>; - compatible = "fsl,mpc8572-gpio"; - reg = <0xf000 0x100>; - interrupts = <47 0x2>; - interrupt-parent = <&mpic>; - gpio-controller; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,p2020-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <32>; // 32 bytes - cache-size = <0x80000>; // L2,512K - interrupt-parent = <&mpic>; - interrupts = <16 2>; - }; - - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; + dma@c300 { + status = "disabled"; }; usb@22000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl-usb2-dr"; - reg = <0x22000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <28 0x2>; phy_type = "ulpi"; }; mdio@24520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x24520 0x20>; phy0: ethernet-phy@0 { interrupt-parent = <&mpic>; @@ -245,29 +125,21 @@ }; mdio@25520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x26520 0x20>; - tbi0: tbi-phy@11 { reg = <0x11>; device_type = "tbi-phy"; }; }; + mdio@26520 { + status = "disabled"; + }; + + enet0: ethernet@24000 { + status = "disabled"; + }; + enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 2 36 2 40 2>; - interrupt-parent = <&mpic>; tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "sgmii"; @@ -275,49 +147,12 @@ }; enet2: ethernet@26000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <2>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x26000 0x1000>; - ranges = <0x0 0x26000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <31 2 32 2 33 2>; - interrupt-parent = <&mpic>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; - sdhci@2e000 { - compatible = "fsl,p2020-esdhc", "fsl,esdhc"; - reg = <0x2e000 0x1000>; - interrupts = <72 0x2>; - interrupt-parent = <&mpic>; - /* Filled in by U-Boot */ - clock-frequency = <0>; - }; - - crypto@30000 { - compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", - "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2 58 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xbfe>; - fsl,descriptor-types-mask = <0x3ab0ebf>; - }; mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; protected-sources = < 42 76 77 78 79 /* serial1 , dma2 */ 29 30 34 26 /* enet0, pci1 */ @@ -326,26 +161,28 @@ >; }; - global-utilities@e0000 { - compatible = "fsl,p2020-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; + msi@41600 { + status = "disabled"; }; + + }; - pci0: pcie@ffe09000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe09000 0 0x1000>; - bus-range = <0 255>; + pci0: pcie@ffe08000 { + status = "disabled"; + }; + + pci1: pcie@ffe09000 { ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <25 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x1 + 0000 0x0 0x0 0x2 &mpic 0x5 0x1 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; @@ -360,4 +197,8 @@ 0x0 0x100000>; }; }; + + pci2: pcie@ffe0a000 { + status = "disabled"; + }; }; diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts index 7a31d46c01b0..261c34ba45ec 100644 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts +++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts @@ -15,27 +15,21 @@ * option) any later version. */ -/dts-v1/; +/include/ "p2020si.dtsi" + / { - model = "fsl,P2020"; + model = "fsl,P2020RDB"; compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP"; - #address-cells = <2>; - #size-cells = <2>; aliases { ethernet0 = &enet0; - serial0 = &serial0; + serial0 = &serial1; pci1 = &pci1; }; cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,P2020@1 { - device_type = "cpu"; - reg = <0x1>; - next-level-cache = <&L2>; + PowerPC,P2020@0 { + status = "disabled"; }; }; @@ -43,20 +37,37 @@ device_type = "memory"; }; + localbus@ffe05000 { + status = "disabled"; + }; + soc@ffe00000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "fsl,p2020-immr", "simple-bus"; - ranges = <0x0 0x0 0xffe00000 0x100000>; - bus-frequency = <0>; // Filled out by uboot. - - serial0: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "ns16550"; - reg = <0x4600 0x100>; - clock-frequency = <0>; + ecm-law@0 { + status = "disabled"; + }; + + ecm@1000 { + status = "disabled"; + }; + + memory-controller@2000 { + status = "disabled"; + }; + + i2c@3000 { + status = "disabled"; + }; + + i2c@3100 { + status = "disabled"; + }; + + serial0: serial@4500 { + status = "disabled"; + }; + + spi@7000 { + status = "disabled"; }; dma@c300 { @@ -96,6 +107,10 @@ }; }; + gpio: gpio-controller@f000 { + status = "disabled"; + }; + L2: l2-cache-controller@20000 { compatible = "fsl,p2020-l2-cache-controller"; reg = <0x20000 0x1000>; @@ -104,31 +119,49 @@ interrupt-parent = <&mpic>; }; + dma@21300 { + status = "disabled"; + }; + + usb@22000 { + status = "disabled"; + }; + + mdio@24520 { + status = "disabled"; + }; + + mdio@25520 { + status = "disabled"; + }; + + mdio@26520 { + status = "disabled"; + }; enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <29 2 30 2 34 2>; - interrupt-parent = <&mpic>; fixed-link = <1 1 1000 0 0>; phy-connection-type = "rgmii-id"; }; + enet1: ethernet@25000 { + status = "disabled"; + }; + + enet2: ethernet@26000 { + status = "disabled"; + }; + + sdhci@2e000 { + status = "disabled"; + }; + + crypto@30000 { + status = "disabled"; + }; + mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; protected-sources = < 17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */ 16 20 21 22 23 28 /* L2, dma1, USB */ @@ -152,21 +185,32 @@ 0xe7 0>; interrupt-parent = <&mpic>; }; + + global-utilities@e0000 { //global utilities block + status = "disabled"; + }; + }; - pci1: pcie@ffe0a000 { - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0 0xffe0a000 0 0x1000>; - bus-range = <0 255>; + pci0: pcie@ffe08000 { + status = "disabled"; + }; + + pci1: pcie@ffe09000 { + status = "disabled"; + }; + + pci2: pcie@ffe0a000 { ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; - clock-frequency = <33333333>; - interrupt-parent = <&mpic>; - interrupts = <26 2>; + interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0x0 0x0 0x1 &mpic 0x0 0x1 + 0000 0x0 0x0 0x2 &mpic 0x1 0x1 + 0000 0x0 0x0 0x3 &mpic 0x2 0x1 + 0000 0x0 0x0 0x4 &mpic 0x3 0x1 + >; pcie@0 { reg = <0x0 0x0 0x0 0x0 0x0>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/p2020si.dtsi b/arch/powerpc/boot/dts/p2020si.dtsi new file mode 100644 index 000000000000..6def17f265d3 --- /dev/null +++ b/arch/powerpc/boot/dts/p2020si.dtsi @@ -0,0 +1,382 @@ +/* + * P2020 Device Tree Source + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/dts-v1/; +/ { + compatible = "fsl,P2020"; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,P2020@0 { + device_type = "cpu"; + reg = <0x0>; + next-level-cache = <&L2>; + }; + + PowerPC,P2020@1 { + device_type = "cpu"; + reg = <0x1>; + next-level-cache = <&L2>; + }; + }; + + localbus@ffe05000 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus"; + reg = <0 0xffe05000 0 0x1000>; + interrupts = <19 2>; + interrupt-parent = <&mpic>; + }; + + soc@ffe00000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "fsl,p2020-immr", "simple-bus"; + ranges = <0x0 0x0 0xffe00000 0x100000>; + bus-frequency = <0>; // Filled out by uboot. + + ecm-law@0 { + compatible = "fsl,ecm-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <12>; + }; + + ecm@1000 { + compatible = "fsl,p2020-ecm", "fsl,ecm"; + reg = <0x1000 0x1000>; + interrupts = <17 2>; + interrupt-parent = <&mpic>; + }; + + memory-controller@2000 { + compatible = "fsl,p2020-memory-controller"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <18 2>; + }; + + i2c@3000 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + compatible = "fsl-i2c"; + reg = <0x3000 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + compatible = "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + dfsrr; + }; + + serial0: serial@4500 { + cell-index = <0>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4500 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + serial1: serial@4600 { + cell-index = <1>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4600 0x100>; + clock-frequency = <0>; + interrupts = <42 2>; + interrupt-parent = <&mpic>; + }; + + spi@7000 { + cell-index = <0>; + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,espi"; + reg = <0x7000 0x1000>; + interrupts = <59 0x2>; + interrupt-parent = <&mpic>; + mode = "cpu"; + }; + + dma@c300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0xc300 0x4>; + ranges = <0x0 0xc100 0x200>; + cell-index = <1>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <76 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <77 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <78 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <79 2>; + }; + }; + + gpio: gpio-controller@f000 { + #gpio-cells = <2>; + compatible = "fsl,mpc8572-gpio"; + reg = <0xf000 0x100>; + interrupts = <47 0x2>; + interrupt-parent = <&mpic>; + gpio-controller; + }; + + L2: l2-cache-controller@20000 { + compatible = "fsl,p2020-l2-cache-controller"; + reg = <0x20000 0x1000>; + cache-line-size = <32>; // 32 bytes + cache-size = <0x80000>; // L2,512K + interrupt-parent = <&mpic>; + interrupts = <16 2>; + }; + + dma@21300 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,eloplus-dma"; + reg = <0x21300 0x4>; + ranges = <0x0 0x21100 0x200>; + cell-index = <0>; + dma-channel@0 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x0 0x80>; + cell-index = <0>; + interrupt-parent = <&mpic>; + interrupts = <20 2>; + }; + dma-channel@80 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x80 0x80>; + cell-index = <1>; + interrupt-parent = <&mpic>; + interrupts = <21 2>; + }; + dma-channel@100 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x100 0x80>; + cell-index = <2>; + interrupt-parent = <&mpic>; + interrupts = <22 2>; + }; + dma-channel@180 { + compatible = "fsl,eloplus-dma-channel"; + reg = <0x180 0x80>; + cell-index = <3>; + interrupt-parent = <&mpic>; + interrupts = <23 2>; + }; + }; + + usb@22000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-usb2-dr"; + reg = <0x22000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <28 0x2>; + }; + + mdio@24520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-mdio"; + reg = <0x24520 0x20>; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x520 0x20>; + }; + + enet0: ethernet@24000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <0>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x24000 0x1000>; + ranges = <0x0 0x24000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <29 2 30 2 34 2>; + interrupt-parent = <&mpic>; + }; + + enet1: ethernet@25000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x25000 0x1000>; + ranges = <0x0 0x25000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <35 2 36 2 40 2>; + interrupt-parent = <&mpic>; + + }; + + enet2: ethernet@26000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <2>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x26000 0x1000>; + ranges = <0x0 0x26000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <31 2 32 2 33 2>; + interrupt-parent = <&mpic>; + + }; + + sdhci@2e000 { + compatible = "fsl,p2020-esdhc", "fsl,esdhc"; + reg = <0x2e000 0x1000>; + interrupts = <72 0x2>; + interrupt-parent = <&mpic>; + /* Filled in by U-Boot */ + clock-frequency = <0>; + }; + + crypto@30000 { + compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4", + "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; + reg = <0x30000 0x10000>; + interrupts = <45 2 58 2>; + interrupt-parent = <&mpic>; + fsl,num-channels = <4>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0xbfe>; + fsl,descriptor-types-mask = <0x3ab0ebf>; + }; + + mpic: pic@40000 { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <0x40000 0x40000>; + compatible = "chrp,open-pic"; + device_type = "open-pic"; + }; + + msi@41600 { + compatible = "fsl,p2020-msi", "fsl,mpic-msi"; + reg = <0x41600 0x80>; + msi-available-ranges = <0 0x100>; + interrupts = < + 0xe0 0 + 0xe1 0 + 0xe2 0 + 0xe3 0 + 0xe4 0 + 0xe5 0 + 0xe6 0 + 0xe7 0>; + interrupt-parent = <&mpic>; + }; + + global-utilities@e0000 { //global utilities block + compatible = "fsl,p2020-guts"; + reg = <0xe0000 0x1000>; + fsl,has-rstcr; + }; + }; + + pci0: pcie@ffe08000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe08000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <24 2>; + }; + + pci1: pcie@ffe09000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe09000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <25 2>; + }; + + pci2: pcie@ffe0a000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <0 0xffe0a000 0 0x1000>; + bus-range = <0 255>; + clock-frequency = <33333333>; + interrupt-parent = <&mpic>; + interrupts = <26 2>; + }; +}; diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts index 5b7fc29dd6cf..927f94d16e9b 100644 --- a/arch/powerpc/boot/dts/p4080ds.dts +++ b/arch/powerpc/boot/dts/p4080ds.dts @@ -1,7 +1,7 @@ /* * P4080DS Device Tree Source * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -33,6 +33,17 @@ dma1 = &dma1; sdhc = &sdhc; + crypto = &crypto; + sec_jr0 = &sec_jr0; + sec_jr1 = &sec_jr1; + sec_jr2 = &sec_jr2; + sec_jr3 = &sec_jr3; + rtic_a = &rtic_a; + rtic_b = &rtic_b; + rtic_c = &rtic_c; + rtic_d = &rtic_d; + sec_mon = &sec_mon; + rio0 = &rapidio0; }; @@ -410,6 +421,79 @@ dr_mode = "host"; phy_type = "ulpi"; }; + + crypto: crypto@300000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x300000 0x10000>; + ranges = <0 0x300000 0x10000>; + interrupt-parent = <&mpic>; + interrupts = <92 2>; + + sec_jr0: jr@1000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <88 2>; + }; + + sec_jr1: jr@2000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <89 2>; + }; + + sec_jr2: jr@3000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x3000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <90 2>; + }; + + sec_jr3: jr@4000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x4000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <91 2>; + }; + + rtic@6000 { + compatible = "fsl,sec-v4.0-rtic"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x6000 0x100>; + ranges = <0x0 0x6100 0xe00>; + + rtic_a: rtic-a@0 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x00 0x20 0x100 0x80>; + }; + + rtic_b: rtic-b@20 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x20 0x20 0x200 0x80>; + }; + + rtic_c: rtic-c@40 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x40 0x20 0x300 0x80>; + }; + + rtic_d: rtic-d@60 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x60 0x20 0x500 0x80>; + }; + }; + }; + + sec_mon: sec_mon@314000 { + compatible = "fsl,sec-v4.0-mon"; + reg = <0x314000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <93 2>; + }; }; rapidio0: rapidio@ffe0c0000 { diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c new file mode 100644 index 000000000000..06c1961bd124 --- /dev/null +++ b/arch/powerpc/boot/epapr.c @@ -0,0 +1,66 @@ +/* + * Bootwrapper for ePAPR compliant firmwares + * + * Copyright 2010 David Gibson <david@gibson.dropbear.id.au>, IBM Corporation. + * + * Based on earlier bootwrappers by: + * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,\ + * and + * Scott Wood <scottwood@freescale.com> + * Copyright (c) 2007 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include "ops.h" +#include "stdio.h" +#include "io.h" +#include <libfdt.h> + +BSS_STACK(4096); + +#define EPAPR_SMAGIC 0x65504150 +#define EPAPR_EMAGIC 0x45504150 + +static unsigned epapr_magic; +static unsigned long ima_size; +static unsigned long fdt_addr; + +static void platform_fixups(void) +{ + if ((epapr_magic != EPAPR_EMAGIC) + && (epapr_magic != EPAPR_SMAGIC)) + fatal("r6 contained 0x%08x instead of ePAPR magic number\n", + epapr_magic); + + if (ima_size < (unsigned long)_end) + printf("WARNING: Image loaded outside IMA!" + " (_end=%p, ima_size=0x%lx)\n", _end, ima_size); + if (ima_size < fdt_addr) + printf("WARNING: Device tree address is outside IMA!" + "(fdt_addr=0x%lx, ima_size=0x%lx)\n", fdt_addr, + ima_size); + if (ima_size < fdt_addr + fdt_totalsize((void *)fdt_addr)) + printf("WARNING: Device tree extends outside IMA!" + " (fdt_addr=0x%lx, size=0x%x, ima_size=0x%lx\n", + fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); +} + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + epapr_magic = r6; + ima_size = r7; + fdt_addr = r3; + + /* FIXME: we should process reserve entries */ + + simple_alloc_init(_end, ima_size - (unsigned long)_end, 32, 64); + + fdt_init((void *)fdt_addr); + + serial_console_init(); + platform_ops.fixups = platform_fixups; +} diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cb97e7511d7e..c74531af72c0 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -39,6 +39,7 @@ dts= cacheit= binary= gzip=.gz +pie= # cross-compilation prefix CROSS= @@ -157,9 +158,10 @@ pmac|chrp) platformo=$object/of.o ;; coff) - platformo=$object/of.o + platformo="$object/crt0.o $object/of.o" lds=$object/zImage.coff.lds link_address='0x500000' + pie= ;; miboot|uboot) # miboot and U-boot want just the bare bits, not an ELF binary @@ -208,6 +210,7 @@ ps3) ksection=.kernel:vmlinux.bin isection=.kernel:initrd link_address='' + pie= ;; ep88xc|ep405|ep8248e) platformo="$object/fixed-head.o $object/$platform.o" @@ -244,6 +247,10 @@ gamecube|wii) treeboot-iss4xx-mpic) platformo="$object/treeboot-iss4xx.o" ;; +epapr) + link_address='0x20000000' + pie=-pie + ;; esac vmz="$tmpdir/`basename \"$kernel\"`.$ext" @@ -251,7 +258,7 @@ if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then ${CROSS}objcopy $objflags "$kernel" "$vmz.$$" if [ -n "$gzip" ]; then - gzip -f -9 "$vmz.$$" + gzip -n -f -9 "$vmz.$$" fi if [ -n "$cacheit" ]; then @@ -310,9 +317,9 @@ fi if [ "$platform" != "miboot" ]; then if [ -n "$link_address" ] ; then - text_start="-Ttext $link_address --defsym _start=$link_address" + text_start="-Ttext $link_address" fi - ${CROSS}ld -m elf32ppc -T $lds $text_start -o "$ofile" \ + ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \ $platformo $tmp $object/wrapper.a rm $tmp fi @@ -336,7 +343,7 @@ coff) $objbin/hack-coff "$ofile" ;; cuboot*) - gzip -f -9 "$ofile" + gzip -n -f -9 "$ofile" ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a "$base" -e "$entry" \ $uboot_version -d "$ofile".gz "$ofile" ;; @@ -383,6 +390,6 @@ ps3) odir="$(dirname "$ofile.bin")" rm -f "$odir/otheros.bld" - gzip --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld" + gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld" ;; esac diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S index 856dc78b14ef..de4c9e3c9344 100644 --- a/arch/powerpc/boot/zImage.coff.lds.S +++ b/arch/powerpc/boot/zImage.coff.lds.S @@ -3,13 +3,13 @@ ENTRY(_zimage_start_opd) EXTERN(_zimage_start_opd) SECTIONS { - _start = .; .text : { + _start = .; *(.text) *(.fixup) + _etext = .; } - _etext = .; . = ALIGN(4096); .data : { @@ -17,9 +17,7 @@ SECTIONS *(.data*) *(__builtin_*) *(.sdata*) - __got2_start = .; *(.got2) - __got2_end = .; _dtb_start = .; *(.kernel:dtb) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 0962d62bdb50..2bd8731f1365 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -3,49 +3,64 @@ ENTRY(_zimage_start) EXTERN(_zimage_start) SECTIONS { - _start = .; .text : { + _start = .; *(.text) *(.fixup) + _etext = .; } - _etext = .; . = ALIGN(4096); .data : { *(.rodata*) *(.data*) *(.sdata*) - __got2_start = .; *(.got2) - __got2_end = .; } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .dynamic : + { + __dynamic_start = .; + *(.dynamic) + } + .hash : { *(.hash) } + .interp : { *(.interp) } + .rela.dyn : { *(.rela*) } . = ALIGN(8); - _dtb_start = .; - .kernel:dtb : { *(.kernel:dtb) } - _dtb_end = .; - - . = ALIGN(4096); - _vmlinux_start = .; - .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) } - _vmlinux_end = .; + .kernel:dtb : + { + _dtb_start = .; + *(.kernel:dtb) + _dtb_end = .; + } . = ALIGN(4096); - _initrd_start = .; - .kernel:initrd : { *(.kernel:initrd) } - _initrd_end = .; + .kernel:vmlinux.strip : + { + _vmlinux_start = .; + *(.kernel:vmlinux.strip) + _vmlinux_end = .; + } . = ALIGN(4096); - _edata = .; + .kernel:initrd : + { + _initrd_start = .; + *(.kernel:initrd) + _initrd_end = .; + } . = ALIGN(4096); - __bss_start = .; .bss : { - *(.sbss) - *(.bss) + _edata = .; + __bss_start = .; + *(.sbss) + *(.bss) + *(COMMON) + _end = . ; } - . = ALIGN(4096); - _end = . ; } diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index c683bce4c26e..126ef1b08a01 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -104,7 +104,6 @@ CONFIG_ROOT_NFS=y CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index a721cd3d793f..abcf00ad939e 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -101,7 +101,6 @@ CONFIG_ROOT_NFS=y CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig index 55e0725500dc..11662c217ac0 100644 --- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig @@ -58,7 +58,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig index d724095530a6..ebe9b30b0721 100644 --- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig @@ -59,7 +59,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index 4b44beaa21ae..eb25229b387a 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -63,7 +63,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig index b614508d6fd2..f51c7ebc181e 100644 --- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig +++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig @@ -168,7 +168,6 @@ CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index f9e6a3ea5a64..2a84fd7f631c 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -132,8 +132,8 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_IND=y CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig index 9fa1613e5e2b..d32283555b53 100644 --- a/arch/powerpc/configs/e55xx_smp_defconfig +++ b/arch/powerpc/configs/e55xx_smp_defconfig @@ -6,10 +6,10 @@ CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SPARSE_IRQ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y @@ -25,8 +25,32 @@ CONFIG_P5020_DS=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=m -CONFIG_SPARSE_IRQ=y # CONFIG_PCI is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NET_IPIP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_ARPD=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_IPV6=y +CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y @@ -34,6 +58,9 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +CONFIG_NET_ETHERNET=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -64,22 +91,14 @@ CONFIG_NLS=y CONFIG_NLS_UTF8=m CONFIG_CRC_T10DIF=y CONFIG_CRC_ITU_T=m -CONFIG_LIBCRC32C=m CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_VIRQ_DEBUG=y -CONFIG_CRYPTO=y -CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_TALITOS=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index c06a86c33098..96b89df7752a 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -204,7 +204,6 @@ CONFIG_CRC_T10DIF=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 942ced90557c..de65841aa04e 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -206,7 +206,6 @@ CONFIG_CRC_T10DIF=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index 038a308cbfc4..a1cc8179e9fd 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -171,7 +171,6 @@ CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index ac4fc41035f6..f8b394a76ac3 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -112,8 +112,8 @@ CONFIG_IRDA_CACHE_LAST_LSAP=y CONFIG_IRDA_FAST_RR=y CONFIG_IRTTY_SIR=m CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 0a10fb009ef7..214208924a9c 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -351,8 +351,8 @@ CONFIG_VLSI_FIR=m CONFIG_VIA_FIR=m CONFIG_MCS_FIR=m CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index caba919f65d8..6472322bf13b 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -52,8 +52,8 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_DIAG is not set CONFIG_IPV6=y CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 249ddd0a27cd..7de13865508c 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -146,12 +146,18 @@ CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_CXGB3_ISCSI=m +CONFIG_SCSI_CXGB4_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_BE2ISCSI=m CONFIG_SCSI_IBMVSCSI=y CONFIG_SCSI_IBMVFC=m CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_IPR=y CONFIG_SCSI_QLA_FC=m +CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_ATA=y # CONFIG_ATA_SFF is not set @@ -197,6 +203,8 @@ CONFIG_S2IO=m CONFIG_MYRI10GE=m CONFIG_NETXEN_NIC=m CONFIG_MLX4_EN=m +CONFIG_QLGE=m +CONFIG_BE2NET=m CONFIG_PPP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 1833d1a07e79..c0d842cfd012 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -157,6 +157,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_476_DD2 ASM_CONST(0x0000000000010000) #define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000) #define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000) +#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x0000000000080000) #define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000) #define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000) #define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000) @@ -178,22 +179,18 @@ extern const char *powerpc_base_platform; #define LONG_ASM_CONST(x) 0 #endif -#define CPU_FTR_SLB LONG_ASM_CONST(0x0000000100000000) -#define CPU_FTR_16M_PAGE LONG_ASM_CONST(0x0000000200000000) -#define CPU_FTR_TLBIEL LONG_ASM_CONST(0x0000000400000000) + +#define CPU_FTR_HVMODE_206 LONG_ASM_CONST(0x0000000800000000) +#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_IABR LONG_ASM_CONST(0x0000002000000000) #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000) #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000) #define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000) -#define CPU_FTR_LOCKLESS_TLBIE LONG_ASM_CONST(0x0000040000000000) -#define CPU_FTR_CI_LARGE_PAGE LONG_ASM_CONST(0x0000100000000000) #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000) #define CPU_FTR_PURR LONG_ASM_CONST(0x0000400000000000) #define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000800000000000) #define CPU_FTR_SPURR LONG_ASM_CONST(0x0001000000000000) #define CPU_FTR_DSCR LONG_ASM_CONST(0x0002000000000000) -#define CPU_FTR_1T_SEGMENT LONG_ASM_CONST(0x0004000000000000) -#define CPU_FTR_NO_SLBIE_B LONG_ASM_CONST(0x0008000000000000) #define CPU_FTR_VSX LONG_ASM_CONST(0x0010000000000000) #define CPU_FTR_SAO LONG_ASM_CONST(0x0020000000000000) #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000) @@ -202,12 +199,14 @@ extern const char *powerpc_base_platform; #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0800000000000000) +#define CPU_FTR_ICSWX LONG_ASM_CONST(0x1000000000000000) #ifndef __ASSEMBLY__ -#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_SLB | \ - CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_16M_PAGE) +#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) + +#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_SLB | MMU_FTR_TLBIEL | \ + MMU_FTR_16M_PAGE) /* We only set the altivec features if the kernel was compiled with altivec * support @@ -387,7 +386,8 @@ extern const char *powerpc_base_platform; CPU_FTR_DBELL) #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ - CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD) + CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ @@ -407,44 +407,45 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ - CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \ - CPU_FTR_POPCNTB) + CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \ + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB) #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ + CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ - CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB) + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ + CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ - CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD) + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_ICSWX | CPU_FTR_CFAR) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | \ - CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \ + CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \ CPU_FTR_UNALIGNED_LD_STD) #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | \ - CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \ - CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_NO_SLBIE_B) + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_PURR | CPU_FTR_REAL_LE) #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) +#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \ + CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) + #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500) +#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500 | CPU_FTRS_A2) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \ CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ - CPU_FTR_1T_SEGMENT | CPU_FTR_VSX) + CPU_FTR_VSX) #endif #else enum { @@ -487,7 +488,7 @@ enum { #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500) +#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500 & CPU_FTRS_A2) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \ diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index f71bb4c118b4..ce516e5eb0d3 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -37,16 +37,16 @@ extern cpumask_t threads_core_mask; * This can typically be used for things like IPI for tlb invalidations * since those need to be done only once per core/TLB */ -static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads) +static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) { cpumask_t tmp, res; int i; - res = CPU_MASK_NONE; + cpumask_clear(&res); for (i = 0; i < NR_CPUS; i += threads_per_core) { - cpus_shift_left(tmp, threads_core_mask, i); - if (cpus_intersects(threads, tmp)) - cpu_set(i, res); + cpumask_shift_left(&tmp, &threads_core_mask, i); + if (cpumask_intersects(threads, &tmp)) + cpumask_set_cpu(i, &res); } return res; } @@ -58,7 +58,7 @@ static inline int cpu_nr_cores(void) static inline cpumask_t cpu_online_cores_map(void) { - return cpu_thread_mask_to_cores(cpu_online_map); + return cpu_thread_mask_to_cores(cpu_online_mask); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 0893ab9343a6..9c70d0ca96d4 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -27,9 +27,8 @@ enum ppc_dbell { PPC_G_DBELL_MC = 4, /* guest mcheck doorbell */ }; -extern void doorbell_message_pass(int target, int msg); +extern void doorbell_cause_ipi(int cpu, unsigned long data); extern void doorbell_exception(struct pt_regs *regs); -extern void doorbell_check_self(void); extern void doorbell_setup_this_cpu(void); static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index f0fb4fc1f6e6..45921672b97a 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -52,6 +52,10 @@ extern struct ppc_emulated { #ifdef CONFIG_VSX struct ppc_emulated_entry vsx; #endif +#ifdef CONFIG_PPC64 + struct ppc_emulated_entry mfdscr; + struct ppc_emulated_entry mtdscr; +#endif } ppc_emulated; extern u32 ppc_warn_emulated; diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 7778d6f0c878..f5dfe3411f64 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -46,6 +46,7 @@ #define EX_CCR 60 #define EX_R3 64 #define EX_LR 72 +#define EX_CFAR 80 /* * We're short on space and time in the exception prolog, so we can't @@ -56,30 +57,40 @@ #define LOAD_HANDLER(reg, label) \ addi reg,reg,(label)-_stext; /* virt addr of handler ... */ -#define EXCEPTION_PROLOG_1(area) \ - mfspr r13,SPRN_SPRG_PACA; /* get paca address into r13 */ \ +/* Exception register prefixes */ +#define EXC_HV H +#define EXC_STD + +#define EXCEPTION_PROLOG_1(area) \ + GET_PACA(r13); \ std r9,area+EX_R9(r13); /* save r9 - r12 */ \ std r10,area+EX_R10(r13); \ std r11,area+EX_R11(r13); \ std r12,area+EX_R12(r13); \ - mfspr r9,SPRN_SPRG_SCRATCH0; \ + BEGIN_FTR_SECTION_NESTED(66); \ + mfspr r10,SPRN_CFAR; \ + std r10,area+EX_CFAR(r13); \ + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ + GET_SCRATCH0(r9); \ std r9,area+EX_R13(r13); \ mfcr r9 -#define EXCEPTION_PROLOG_PSERIES_1(label) \ +#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ ld r12,PACAKBASE(r13); /* get high part of &label */ \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ - mfspr r11,SPRN_SRR0; /* save SRR0 */ \ + mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label) \ - mtspr SPRN_SRR0,r12; \ - mfspr r12,SPRN_SRR1; /* and SRR1 */ \ - mtspr SPRN_SRR1,r10; \ - rfid; \ + mtspr SPRN_##h##SRR0,r12; \ + mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ + mtspr SPRN_##h##SRR1,r10; \ + h##rfid; \ b . /* prevent speculative execution */ +#define EXCEPTION_PROLOG_PSERIES_1(label, h) \ + __EXCEPTION_PROLOG_PSERIES_1(label, h) -#define EXCEPTION_PROLOG_PSERIES(area, label) \ +#define EXCEPTION_PROLOG_PSERIES(area, label, h) \ EXCEPTION_PROLOG_1(area); \ - EXCEPTION_PROLOG_PSERIES_1(label); + EXCEPTION_PROLOG_PSERIES_1(label, h); /* * The common exception prolog is used for all except a few exceptions @@ -98,10 +109,11 @@ beq- 1f; \ ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ 1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ - bge- cr1,2f; /* abort if it is */ \ - b 3f; \ -2: li r1,(n); /* will be reloaded later */ \ + blt+ cr1,3f; /* abort if it is */ \ + li r1,(n); /* will be reloaded later */ \ sth r1,PACA_TRAP_SAVE(r13); \ + std r3,area+EX_R3(r13); \ + addi r3,r13,area; /* r3 -> where regs are saved*/ \ b bad_stack; \ 3: std r9,_CCR(r1); /* save CR in stackframe */ \ std r11,_NIP(r1); /* save SRR0 in stackframe */ \ @@ -123,6 +135,10 @@ std r9,GPR11(r1); \ std r10,GPR12(r1); \ std r11,GPR13(r1); \ + BEGIN_FTR_SECTION_NESTED(66); \ + ld r10,area+EX_CFAR(r13); \ + std r10,ORIG_GPR3(r1); \ + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ mflr r9; /* save LR in stackframe */ \ std r9,_LINK(r1); \ @@ -143,57 +159,62 @@ /* * Exception vectors. */ -#define STD_EXCEPTION_PSERIES(n, label) \ - . = n; \ +#define STD_EXCEPTION_PSERIES(loc, vec, label) \ + . = loc; \ .globl label##_pSeries; \ label##_pSeries: \ HMT_MEDIUM; \ - DO_KVM n; \ - mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) + DO_KVM vec; \ + SET_SCRATCH0(r13); /* save r13 */ \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD) -#define HSTD_EXCEPTION_PSERIES(n, label) \ - . = n; \ - .globl label##_pSeries; \ -label##_pSeries: \ +#define STD_EXCEPTION_HV(loc, vec, label) \ + . = loc; \ + .globl label##_hv; \ +label##_hv: \ HMT_MEDIUM; \ - mtspr SPRN_SPRG_SCRATCH0,r20; /* save r20 */ \ - mfspr r20,SPRN_HSRR0; /* copy HSRR0 to SRR0 */ \ - mtspr SPRN_SRR0,r20; \ - mfspr r20,SPRN_HSRR1; /* copy HSRR0 to SRR0 */ \ - mtspr SPRN_SRR1,r20; \ - mfspr r20,SPRN_SPRG_SCRATCH0; /* restore r20 */ \ - mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) + DO_KVM vec; \ + SET_SCRATCH0(r13); /* save r13 */ \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV) - -#define MASKABLE_EXCEPTION_PSERIES(n, label) \ - . = n; \ - .globl label##_pSeries; \ -label##_pSeries: \ +#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h) \ HMT_MEDIUM; \ - DO_KVM n; \ - mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - mfspr r13,SPRN_SPRG_PACA; /* get paca address into r13 */ \ + DO_KVM vec; \ + SET_SCRATCH0(r13); /* save r13 */ \ + GET_PACA(r13); \ std r9,PACA_EXGEN+EX_R9(r13); /* save r9, r10 */ \ std r10,PACA_EXGEN+EX_R10(r13); \ lbz r10,PACASOFTIRQEN(r13); \ mfcr r9; \ cmpwi r10,0; \ - beq masked_interrupt; \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ + beq masked_##h##interrupt; \ + GET_SCRATCH0(r10); \ std r10,PACA_EXGEN+EX_R13(r13); \ std r11,PACA_EXGEN+EX_R11(r13); \ std r12,PACA_EXGEN+EX_R12(r13); \ ld r12,PACAKBASE(r13); /* get high part of &label */ \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ - mfspr r11,SPRN_SRR0; /* save SRR0 */ \ + mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label##_common) \ - mtspr SPRN_SRR0,r12; \ - mfspr r12,SPRN_SRR1; /* and SRR1 */ \ - mtspr SPRN_SRR1,r10; \ - rfid; \ + mtspr SPRN_##h##SRR0,r12; \ + mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ + mtspr SPRN_##h##SRR1,r10; \ + h##rfid; \ b . /* prevent speculative execution */ +#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h) \ + __MASKABLE_EXCEPTION_PSERIES(vec, label, h) + +#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \ + . = loc; \ + .globl label##_pSeries; \ +label##_pSeries: \ + _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_STD) + +#define MASKABLE_EXCEPTION_HV(loc, vec, label) \ + . = loc; \ + .globl label##_hv; \ +label##_hv: \ + _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_HV) #ifdef CONFIG_PPC_ISERIES #define DISABLE_INTS \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 921a8470e18a..9a67a38bf7b9 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -49,7 +49,7 @@ label##5: \ FTR_ENTRY_OFFSET label##2b-label##5b; \ FTR_ENTRY_OFFSET label##3b-label##5b; \ FTR_ENTRY_OFFSET label##4b-label##5b; \ - .ifgt (label##4b-label##3b)-(label##2b-label##1b); \ + .ifgt (label##4b- label##3b)-(label##2b- label##1b); \ .error "Feature section else case larger than body"; \ .endif; \ .popsection; @@ -146,6 +146,19 @@ label##5: \ #ifndef __ASSEMBLY__ +#define ASM_FTR_IF(section_if, section_else, msk, val) \ + stringify_in_c(BEGIN_FTR_SECTION) \ + section_if "; " \ + stringify_in_c(FTR_SECTION_ELSE) \ + section_else "; " \ + stringify_in_c(ALT_FTR_SECTION_END((msk), (val))) + +#define ASM_FTR_IFSET(section_if, section_else, msk) \ + ASM_FTR_IF(section_if, section_else, (msk), (msk)) + +#define ASM_FTR_IFCLR(section_if, section_else, msk) \ + ASM_FTR_IF(section_if, section_else, (msk), 0) + #define ASM_MMU_FTR_IF(section_if, section_else, msk, val) \ stringify_in_c(BEGIN_MMU_FTR_SECTION) \ section_if "; " \ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 4ef662e4a31d..3a6c586c4e40 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -47,6 +47,7 @@ #define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000) #define FW_FEATURE_CMO ASM_CONST(0x0000000002000000) #define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000) +#define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000) #ifndef __ASSEMBLY__ @@ -60,7 +61,7 @@ enum { FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | - FW_FEATURE_CMO | FW_FEATURE_VPHN, + FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 8edec710cc6d..852b8c1c09db 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -102,6 +102,7 @@ #define H_ANDCOND (1UL<<(63-33)) #define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ #define H_ICACHE_SYNCHRONIZE (1UL<<(63-41)) /* dcbst, icbi, etc (ignored for IO pages */ +#define H_COALESCE_CAND (1UL<<(63-42)) /* page is a good candidate for coalescing */ #define H_ZERO_PAGE (1UL<<(63-48)) /* zero the page before mapping (ignored for IO pages) */ #define H_COPY_PAGE (1UL<<(63-49)) #define H_N (1UL<<(63-61)) @@ -234,6 +235,7 @@ #define H_GET_MPP 0x2D4 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC #define H_BEST_ENERGY 0x2F4 +#define H_GET_MPP_X 0x314 #define MAX_HCALL_OPCODE H_BEST_ENERGY #ifndef __ASSEMBLY__ @@ -312,6 +314,16 @@ struct hvcall_mpp_data { int h_get_mpp(struct hvcall_mpp_data *); +struct hvcall_mpp_x_data { + unsigned long coalesced_bytes; + unsigned long pool_coalesced_bytes; + unsigned long pool_purr_cycles; + unsigned long pool_spurr_cycles; + unsigned long reserved[3]; +}; + +int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data); + #ifdef CONFIG_PPC_PSERIES extern int CMO_PrPSP; extern int CMO_SecPSP; diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h index 6efc7782ebf2..fbae49286926 100644 --- a/arch/powerpc/platforms/cell/io-workarounds.h +++ b/arch/powerpc/include/asm/io-workarounds.h @@ -31,7 +31,6 @@ struct iowa_bus { void *private; }; -void __devinit io_workaround_init(void); void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *, int (*)(struct iowa_bus *, void *), void *); struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR); diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 001f2f11c19b..45698d55cd6a 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_IO_H #ifdef __KERNEL__ +#define ARCH_HAS_IOREMAP_WC + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -481,10 +483,16 @@ __do_out_asm(_rec_outl, "stwbrx") _memcpy_fromio(dst,PCI_FIX_ADDR(src),n) #endif /* !CONFIG_EEH */ -#ifdef CONFIG_PPC_INDIRECT_IO -#define DEF_PCI_HOOK(x) x +#ifdef CONFIG_PPC_INDIRECT_PIO +#define DEF_PCI_HOOK_pio(x) x +#else +#define DEF_PCI_HOOK_pio(x) NULL +#endif + +#ifdef CONFIG_PPC_INDIRECT_MMIO +#define DEF_PCI_HOOK_mem(x) x #else -#define DEF_PCI_HOOK(x) NULL +#define DEF_PCI_HOOK_mem(x) NULL #endif /* Structure containing all the hooks */ @@ -504,7 +512,7 @@ extern struct ppc_pci_io { #define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ static inline ret name at \ { \ - if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \ + if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \ return ppc_pci_io.name al; \ return __do_##name al; \ } @@ -512,7 +520,7 @@ static inline ret name at \ #define DEF_PCI_AC_NORET(name, at, al, space, aa) \ static inline void name at \ { \ - if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \ + if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \ ppc_pci_io.name al; \ else \ __do_##name al; \ @@ -616,12 +624,13 @@ static inline void iosync(void) * * ioremap is the standard one and provides non-cacheable guarded mappings * and can be hooked by the platform via ppc_md * - * * ioremap_flags allows to specify the page flags as an argument and can - * also be hooked by the platform via ppc_md. ioremap_prot is the exact - * same thing as ioremap_flags. + * * ioremap_prot allows to specify the page flags as an argument and can + * also be hooked by the platform via ppc_md. * * * ioremap_nocache is identical to ioremap * + * * ioremap_wc enables write combining + * * * iounmap undoes such a mapping and can be hooked * * * __ioremap_at (and the pending __iounmap_at) are low level functions to @@ -629,7 +638,7 @@ static inline void iosync(void) * currently be hooked. Must be page aligned. * * * __ioremap is the low level implementation used by ioremap and - * ioremap_flags and cannot be hooked (but can be used by a hook on one + * ioremap_prot and cannot be hooked (but can be used by a hook on one * of the previous ones) * * * __ioremap_caller is the same as above but takes an explicit caller @@ -640,10 +649,10 @@ static inline void iosync(void) * */ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); -extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size, - unsigned long flags); +extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, + unsigned long flags); +extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); #define ioremap_nocache(addr, size) ioremap((addr), (size)) -#define ioremap_prot(addr, size, prot) ioremap_flags((addr), (size), (prot)) extern void iounmap(volatile void __iomem *addr); diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h new file mode 100644 index 000000000000..b1a9a1be3c21 --- /dev/null +++ b/arch/powerpc/include/asm/io_event_irq.h @@ -0,0 +1,54 @@ +/* + * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H +#define _ASM_POWERPC_IO_EVENT_IRQ_H + +#include <linux/types.h> +#include <linux/notifier.h> + +#define PSERIES_IOEI_RPC_MAX_LEN 216 + +#define PSERIES_IOEI_TYPE_ERR_DETECTED 0x01 +#define PSERIES_IOEI_TYPE_ERR_RECOVERED 0x02 +#define PSERIES_IOEI_TYPE_EVENT 0x03 +#define PSERIES_IOEI_TYPE_RPC_PASS_THRU 0x04 + +#define PSERIES_IOEI_SUBTYPE_NOT_APP 0x00 +#define PSERIES_IOEI_SUBTYPE_REBALANCE_REQ 0x01 +#define PSERIES_IOEI_SUBTYPE_NODE_ONLINE 0x03 +#define PSERIES_IOEI_SUBTYPE_NODE_OFFLINE 0x04 +#define PSERIES_IOEI_SUBTYPE_DUMP_SIZE_CHANGE 0x05 +#define PSERIES_IOEI_SUBTYPE_TORRENT_IRV_UPDATE 0x06 +#define PSERIES_IOEI_SUBTYPE_TORRENT_HFI_CFGED 0x07 + +#define PSERIES_IOEI_SCOPE_NOT_APP 0x00 +#define PSERIES_IOEI_SCOPE_RIO_HUB 0x36 +#define PSERIES_IOEI_SCOPE_RIO_BRIDGE 0x37 +#define PSERIES_IOEI_SCOPE_PHB 0x38 +#define PSERIES_IOEI_SCOPE_EADS_GLOBAL 0x39 +#define PSERIES_IOEI_SCOPE_EADS_SLOT 0x3A +#define PSERIES_IOEI_SCOPE_TORRENT_HUB 0x3B +#define PSERIES_IOEI_SCOPE_SERVICE_PROC 0x51 + +/* Platform Event Log Format, Version 6, data portition of IO event section */ +struct pseries_io_event { + uint8_t event_type; /* 0x00 IO-Event Type */ + uint8_t rpc_data_len; /* 0x01 RPC data length */ + uint8_t scope; /* 0x02 Error/Event Scope */ + uint8_t event_subtype; /* 0x03 I/O-Event Sub-Type */ + uint32_t drc_index; /* 0x04 DRC Index */ + uint8_t rpc_data[PSERIES_IOEI_RPC_MAX_LEN]; + /* 0x08 RPC Data (0-216 bytes, */ + /* padded to 4 bytes alignment) */ +}; + +extern struct atomic_notifier_head pseries_ioei_notifier_list; + +#endif /* _ASM_POWERPC_IO_EVENT_IRQ_H */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 67ab5fb7d153..1bff591f7f72 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -88,9 +88,6 @@ struct irq_host_ops { /* Dispose of such a mapping */ void (*unmap)(struct irq_host *h, unsigned int virq); - /* Update of such a mapping */ - void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw); - /* Translate device-tree interrupt specifier from raw format coming * from the firmware to a irq_hw_number_t (interrupt line number) and * type (sense) that can be passed to set_irq_type(). In the absence @@ -128,19 +125,10 @@ struct irq_host { struct device_node *of_node; }; -/* The main irq map itself is an array of NR_IRQ entries containing the - * associate host and irq number. An entry with a host of NULL is free. - * An entry can be allocated if it's free, the allocator always then sets - * hwirq first to the host's invalid irq number and then fills ops. - */ -struct irq_map_entry { - irq_hw_number_t hwirq; - struct irq_host *host; -}; - -extern struct irq_map_entry irq_map[NR_IRQS]; - +struct irq_data; +extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d); extern irq_hw_number_t virq_to_hw(unsigned int virq); +extern bool virq_is_host(unsigned int virq, struct irq_host *host); /** * irq_alloc_host - Allocate a new irq_host data structure diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index f54408d995b5..8a33698c61bd 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -76,7 +76,7 @@ extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); extern cpumask_t cpus_in_sr; static inline int kexec_sr_activated(int cpu) { - return cpu_isset(cpu,cpus_in_sr); + return cpumask_test_cpu(cpu, &cpus_in_sr); } struct kimage; diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea6963ad77..d2ca5ed3877b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. + */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. + */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR (1 << 3) + +/* + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a + * previous KVM_GET_REGS. + * + * Unless otherwise indicated, setting any register with KVM_SET_SREGS + * directly sets its value. It does not trigger any special semantics such + * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct + * just received from KVM_GET_SREGS is always a no-op. + */ struct kvm_sregs { __u32 pvr; union { @@ -62,6 +170,82 @@ struct kvm_sregs { __u64 dbat[8]; } ppc32; } s; + struct { + union { + struct { /* KVM_SREGS_E_IMPL_FSL */ + __u32 features; /* KVM_SREGS_E_FSL_ */ + __u32 svr; + __u64 mcar; + __u32 hid0; + + /* KVM_SREGS_E_FSL_PIDn */ + __u32 pid1, pid2; + } fsl; + __u8 pad[256]; + } impl; + + __u32 features; /* KVM_SREGS_E_ */ + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ + __u32 pir; /* read-only */ + __u64 sprg8; + __u64 sprg9; /* E.ED */ + __u64 csrr0; + __u64 dsrr0; /* E.ED */ + __u64 mcsrr0; + __u32 csrr1; + __u32 dsrr1; /* E.ED */ + __u32 mcsrr1; + __u32 esr; + __u64 dear; + __u64 ivpr; + __u64 mcivpr; + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ + + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ + __u32 tcr; + __u32 decar; + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ + + /* + * Userspace can read TB directly, but the + * value reported here is consistent with "dec". + * + * Read-only. + */ + __u64 tb; + + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ + __u32 dbcr[3]; + __u32 iac[4]; + __u32 dac[2]; + __u32 dvc[2]; + __u8 num_iac; /* read-only */ + __u8 num_dac; /* read-only */ + __u8 num_dvc; /* read-only */ + __u8 pad; + + __u32 epr; /* EXP */ + __u32 vrsave; /* a.k.a. USPRG0 */ + __u32 epcr; /* KVM_SREGS_E_64 */ + + __u32 mas0; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; + __u32 mas4; + __u32 mas6; + + __u32 ivor_low[16]; /* IVOR0-15 */ + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ + + __u32 mmucfg; /* read-only */ + __u32 eptcfg; /* E.PT, read-only */ + __u32 tlbcfg[4];/* read-only */ + __u32 tlbps[4]; /* read-only */ + + __u32 eplc, epsc; /* E.PD */ + } e; __u8 pad[1020]; } u; }; diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index d22d39942a92..a0e57618ff33 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); } -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 5b7504674397..0951b17f4eb5 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -59,6 +59,7 @@ #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 #define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501 +#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 36fdb3aff30b..d5a8a3861635 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -34,6 +34,7 @@ (\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \ (\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \ (\intno == BOOK3S_INTERRUPT_EXTERNAL) || \ + (\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \ (\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \ (\intno == BOOK3S_INTERRUPT_PROGRAM) || \ (\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \ diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 7fea26fffb25..7a2a565f88c4 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 { u32 host_pid[E500_PID_NUM]; u32 pid[E500_PID_NUM]; + u32 svr; u32 mas0; u32 mas1; @@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 { u32 hid1; u32 tlb0cfg; u32 tlb1cfg; + u64 mcar; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bba3b9b72a39..186f150b9b89 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -223,6 +223,7 @@ struct kvm_vcpu_arch { ulong hflags; ulong guest_owned_ext; #endif + u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong sprg4; ulong sprg5; @@ -232,6 +233,9 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; + ulong mcsrr0; + ulong mcsrr1; + ulong mcsr; ulong esr; u32 dec; u32 decar; @@ -255,6 +259,7 @@ struct kvm_vcpu_arch { u32 dbsr; #ifdef CONFIG_KVM_EXIT_TIMING + struct mutex exit_timing_lock; struct kvmppc_exit_timing timing_exit; struct kvmppc_exit_timing timing_last_enter; u32 last_exit_type; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ecb3bc74c344..9345238edecf 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); /* Core-specific hooks */ @@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index a077adc0b35e..e0298d26ce5d 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -210,6 +210,8 @@ struct dtl_entry { #define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ #define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) +extern struct kmem_cache *dtl_cache; + /* * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls * reading from the dispatch trace log. If other code wants to consume diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e4f01915fbb0..47cacddb14cf 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -29,21 +29,6 @@ struct file; struct pci_controller; struct kimage; -#ifdef CONFIG_SMP -struct smp_ops_t { - void (*message_pass)(int target, int msg); - int (*probe)(void); - void (*kick_cpu)(int nr); - void (*setup_cpu)(int nr); - void (*bringup_done)(void); - void (*take_timebase)(void); - void (*give_timebase)(void); - int (*cpu_disable)(void); - void (*cpu_die)(unsigned int nr); - int (*cpu_bootable)(unsigned int nr); -}; -#endif - struct machdep_calls { char *name; #ifdef CONFIG_PPC64 @@ -267,6 +252,7 @@ struct machdep_calls { extern void e500_idle(void); extern void power4_idle(void); +extern void power7_idle(void); extern void ppc6xx_idle(void); extern void book3e_idle(void); @@ -311,12 +297,6 @@ extern sys_ctrler_t sys_ctrler; #endif /* CONFIG_PPC_PMAC */ -#ifdef CONFIG_SMP -/* Poor default implementations */ -extern void __devinit smp_generic_give_timebase(void); -extern void __devinit smp_generic_take_timebase(void); -#endif /* CONFIG_SMP */ - /* Functions to produce codes on the leds. * The SRC code should be unique for the message category and should diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 17194fcd4040..3ea0f9a259d8 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -43,6 +43,7 @@ #define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) #define MAS0_ESEL(x) (((x) << 16) & 0x0FFF0000) #define MAS0_NV(x) ((x) & 0x00000FFF) +#define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_HES 0x00004000 #define MAS0_WQ_ALLWAYS 0x00000000 #define MAS0_WQ_COND 0x00001000 @@ -137,6 +138,21 @@ #define MMUCSR0_TLB2PS 0x00078000 /* TLB2 Page Size */ #define MMUCSR0_TLB3PS 0x00780000 /* TLB3 Page Size */ +/* MMUCFG bits */ +#define MMUCFG_MAVN_NASK 0x00000003 +#define MMUCFG_MAVN_V1_0 0x00000000 +#define MMUCFG_MAVN_V2_0 0x00000001 +#define MMUCFG_NTLB_MASK 0x0000000c +#define MMUCFG_NTLB_SHIFT 2 +#define MMUCFG_PIDSIZE_MASK 0x000007c0 +#define MMUCFG_PIDSIZE_SHIFT 6 +#define MMUCFG_TWC 0x00008000 +#define MMUCFG_LRAT 0x00010000 +#define MMUCFG_RASIZE_MASK 0x00fe0000 +#define MMUCFG_RASIZE_SHIFT 17 +#define MMUCFG_LPIDSIZE_MASK 0x0f000000 +#define MMUCFG_LPIDSIZE_SHIFT 24 + /* TLBnCFG encoding */ #define TLBnCFG_N_ENTRY 0x00000fff /* number of entries */ #define TLBnCFG_HES 0x00002000 /* HW select supported */ @@ -229,6 +245,10 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; extern int mmu_linear_psize; extern int mmu_vmemmap_psize; +#ifdef CONFIG_PPC64 +extern unsigned long linear_map_top; +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */ diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index ae7b3efec8e5..d865bd909c7d 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -408,6 +408,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } #endif /* CONFIG_PPC_SUBPAGE_PROT */ typedef unsigned long mm_context_id_t; +struct spinlock; typedef struct { mm_context_id_t id; @@ -423,6 +424,11 @@ typedef struct { #ifdef CONFIG_PPC_SUBPAGE_PROT struct subpage_prot_table spt; #endif /* CONFIG_PPC_SUBPAGE_PROT */ +#ifdef CONFIG_PPC_ICSWX + struct spinlock *cop_lockp; /* guard acop and cop_pid */ + unsigned long acop; /* mask of enabled coprocessor types */ + unsigned int cop_pid; /* pid value used with coprocessors */ +#endif /* CONFIG_PPC_ICSWX */ } mm_context_t; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index bb40a06d3b77..4138b21ae80a 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -56,11 +56,6 @@ */ #define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000) -/* This indicates that the processor uses the ISA 2.06 server tlbie - * mnemonics - */ -#define MMU_FTR_TLBIE_206 ASM_CONST(0x00400000) - /* Enable use of TLB reservation. Processor should support tlbsrx. * instruction and MAS0[WQ]. */ @@ -70,6 +65,53 @@ */ #define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000) +/* MMU is SLB-based + */ +#define MMU_FTR_SLB ASM_CONST(0x02000000) + +/* Support 16M large pages + */ +#define MMU_FTR_16M_PAGE ASM_CONST(0x04000000) + +/* Supports TLBIEL variant + */ +#define MMU_FTR_TLBIEL ASM_CONST(0x08000000) + +/* Supports tlbies w/o locking + */ +#define MMU_FTR_LOCKLESS_TLBIE ASM_CONST(0x10000000) + +/* Large pages can be marked CI + */ +#define MMU_FTR_CI_LARGE_PAGE ASM_CONST(0x20000000) + +/* 1T segments available + */ +#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) + +/* Doesn't support the B bit (1T segment) in SLBIE + */ +#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x80000000) + +/* MMU feature bit sets for various CPUs */ +#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ + MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 +#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2 +#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 +#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ + MMU_FTR_CI_LARGE_PAGE +#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ + MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B +#define MMU_FTRS_A2 MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \ + MMU_FTR_USE_TLBIVAX_BCAST | \ + MMU_FTR_LOCK_BCAST_INVAL | \ + MMU_FTR_USE_TLBRSRV | \ + MMU_FTR_USE_PAIRED_MAS | \ + MMU_FTR_TLBIEL | \ + MMU_FTR_16M_PAGE #ifndef __ASSEMBLY__ #include <asm/cputable.h> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 81fb41289d6c..a73668a5f30d 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -32,6 +32,10 @@ extern void __destroy_context(unsigned long context_id); extern void mmu_context_init(void); #endif +extern void switch_cop(struct mm_struct *next); +extern int use_cop(unsigned long acop, struct mm_struct *mm); +extern void drop_cop(unsigned long acop, struct mm_struct *mm); + /* * switch_mm is the entry point called from the architecture independent * code in kernel/sched.c @@ -55,6 +59,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev == next) return; +#ifdef CONFIG_PPC_ICSWX + /* Switch coprocessor context only if prev or next uses a coprocessor */ + if (prev->context.acop || next->context.acop) + switch_cop(next); +#endif /* CONFIG_PPC_ICSWX */ + /* We must stop all altivec streams before changing the HW * context */ @@ -67,7 +77,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * sub architectures. */ #ifdef CONFIG_PPC_STD_MMU_64 - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) switch_slb(tsk, next); else switch_stab(tsk, next); diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 7005ee0b074d..df18989e78d4 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -3,7 +3,6 @@ #ifdef __KERNEL__ #include <linux/irq.h> -#include <linux/sysdev.h> #include <asm/dcr.h> #include <asm/msi_bitmap.h> @@ -263,6 +262,7 @@ struct mpic #ifdef CONFIG_SMP struct irq_chip hc_ipi; #endif + struct irq_chip hc_tm; const char *name; /* Flags */ unsigned int flags; @@ -281,7 +281,7 @@ struct mpic /* vector numbers used for internal sources (ipi/timers) */ unsigned int ipi_vecs[4]; - unsigned int timer_vecs[4]; + unsigned int timer_vecs[8]; /* Spurious vector to program into unused sources */ unsigned int spurious_vec; @@ -320,8 +320,6 @@ struct mpic /* link */ struct mpic *next; - struct sys_device sysdev; - #ifdef CONFIG_PM struct mpic_irq_save *save_data; #endif @@ -371,6 +369,8 @@ struct mpic * NOTE: This flag trumps MPIC_WANTS_RESET. */ #define MPIC_NO_RESET 0x00004000 +/* Freescale MPIC (compatible includes "fsl,mpic") */ +#define MPIC_FSL 0x00008000 /* MPIC HW modification ID */ #define MPIC_REGSET_MASK 0xf0000000 diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h index d4b4bfa26fb3..89d2f99c1bf4 100644 --- a/arch/powerpc/include/asm/pSeries_reconfig.h +++ b/arch/powerpc/include/asm/pSeries_reconfig.h @@ -18,13 +18,18 @@ extern int pSeries_reconfig_notifier_register(struct notifier_block *); extern void pSeries_reconfig_notifier_unregister(struct notifier_block *); extern struct blocking_notifier_head pSeries_reconfig_chain; +/* Not the best place to put this, will be fixed when we move some + * of the rtas suspend-me stuff to pseries */ +extern void pSeries_coalesce_init(void); #else /* !CONFIG_PPC_PSERIES */ static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb) { return 0; } static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { } +static inline void pSeries_coalesce_init(void) { } #endif /* CONFIG_PPC_PSERIES */ + #endif /* __KERNEL__ */ #endif /* _PPC64_PSERIES_RECONFIG_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ec57540cd7af..74126765106a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -92,9 +92,9 @@ struct paca_struct { * Now, starting in cacheline 2, the exception save areas */ /* used for most interrupts/exceptions */ - u64 exgen[10] __attribute__((aligned(0x80))); - u64 exmc[10]; /* used for machine checks */ - u64 exslb[10]; /* used for SLB/segment table misses + u64 exgen[11] __attribute__((aligned(0x80))); + u64 exmc[11]; /* used for machine checks */ + u64 exslb[11]; /* used for SLB/segment table misses * on the linear mapping */ /* SLB related definitions */ u16 vmalloc_sllp; @@ -106,7 +106,8 @@ struct paca_struct { pgd_t *pgd; /* Current PGD */ pgd_t *kernel_pgd; /* Kernel PGD */ u64 exgen[8] __attribute__((aligned(0x80))); - u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80))); + /* We can have up to 3 levels of reentrancy in the TLB miss handler */ + u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80))); u64 exmc[8]; /* used for machine checks */ u64 excrit[8]; /* used for crit interrupts */ u64 exdbg[8]; /* used for debug interrupts */ @@ -125,7 +126,7 @@ struct paca_struct { struct task_struct *__current; /* Pointer to current */ u64 kstack; /* Saved Kernel stack addr */ u64 stab_rr; /* stab/slb round-robin counter */ - u64 saved_r1; /* r1 save for RTAS calls */ + u64 saved_r1; /* r1 save for RTAS calls or PM */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 soft_enabled; /* irq soft-enable flag */ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 812b2cd80aed..9356262fd3cc 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -59,24 +59,7 @@ static __inline__ void clear_page(void *addr) : "ctr", "memory"); } -extern void copy_4K_page(void *to, void *from); - -#ifdef CONFIG_PPC_64K_PAGES -static inline void copy_page(void *to, void *from) -{ - unsigned int i; - for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) { - copy_4K_page(to, from); - to += 4096; - from += 4096; - } -} -#else /* CONFIG_PPC_64K_PAGES */ -static inline void copy_page(void *to, void *from) -{ - copy_4K_page(to, from); -} -#endif /* CONFIG_PPC_64K_PAGES */ +extern void copy_page(void *to, void *from); /* Log 2 of page table size */ extern u64 ppc64_pft_size; @@ -130,7 +113,7 @@ extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); -#define slice_mm_new_context(mm) ((mm)->context.id == 0) +#define slice_mm_new_context(mm) ((mm)->context.id == MMU_NO_CONTEXT) #endif /* __ASSEMBLY__ */ #else diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 2b09cd522d33..81576ee0cfb1 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -257,21 +257,20 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long old; - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - old = pte_update(mm, addr, ptep, _PAGE_RW, 0); + if ((pte_val(*ptep) & _PAGE_RW) == 0) + return; + + pte_update(mm, addr, ptep, _PAGE_RW, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long old; - if ((pte_val(*ptep) & _PAGE_RW) == 0) return; - old = pte_update(mm, addr, ptep, _PAGE_RW, 1); + + pte_update(mm, addr, ptep, _PAGE_RW, 1); } /* diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1255569387b6..e472659d906c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -41,6 +41,10 @@ #define PPC_INST_RFCI 0x4c000066 #define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFMCI 0x4c00004c +#define PPC_INST_MFSPR_DSCR 0x7c1102a6 +#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR 0x7c1103a6 +#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe @@ -56,6 +60,17 @@ #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_XXLOR 0xf0000510 +#define PPC_INST_NAP 0x4c000364 +#define PPC_INST_SLEEP 0x4c0003a4 + +/* A2 specific instructions */ +#define PPC_INST_ERATWE 0x7c0001a6 +#define PPC_INST_ERATRE 0x7c000166 +#define PPC_INST_ERATILX 0x7c000066 +#define PPC_INST_ERATIVAX 0x7c000666 +#define PPC_INST_ERATSX 0x7c000126 +#define PPC_INST_ERATSX_DOT 0x7c000127 + /* macros to insert fields into opcodes */ #define __PPC_RA(a) (((a) & 0x1f) << 16) #define __PPC_RB(b) (((b) & 0x1f) << 11) @@ -67,6 +82,8 @@ #define __PPC_XT(s) __PPC_XS(s) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) #define __PPC_WC(w) (((w) & 0x3) << 21) +#define __PPC_WS(w) (((w) & 0x1f) << 11) + /* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a * larx with EH set as an illegal instruction. @@ -113,6 +130,21 @@ #define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_INST_ERATWE | \ + __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_INST_ERATRE | \ + __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_INST_ERATILX | \ + __PPC_T_TLB(t) | __PPC_RA(a) | \ + __PPC_RB(b)) +#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_INST_ERATIVAX | \ + __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_INST_ERATSX | \ + __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \ + __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) + + /* * Define what the VSX XX1 form instructions will look like, then add * the 128 bit load store instructions based on that. @@ -126,4 +158,7 @@ #define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ VSX_XX3((t), (a), (b))) +#define PPC_NAP stringify_in_c(.long PPC_INST_NAP) +#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) + #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 98210067c1cc..1b422381fc16 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -170,6 +170,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define HMT_MEDIUM or 2,2,2 #define HMT_MEDIUM_HIGH or 5,5,5 # medium high priority #define HMT_HIGH or 3,3,3 +#define HMT_EXTRA_HIGH or 7,7,7 # power7 only #ifdef __KERNEL__ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index de1967a1ff57..d50c2b6d9bc3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -238,6 +238,10 @@ struct thread_struct { #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ +#ifdef CONFIG_PPC64 + unsigned long dscr; + int dscr_inherit; +#endif }; #define ARCH_MIN_TASKALIGN 16 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7e4abebe76c0..c5cae0dd176c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -99,17 +99,23 @@ #define MSR_LE __MASK(MSR_LE_LG) /* Little Endian */ #if defined(CONFIG_PPC_BOOK3S_64) +#define MSR_64BIT MSR_SF + /* Server variant */ #define MSR_ MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV -#define MSR_KERNEL MSR_ | MSR_SF +#define MSR_KERNEL MSR_ | MSR_64BIT #define MSR_USER32 MSR_ | MSR_PR | MSR_EE -#define MSR_USER64 MSR_USER32 | MSR_SF +#define MSR_USER64 MSR_USER32 | MSR_64BIT #elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx) /* Default MSR for kernel mode. */ #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) #endif +#ifndef MSR_64BIT +#define MSR_64BIT 0 +#endif + /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ #define FPSCR_FEX 0x40000000 /* FPU enabled exception summary */ @@ -182,6 +188,8 @@ #define SPRN_CTR 0x009 /* Count Register */ #define SPRN_DSCR 0x11 +#define SPRN_CFAR 0x1c /* Come From Address Register */ +#define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 #define CTRL_CT 0xc0000000 /* current thread */ @@ -210,8 +218,43 @@ #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ #define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ #define SPRN_SPURR 0x134 /* Scaled PURR */ +#define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */ +#define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */ +#define SPRN_HDSISR 0x132 +#define SPRN_HDAR 0x133 +#define SPRN_HDEC 0x136 /* Hypervisor Decrementer */ #define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */ +#define SPRN_RMOR 0x138 /* Real mode offset register */ +#define SPRN_HRMOR 0x139 /* Real mode offset register */ +#define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ +#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ +#define LPCR_VPM0 (1ul << (63-0)) +#define LPCR_VPM1 (1ul << (63-1)) +#define LPCR_ISL (1ul << (63-2)) +#define LPCR_DPFD_SH (63-11) +#define LPCR_VRMA_L (1ul << (63-12)) +#define LPCR_VRMA_LP0 (1ul << (63-15)) +#define LPCR_VRMA_LP1 (1ul << (63-16)) +#define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */ +#define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */ +#define LPCR_PECE 0x00007000 /* powersave exit cause enable */ +#define LPCR_PECE0 0x00004000 /* ext. exceptions can cause exit */ +#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ +#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ +#define LPCR_MER 0x00000800 /* Mediated External Exception */ +#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ +#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ +#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ +#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ +#define SPRN_LPID 0x13F /* Logical Partition Identifier */ +#define SPRN_HMER 0x150 /* Hardware m? error recovery */ +#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ +#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ +#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ +#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */ +#define SPRN_TLBRPNR 0x156 /* P7 TLB control register */ +#define SPRN_TLBLPIDR 0x157 /* P7 TLB control register */ #define SPRN_DBAT0L 0x219 /* Data BAT 0 Lower Register */ #define SPRN_DBAT0U 0x218 /* Data BAT 0 Upper Register */ #define SPRN_DBAT1L 0x21B /* Data BAT 1 Lower Register */ @@ -434,16 +477,23 @@ #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ -#define SRR1_WAKERESET 0x00380000 /* System reset */ #define SRR1_WAKESYSERR 0x00300000 /* System error */ #define SRR1_WAKEEE 0x00200000 /* External interrupt */ #define SRR1_WAKEMT 0x00280000 /* mtctrl */ +#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ +#define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ +#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, + * may not be recoverable */ +#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ +#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ #define SRR1_PROGTRAP 0x00020000 /* Trap */ #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ + #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ @@ -673,12 +723,15 @@ * SPRG usage: * * All 64-bit: - * - SPRG1 stores PACA pointer + * - SPRG1 stores PACA pointer except 64-bit server in + * HV mode in which case it is HSPRG0 * * 64-bit server: * - SPRG0 unused (reserved for HV on Power4) * - SPRG2 scratch for exception vectors * - SPRG3 unused (user visible) + * - HSPRG0 stores PACA in HV mode + * - HSPRG1 scratch for "HV" exceptions * * 64-bit embedded * - SPRG0 generic exception scratch @@ -741,6 +794,41 @@ #ifdef CONFIG_PPC_BOOK3S_64 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG2 +#define SPRN_SPRG_HPACA SPRN_HSPRG0 +#define SPRN_SPRG_HSCRATCH0 SPRN_HSPRG1 + +#define GET_PACA(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mfspr rX,SPRN_SPRG_PACA; \ + FTR_SECTION_ELSE_NESTED(66); \ + mfspr rX,SPRN_SPRG_HPACA; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#define SET_PACA(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mtspr SPRN_SPRG_PACA,rX; \ + FTR_SECTION_ELSE_NESTED(66); \ + mtspr SPRN_SPRG_HPACA,rX; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#define GET_SCRATCH0(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mfspr rX,SPRN_SPRG_SCRATCH0; \ + FTR_SECTION_ELSE_NESTED(66); \ + mfspr rX,SPRN_SPRG_HSCRATCH0; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#define SET_SCRATCH0(rX) \ + BEGIN_FTR_SECTION_NESTED(66); \ + mtspr SPRN_SPRG_SCRATCH0,rX; \ + FTR_SECTION_ELSE_NESTED(66); \ + mtspr SPRN_SPRG_HSCRATCH0,rX; \ + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + +#else /* CONFIG_PPC_BOOK3S_64 */ +#define GET_SCRATCH0(rX) mfspr rX,SPRN_SPRG_SCRATCH0 +#define SET_SCRATCH0(rX) mtspr SPRN_SPRG_SCRATCH0,rX + #endif #ifdef CONFIG_PPC_BOOK3E_64 @@ -750,6 +838,10 @@ #define SPRN_SPRG_TLB_EXFRAME SPRN_SPRG2 #define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6 #define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0 + +#define SET_PACA(rX) mtspr SPRN_SPRG_PACA,rX +#define GET_PACA(rX) mfspr rX,SPRN_SPRG_PACA + #endif #ifdef CONFIG_PPC_BOOK3S_32 @@ -800,6 +892,8 @@ #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 #endif + + /* * An mtfsf instruction with the L bit set. On CPUs that support this a * full 64bits of FPSCR is restored and on other CPUs the L bit is ignored. @@ -894,6 +988,8 @@ #define PV_POWER5p 0x003B #define PV_POWER7 0x003F #define PV_970FX 0x003C +#define PV_POWER6 0x003E +#define PV_POWER7 0x003F #define PV_630 0x0040 #define PV_630p 0x0041 #define PV_970MP 0x0044 diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h new file mode 100644 index 000000000000..3d52a1132f3d --- /dev/null +++ b/arch/powerpc/include/asm/reg_a2.h @@ -0,0 +1,165 @@ +/* + * Register definitions specific to the A2 core + * + * Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ASM_POWERPC_REG_A2_H__ +#define __ASM_POWERPC_REG_A2_H__ + +#define SPRN_TENSR 0x1b5 +#define SPRN_TENS 0x1b6 /* Thread ENable Set */ +#define SPRN_TENC 0x1b7 /* Thread ENable Clear */ + +#define SPRN_A2_CCR0 0x3f0 /* Core Configuration Register 0 */ +#define SPRN_A2_CCR1 0x3f1 /* Core Configuration Register 1 */ +#define SPRN_A2_CCR2 0x3f2 /* Core Configuration Register 2 */ +#define SPRN_MMUCR0 0x3fc /* MMU Control Register 0 */ +#define SPRN_MMUCR1 0x3fd /* MMU Control Register 1 */ +#define SPRN_MMUCR2 0x3fe /* MMU Control Register 2 */ +#define SPRN_MMUCR3 0x3ff /* MMU Control Register 3 */ + +#define SPRN_IAR 0x372 + +#define SPRN_IUCR0 0x3f3 +#define IUCR0_ICBI_ACK 0x1000 + +#define SPRN_XUCR0 0x3f6 /* Execution Unit Config Register 0 */ + +#define A2_IERAT_SIZE 16 +#define A2_DERAT_SIZE 32 + +/* A2 MMUCR0 bits */ +#define MMUCR0_ECL 0x80000000 /* Extended Class for TLB fills */ +#define MMUCR0_TID_NZ 0x40000000 /* TID is non-zero */ +#define MMUCR0_TS 0x10000000 /* Translation space for TLB fills */ +#define MMUCR0_TGS 0x20000000 /* Guest space for TLB fills */ +#define MMUCR0_TLBSEL 0x0c000000 /* TLB or ERAT target for TLB fills */ +#define MMUCR0_TLBSEL_U 0x00000000 /* TLBSEL = UTLB */ +#define MMUCR0_TLBSEL_I 0x08000000 /* TLBSEL = I-ERAT */ +#define MMUCR0_TLBSEL_D 0x0c000000 /* TLBSEL = D-ERAT */ +#define MMUCR0_LOCKSRSH 0x02000000 /* Use TLB lock on tlbsx. */ +#define MMUCR0_TID_MASK 0x000000ff /* TID field */ + +/* A2 MMUCR1 bits */ +#define MMUCR1_IRRE 0x80000000 /* I-ERAT round robin enable */ +#define MMUCR1_DRRE 0x40000000 /* D-ERAT round robin enable */ +#define MMUCR1_REE 0x20000000 /* Reference Exception Enable*/ +#define MMUCR1_CEE 0x10000000 /* Change exception enable */ +#define MMUCR1_CSINV_ALL 0x00000000 /* Inval ERAT on all CS evts */ +#define MMUCR1_CSINV_NISYNC 0x04000000 /* Inval ERAT on all ex isync*/ +#define MMUCR1_CSINV_NEVER 0x0c000000 /* Don't inval ERAT on CS */ +#define MMUCR1_ICTID 0x00080000 /* IERAT class field as TID */ +#define MMUCR1_ITTID 0x00040000 /* IERAT thdid field as TID */ +#define MMUCR1_DCTID 0x00020000 /* DERAT class field as TID */ +#define MMUCR1_DTTID 0x00010000 /* DERAT thdid field as TID */ +#define MMUCR1_DCCD 0x00008000 /* DERAT class ignore */ +#define MMUCR1_TLBWE_BINV 0x00004000 /* back invalidate on tlbwe */ + +/* A2 MMUCR2 bits */ +#define MMUCR2_PSSEL_SHIFT 4 + +/* A2 MMUCR3 bits */ +#define MMUCR3_THID 0x0000000f /* Thread ID */ + +/* *** ERAT TLB bits definitions */ +#define TLB0_EPN_MASK ASM_CONST(0xfffffffffffff000) +#define TLB0_CLASS_MASK ASM_CONST(0x0000000000000c00) +#define TLB0_CLASS_00 ASM_CONST(0x0000000000000000) +#define TLB0_CLASS_01 ASM_CONST(0x0000000000000400) +#define TLB0_CLASS_10 ASM_CONST(0x0000000000000800) +#define TLB0_CLASS_11 ASM_CONST(0x0000000000000c00) +#define TLB0_V ASM_CONST(0x0000000000000200) +#define TLB0_X ASM_CONST(0x0000000000000100) +#define TLB0_SIZE_MASK ASM_CONST(0x00000000000000f0) +#define TLB0_SIZE_4K ASM_CONST(0x0000000000000010) +#define TLB0_SIZE_64K ASM_CONST(0x0000000000000030) +#define TLB0_SIZE_1M ASM_CONST(0x0000000000000050) +#define TLB0_SIZE_16M ASM_CONST(0x0000000000000070) +#define TLB0_SIZE_1G ASM_CONST(0x00000000000000a0) +#define TLB0_THDID_MASK ASM_CONST(0x000000000000000f) +#define TLB0_THDID_0 ASM_CONST(0x0000000000000001) +#define TLB0_THDID_1 ASM_CONST(0x0000000000000002) +#define TLB0_THDID_2 ASM_CONST(0x0000000000000004) +#define TLB0_THDID_3 ASM_CONST(0x0000000000000008) +#define TLB0_THDID_ALL ASM_CONST(0x000000000000000f) + +#define TLB1_RESVATTR ASM_CONST(0x00f0000000000000) +#define TLB1_U0 ASM_CONST(0x0008000000000000) +#define TLB1_U1 ASM_CONST(0x0004000000000000) +#define TLB1_U2 ASM_CONST(0x0002000000000000) +#define TLB1_U3 ASM_CONST(0x0001000000000000) +#define TLB1_R ASM_CONST(0x0000800000000000) +#define TLB1_C ASM_CONST(0x0000400000000000) +#define TLB1_RPN_MASK ASM_CONST(0x000003fffffff000) +#define TLB1_W ASM_CONST(0x0000000000000800) +#define TLB1_I ASM_CONST(0x0000000000000400) +#define TLB1_M ASM_CONST(0x0000000000000200) +#define TLB1_G ASM_CONST(0x0000000000000100) +#define TLB1_E ASM_CONST(0x0000000000000080) +#define TLB1_VF ASM_CONST(0x0000000000000040) +#define TLB1_UX ASM_CONST(0x0000000000000020) +#define TLB1_SX ASM_CONST(0x0000000000000010) +#define TLB1_UW ASM_CONST(0x0000000000000008) +#define TLB1_SW ASM_CONST(0x0000000000000004) +#define TLB1_UR ASM_CONST(0x0000000000000002) +#define TLB1_SR ASM_CONST(0x0000000000000001) + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP +#define WSP_UART_PHYS 0xffc000c000 +/* This needs to be careful chosen to hit a !0 congruence class + * in the TLB since we bolt it in way 3, which is already occupied + * by our linear mapping primary bolted entry in CC 0. + */ +#define WSP_UART_VIRT 0xf000000000001000 +#endif + +/* A2 erativax attributes definitions */ +#define ERATIVAX_RS_IS_ALL 0x000 +#define ERATIVAX_RS_IS_TID 0x040 +#define ERATIVAX_RS_IS_CLASS 0x080 +#define ERATIVAX_RS_IS_FULLMATCH 0x0c0 +#define ERATIVAX_CLASS_00 0x000 +#define ERATIVAX_CLASS_01 0x010 +#define ERATIVAX_CLASS_10 0x020 +#define ERATIVAX_CLASS_11 0x030 +#define ERATIVAX_PSIZE_4K (TLB_PSIZE_4K >> 1) +#define ERATIVAX_PSIZE_64K (TLB_PSIZE_64K >> 1) +#define ERATIVAX_PSIZE_1M (TLB_PSIZE_1M >> 1) +#define ERATIVAX_PSIZE_16M (TLB_PSIZE_16M >> 1) +#define ERATIVAX_PSIZE_1G (TLB_PSIZE_1G >> 1) + +/* A2 eratilx attributes definitions */ +#define ERATILX_T_ALL 0 +#define ERATILX_T_TID 1 +#define ERATILX_T_TGS 2 +#define ERATILX_T_FULLMATCH 3 +#define ERATILX_T_CLASS0 4 +#define ERATILX_T_CLASS1 5 +#define ERATILX_T_CLASS2 6 +#define ERATILX_T_CLASS3 7 + +/* XUCR0 bits */ +#define XUCR0_TRACE_UM_T0 0x40000000 /* Thread 0 */ +#define XUCR0_TRACE_UM_T1 0x20000000 /* Thread 1 */ +#define XUCR0_TRACE_UM_T2 0x10000000 /* Thread 2 */ +#define XUCR0_TRACE_UM_T3 0x08000000 /* Thread 3 */ + +/* A2 CCR0 register */ +#define A2_CCR0_PME_DISABLED 0x00000000 +#define A2_CCR0_PME_SLEEP 0x40000000 +#define A2_CCR0_PME_RVW 0x80000000 +#define A2_CCR0_PME_DISABLED2 0xc0000000 + +/* A2 CCR2 register */ +#define A2_CCR2_ERAT_ONLY_MODE 0x00000001 +#define A2_CCR2_ENABLE_ICSWX 0x00000002 +#define A2_CCR2_ENABLE_PC 0x20000000 +#define A2_CCR2_ENABLE_TRACE 0x40000000 + +#endif /* __ASM_POWERPC_REG_A2_H__ */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index b316794aa2b5..0f0ad9fa01c1 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -27,10 +27,12 @@ #define MSR_CM (1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */ #if defined(CONFIG_PPC_BOOK3E_64) +#define MSR_64BIT MSR_CM + #define MSR_ MSR_ME | MSR_CE -#define MSR_KERNEL MSR_ | MSR_CM +#define MSR_KERNEL MSR_ | MSR_64BIT #define MSR_USER32 MSR_ | MSR_PR | MSR_EE | MSR_DE -#define MSR_USER64 MSR_USER32 | MSR_CM | MSR_DE +#define MSR_USER64 MSR_USER32 | MSR_64BIT #elif defined (CONFIG_40x) #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) @@ -81,6 +83,10 @@ #define SPRN_IVOR13 0x19D /* Interrupt Vector Offset Register 13 */ #define SPRN_IVOR14 0x19E /* Interrupt Vector Offset Register 14 */ #define SPRN_IVOR15 0x19F /* Interrupt Vector Offset Register 15 */ +#define SPRN_IVOR38 0x1B0 /* Interrupt Vector Offset Register 38 */ +#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ +#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ +#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ #define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */ #define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */ #define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9a1193e30f26..58625d1e7802 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -158,7 +158,50 @@ struct rtas_error_log { unsigned long target:4; /* Target of failed operation */ unsigned long type:8; /* General event or error*/ unsigned long extended_log_length:32; /* length in bytes */ - unsigned char buffer[1]; + unsigned char buffer[1]; /* Start of extended log */ + /* Variable length. */ +}; + +#define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG 14 + +#define RTAS_V6EXT_COMPANY_ID_IBM (('I' << 24) | ('B' << 16) | ('M' << 8)) + +/* RTAS general extended event log, Version 6. The extended log starts + * from "buffer" field of struct rtas_error_log defined above. + */ +struct rtas_ext_event_log_v6 { + /* Byte 0 */ + uint32_t log_valid:1; /* 1:Log valid */ + uint32_t unrecoverable_error:1; /* 1:Unrecoverable error */ + uint32_t recoverable_error:1; /* 1:recoverable (correctable */ + /* or successfully retried) */ + uint32_t degraded_operation:1; /* 1:Unrecoverable err, bypassed*/ + /* - degraded operation (e.g. */ + /* CPU or mem taken off-line) */ + uint32_t predictive_error:1; + uint32_t new_log:1; /* 1:"New" log (Always 1 for */ + /* data returned from RTAS */ + uint32_t big_endian:1; /* 1: Big endian */ + uint32_t :1; /* reserved */ + /* Byte 1 */ + uint32_t :8; /* reserved */ + /* Byte 2 */ + uint32_t powerpc_format:1; /* Set to 1 (indicating log is */ + /* in PowerPC format */ + uint32_t :3; /* reserved */ + uint32_t log_format:4; /* Log format indicator. Define */ + /* format used for byte 12-2047 */ + /* Byte 3 */ + uint32_t :8; /* reserved */ + /* Byte 4-11 */ + uint8_t reserved[8]; /* reserved */ + /* Byte 12-15 */ + uint32_t company_id; /* Company ID of the company */ + /* that defines the format for */ + /* the vendor specific log type */ + /* Byte 16-end of log */ + uint8_t vendor_log[1]; /* Start of vendor specific log */ + /* Variable length. */ }; /* diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h new file mode 100644 index 000000000000..0cabfd7bc2d1 --- /dev/null +++ b/arch/powerpc/include/asm/scom.h @@ -0,0 +1,156 @@ +/* + * Copyright 2010 Benjamin Herrenschmidt, IBM Corp + * <benh@kernel.crashing.org> + * and David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_POWERPC_SCOM_H +#define _ASM_POWERPC_SCOM_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#ifdef CONFIG_PPC_SCOM + +/* + * The SCOM bus is a sideband bus used for accessing various internal + * registers of the processor or the chipset. The implementation details + * differ between processors and platforms, and the access method as + * well. + * + * This API allows to "map" ranges of SCOM register numbers associated + * with a given SCOM controller. The later must be represented by a + * device node, though some implementations might support NULL if there + * is no possible ambiguity + * + * Then, scom_read/scom_write can be used to accesses registers inside + * that range. The argument passed is a register number relative to + * the beginning of the range mapped. + */ + +typedef void *scom_map_t; + +/* Value for an invalid SCOM map */ +#define SCOM_MAP_INVALID (NULL) + +/* The scom_controller data structure is what the platform passes + * to the core code in scom_init, it provides the actual implementation + * of all the SCOM functions + */ +struct scom_controller { + scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count); + void (*unmap)(scom_map_t map); + + u64 (*read)(scom_map_t map, u32 reg); + void (*write)(scom_map_t map, u32 reg, u64 value); +}; + +extern const struct scom_controller *scom_controller; + +/** + * scom_init - Initialize the SCOM backend, called by the platform + * @controller: The platform SCOM controller + */ +static inline void scom_init(const struct scom_controller *controller) +{ + scom_controller = controller; +} + +/** + * scom_map_ok - Test is a SCOM mapping is successful + * @map: The result of scom_map to test + */ +static inline int scom_map_ok(scom_map_t map) +{ + return map != SCOM_MAP_INVALID; +} + +/** + * scom_map - Map a block of SCOM registers + * @ctrl_dev: Device node of the SCOM controller + * some implementations allow NULL here + * @reg: first SCOM register to map + * @count: Number of SCOM registers to map + */ + +static inline scom_map_t scom_map(struct device_node *ctrl_dev, + u64 reg, u64 count) +{ + return scom_controller->map(ctrl_dev, reg, count); +} + +/** + * scom_find_parent - Find the SCOM controller for a device + * @dev: OF node of the device + * + * This is not meant for general usage, but in combination with + * scom_map() allows to map registers not represented by the + * device own scom-reg property. Useful for applying HW workarounds + * on things not properly represented in the device-tree for example. + */ +struct device_node *scom_find_parent(struct device_node *dev); + + +/** + * scom_map_device - Map a device's block of SCOM registers + * @dev: OF node of the device + * @index: Register bank index (index in "scom-reg" property) + * + * This function will use the device-tree binding for SCOM which + * is to follow "scom-parent" properties until it finds a node with + * a "scom-controller" property to find the controller. It will then + * use the "scom-reg" property which is made of reg/count pairs, + * each of them having a size defined by the controller's #scom-cells + * property + */ +extern scom_map_t scom_map_device(struct device_node *dev, int index); + + +/** + * scom_unmap - Unmap a block of SCOM registers + * @map: Result of scom_map is to be unmapped + */ +static inline void scom_unmap(scom_map_t map) +{ + if (scom_map_ok(map)) + scom_controller->unmap(map); +} + +/** + * scom_read - Read a SCOM register + * @map: Result of scom_map + * @reg: Register index within that map + */ +static inline u64 scom_read(scom_map_t map, u32 reg) +{ + return scom_controller->read(map, reg); +} + +/** + * scom_write - Write to a SCOM register + * @map: Result of scom_map + * @reg: Register index within that map + * @value: Value to write + */ +static inline void scom_write(scom_map_t map, u32 reg, u64 value) +{ + scom_controller->write(map, reg, value); +} + +#endif /* CONFIG_PPC_SCOM */ +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_SCOM_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index a902a0d3ae0d..880b8c1e6e53 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -20,6 +20,7 @@ #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/kernel.h> +#include <linux/irqreturn.h> #ifndef __ASSEMBLY__ @@ -29,14 +30,32 @@ #include <asm/percpu.h> extern int boot_cpuid; +extern int boot_cpu_count; extern void cpu_die(void); #ifdef CONFIG_SMP -extern void smp_send_debugger_break(int cpu); -extern void smp_message_recv(int); +struct smp_ops_t { + void (*message_pass)(int cpu, int msg); +#ifdef CONFIG_PPC_SMP_MUXED_IPI + void (*cause_ipi)(int cpu, unsigned long data); +#endif + int (*probe)(void); + int (*kick_cpu)(int nr); + void (*setup_cpu)(int nr); + void (*bringup_done)(void); + void (*take_timebase)(void); + void (*give_timebase)(void); + int (*cpu_disable)(void); + void (*cpu_die)(unsigned int nr); + int (*cpu_bootable)(unsigned int nr); +}; + +extern void smp_send_debugger_break(void); extern void start_secondary_resume(void); +extern void __devinit smp_generic_give_timebase(void); +extern void __devinit smp_generic_take_timebase(void); DECLARE_PER_CPU(unsigned int, cpu_pvr); @@ -93,13 +112,16 @@ extern int cpu_to_core_id(int cpu); #define PPC_MSG_CALL_FUNC_SINGLE 2 #define PPC_MSG_DEBUGGER_BREAK 3 -/* - * irq controllers that have dedicated ipis per message and don't - * need additional code in the action handler may use this - */ +/* for irq controllers that have dedicated ipis per message (4) */ extern int smp_request_message_ipi(int virq, int message); extern const char *smp_ipi_name[]; +/* for irq controllers with only a single ipi */ +extern void smp_muxed_ipi_set_data(int cpu, unsigned long data); +extern void smp_muxed_ipi_message_pass(int cpu, int msg); +extern void smp_muxed_ipi_resend(void); +extern irqreturn_t smp_ipi_demux(void); + void smp_init_iSeries(void); void smp_init_pSeries(void); void smp_init_cell(void); @@ -149,7 +171,7 @@ extern int smt_enabled_at_boot; extern int smp_mpic_probe(void); extern void smp_mpic_setup_cpu(int cpu); -extern void smp_generic_kick_cpu(int nr); +extern int smp_generic_kick_cpu(int nr); extern void smp_generic_give_timebase(void); extern void smp_generic_take_timebase(void); @@ -169,6 +191,8 @@ extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; +extern irqreturn_t debug_ipi_action(int irq, void *data); + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 60f64b132bd4..8489d372077f 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at) COMPAT_SYS_SPU(open_by_handle_at) COMPAT_SYS_SPU(clock_adjtime) SYSCALL_SPU(syncfs) +COMPAT_SYS_SPU(sendmmsg) diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 5e474ddd2273..2dc595dda03b 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -219,8 +219,6 @@ extern int mem_init_done; /* set on boot once kmalloc can be called */ extern int init_bootmem_done; /* set once bootmem is available */ extern phys_addr_t memory_limit; extern unsigned long klimit; - -extern void *alloc_maybe_bootmem(size_t size, gfp_t mask); extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); extern int powersave_nap; /* set if nap mode can be used in idle loop */ diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index d50a380b2b6f..81143fcbd113 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -79,6 +79,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) #elif defined(CONFIG_PPC_STD_MMU_64) +#define MMU_NO_CONTEXT 0 + /* * TLB flushing for 64-bit hash-MMU CPUs */ diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 11ae699135ba..58580e94a2bb 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,6 +52,7 @@ extern void __init udbg_init_44x_as1(void); extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); +extern void __init udbg_init_wsp(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UDBG_H */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3c215648ce6d..6d23c8193caa 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -371,10 +371,11 @@ #define __NR_open_by_handle_at 346 #define __NR_clock_adjtime 347 #define __NR_syncfs 348 +#define __NR_sendmmsg 349 #ifdef __KERNEL__ -#define __NR_syscalls 349 +#define __NR_syscalls 350 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h new file mode 100644 index 000000000000..c7dc83088a33 --- /dev/null +++ b/arch/powerpc/include/asm/wsp.h @@ -0,0 +1,14 @@ +/* + * Copyright 2011 Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ASM_POWERPC_WSP_H +#define __ASM_POWERPC_WSP_H + +extern int wsp_get_chip_id(struct device_node *dn); + +#endif /* __ASM_POWERPC_WSP_H */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h new file mode 100644 index 000000000000..b183a4062011 --- /dev/null +++ b/arch/powerpc/include/asm/xics.h @@ -0,0 +1,142 @@ +/* + * Common definitions accross all variants of ICP and ICS interrupt + * controllers. + */ + +#ifndef _XICS_H +#define _XICS_H + +#include <linux/interrupt.h> + +#define XICS_IPI 2 +#define XICS_IRQ_SPURIOUS 0 + +/* Want a priority other than 0. Various HW issues require this. */ +#define DEFAULT_PRIORITY 5 + +/* + * Mark IPIs as higher priority so we can take them inside interrupts that + * arent marked IRQF_DISABLED + */ +#define IPI_PRIORITY 4 + +/* The least favored priority */ +#define LOWEST_PRIORITY 0xFF + +/* The number of priorities defined above */ +#define MAX_NUM_PRIORITIES 3 + +/* Native ICP */ +extern int icp_native_init(void); + +/* PAPR ICP */ +extern int icp_hv_init(void); + +/* ICP ops */ +struct icp_ops { + unsigned int (*get_irq)(void); + void (*eoi)(struct irq_data *d); + void (*set_priority)(unsigned char prio); + void (*teardown_cpu)(void); + void (*flush_ipi)(void); +#ifdef CONFIG_SMP + void (*cause_ipi)(int cpu, unsigned long data); + irq_handler_t ipi_action; +#endif +}; + +extern const struct icp_ops *icp_ops; + +/* Native ICS */ +extern int ics_native_init(void); + +/* RTAS ICS */ +extern int ics_rtas_init(void); + +/* ICS instance, hooked up to chip_data of an irq */ +struct ics { + struct list_head link; + int (*map)(struct ics *ics, unsigned int virq); + void (*mask_unknown)(struct ics *ics, unsigned long vec); + long (*get_server)(struct ics *ics, unsigned long vec); + int (*host_match)(struct ics *ics, struct device_node *node); + char data[]; +}; + +/* Commons */ +extern unsigned int xics_default_server; +extern unsigned int xics_default_distrib_server; +extern unsigned int xics_interrupt_server_size; +extern struct irq_host *xics_host; + +struct xics_cppr { + unsigned char stack[MAX_NUM_PRIORITIES]; + int index; +}; + +DECLARE_PER_CPU(struct xics_cppr, xics_cppr); + +static inline void xics_push_cppr(unsigned int vec) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) + return; + + if (vec == XICS_IPI) + os_cppr->stack[++os_cppr->index] = IPI_PRIORITY; + else + os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY; +} + +static inline unsigned char xics_pop_cppr(void) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + if (WARN_ON(os_cppr->index < 1)) + return LOWEST_PRIORITY; + + return os_cppr->stack[--os_cppr->index]; +} + +static inline void xics_set_base_cppr(unsigned char cppr) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + /* we only really want to set the priority when there's + * just one cppr value on the stack + */ + WARN_ON(os_cppr->index != 0); + + os_cppr->stack[0] = cppr; +} + +static inline unsigned char xics_cppr_top(void) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + return os_cppr->stack[os_cppr->index]; +} + +DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message); + +extern void xics_init(void); +extern void xics_setup_cpu(void); +extern void xics_update_irq_servers(void); +extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join); +extern void xics_mask_unknown_vec(unsigned int vec); +extern irqreturn_t xics_ipi_dispatch(int cpu); +extern int xics_smp_probe(void); +extern void xics_register_ics(struct ics *ics); +extern void xics_teardown_cpu(void); +extern void xics_kexec_teardown_cpu(int secondary); +extern void xics_migrate_irqs_away(void); +#ifdef CONFIG_SMP +extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, + unsigned int strict_check); +#else +#define xics_get_irq_server(virq, cpumask, strict_check) (xics_default_server) +#endif + + +#endif /* _XICS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3bb2a3e6a337..9aab36312572 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,11 +38,14 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o +obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o @@ -75,7 +78,6 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o -extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o @@ -103,6 +105,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o + obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 23e6a93145ab..36e1c8a29be8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -74,6 +74,7 @@ int main(void) DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); + DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ @@ -395,6 +396,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S new file mode 100644 index 000000000000..7f818feaa7a5 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -0,0 +1,114 @@ +/* + * A2 specific assembly support code + * + * Copyright 2009 Ben Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/reg_a2.h> +#include <asm/reg.h> +#include <asm/thread_info.h> + +/* + * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. + * This also prevents external LPID accesses but that isn't a problem when not a + * guest. Under PV, this setting will be ignored and MMUCR will return the right + * number of PID bits we can use. + */ +#define MMUCR1_EXTEND_PID \ + (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ + MMUCR1_DTTID | MMUCR1_DCCD) + +/* + * Use extended PIDs if enabled. + * Don't clear the ERATs on context sync events and enable I & D LRU. + * Enable ERAT back invalidate when tlbwe overwrites an entry. + */ +#define INITIAL_MMUCR1 \ + (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ + MMUCR1_DRRE | MMUCR1_TLBWE_BINV) + +_GLOBAL(__setup_cpu_a2) + /* Some of these are actually thread local and some are + * core local but doing it always won't hurt + */ + +#ifdef CONFIG_PPC_WSP_COPRO + /* Make sure ACOP starts out as zero */ + li r3,0 + mtspr SPRN_ACOP,r3 + + /* Enable icswx instruction */ + mfspr r3,SPRN_A2_CCR2 + ori r3,r3,A2_CCR2_ENABLE_ICSWX + mtspr SPRN_A2_CCR2,r3 + + /* Unmask all CTs in HACOP */ + li r3,-1 + mtspr SPRN_HACOP,r3 +#endif /* CONFIG_PPC_WSP_COPRO */ + + /* Enable doorbell */ + mfspr r3,SPRN_A2_CCR2 + oris r3,r3,A2_CCR2_ENABLE_PC@h + mtspr SPRN_A2_CCR2,r3 + isync + + /* Setup CCR0 to disable power saving for now as it's busted + * in the current implementations. Setup CCR1 to wake on + * interrupts normally (we write the default value but who + * knows what FW may have clobbered...) + */ + li r3,0 + mtspr SPRN_A2_CCR0, r3 + LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) + mtspr SPRN_A2_CCR1, r3 + + /* Initialise MMUCR1 */ + lis r3,INITIAL_MMUCR1@h + ori r3,r3,INITIAL_MMUCR1@l + mtspr SPRN_MMUCR1,r3 + + /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ + LOAD_REG_IMMEDIATE(r3, 0x000a7531) + mtspr SPRN_MMUCR2,r3 + + /* Set MMUCR3 to write all thids bit to the TLB */ + LOAD_REG_IMMEDIATE(r3, 0x0000000f) + mtspr SPRN_MMUCR3,r3 + + /* Don't do ERAT stuff if running guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne 1f + + /* Now set the I-ERAT watermark to 15 */ + lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_IERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* Now set the D-ERAT watermark to 31 */ + lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_DERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* And invalidate the beast just in case. That won't get rid of + * a bolted entry though it will be in LRU and so will go away eventually + * but let's not bother for now + */ + PPC_ERATILX(0,0,0) +1: + blr + +_GLOBAL(__restore_cpu_a2) + b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 913611105c1f..8053db02b85e 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -88,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500) bl __e500_dcache_setup #ifdef CONFIG_PPC_BOOK3E_64 bl .__setup_base_ivors + bl .setup_perfmon_ivor + bl .setup_doorbell_ivors + bl .setup_ehv_ivors #else bl __setup_e500mc_ivors #endif diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S new file mode 100644 index 000000000000..4f9a93fcfe07 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -0,0 +1,91 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +/* Entry: r3 = crap, r4 = ptr to cputable entry + * + * Note that we can be called twice for pseudo-PVRs + */ +_GLOBAL(__setup_cpu_power7) + mflr r11 + bl __init_hvmode_206 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power7) + mflr r11 + mfmsr r3 + rldicl. r0,r3,4,63 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +__init_hvmode_206: + /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */ + mfmsr r3 + rldicl. r0,r3,4,63 + bnelr + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206) + xor r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) + blr + +__init_LPCR: + /* Setup a sane LPCR: + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * + * Other bits untouched for now + */ + mfspr r3,SPRN_LPCR + ori r3,r3,(LPCR_LPES0|LPCR_LPES1) + xori r3,r3, LPCR_LPES0 + ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) + li r5,7 + sldi r5,r5,LPCR_DPFD_SH + andc r3,r3,r5 + li r5,4 + sldi r5,r5,LPCR_DPFD_SH + or r3,r3,r5 + mtspr SPRN_LPCR,r3 + isync + blr + +__init_TLB: + /* Clear the TLB */ + li r6,128 + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b9602ee06deb..34d2722b9451 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); +extern void __restore_cpu_a2(void); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -199,7 +201,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -214,7 +216,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -230,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -248,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -284,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -302,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -318,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5 (gr)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -338,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -354,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -371,7 +373,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", @@ -385,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -404,7 +406,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER6 (architected)", .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", @@ -417,12 +419,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (architected)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7", }, { /* Power7 */ @@ -431,14 +434,15 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7", }, { /* Power7+ */ @@ -447,14 +451,15 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7+ (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7+", }, { /* Cell Broadband Engine */ @@ -465,7 +470,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_CELL, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 4, @@ -480,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "PA6T", .cpu_features = CPU_FTRS_PA6T, .cpu_user_features = COMMON_USER_PA6T, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PA6T, .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 6, @@ -497,7 +502,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (compatible)", .cpu_features = CPU_FTRS_COMPATIBLE, .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -2005,7 +2010,22 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ -#ifdef CONFIG_PPC_BOOK3E_64 +#ifdef CONFIG_PPC_A2 + { /* Standard A2 (>= DD2) + FPU core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00480000, + .cpu_name = "A2 (>= DD2)", + .cpu_features = CPU_FTRS_A2, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTRS_A2, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .cpu_setup = __setup_cpu_a2, + .cpu_restore = __restore_cpu_a2, + .machine_check = machine_check_generic, + .platform = "ppca2", + }, { /* This is a default entry to get going, to be replaced by * a real one at some stage */ @@ -2026,7 +2046,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "power6", }, -#endif +#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec the_cpu_spec; diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 5b5e1f002a8e..4e6ee944495a 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs) return; hard_irq_disable(); - if (!cpu_isset(cpu, cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); - cpu_set(cpu, cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_in_crash); /* * Entered via soft-reset - could be the kdump @@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs) * Tell the kexec CPU that entered via soft-reset and ready * to go down. */ - if (cpu_isset(cpu, cpus_in_sr)) { - cpu_clear(cpu, cpus_in_sr); + if (cpumask_test_cpu(cpu, &cpus_in_sr)) { + cpumask_clear_cpu(cpu, &cpus_in_sr); atomic_inc(&enter_on_soft_reset); } @@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs) * This barrier is needed to make sure that all CPUs are stopped. * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpu_isset(crashing_cpu, cpus_in_crash)) + while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu) { unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); while (atomic_read(&enter_on_soft_reset) != ncpus) cpu_relax(); } @@ -132,7 +132,7 @@ static void crash_kexec_prepare_cpus(int cpu) */ printk(KERN_EMERG "Sending IPI to other cpus...\n"); msecs = 10000; - while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) { + while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { cpu_relax(); mdelay(1); } @@ -144,52 +144,24 @@ static void crash_kexec_prepare_cpus(int cpu) * user to do soft reset such that we get all. * Soft-reset will be used until better mechanism is implemented. */ - if (cpus_weight(cpus_in_crash) < ncpus) { + if (cpumask_weight(&cpus_in_crash) < ncpus) { printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpus_weight(cpus_in_crash)); + ncpus - cpumask_weight(&cpus_in_crash)); printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); atomic_set(&enter_on_soft_reset, 0); - while (cpus_weight(cpus_in_crash) < ncpus) + while (cpumask_weight(&cpus_in_crash) < ncpus) cpu_relax(); } /* * Make sure all CPUs are entered via soft-reset if the kdump is * invoked using soft-reset. */ - if (cpu_isset(cpu, cpus_in_sr)) + if (cpumask_test_cpu(cpu, &cpus_in_sr)) crash_soft_reset_check(cpu); /* Leave the IPI callback set */ } -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 -static void crash_kexec_wait_realmode(int cpu) -{ - unsigned int msecs; - int i; - - msecs = 10000; - for (i=0; i < NR_CPUS && msecs > 0; i++) { - if (i == cpu) - continue; - - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { - barrier(); - if (!cpu_possible(i)) { - break; - } - if (!cpu_online(i)) { - break; - } - msecs--; - mdelay(1); - } - } - mb(); -} -#endif /* CONFIG_PPC_STD_MMU_64 */ - /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. @@ -210,7 +182,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * exited using 'x'(exit and recover) or * kexec_should_crash() failed for all running tasks. */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); local_irq_restore(flags); return; } @@ -224,7 +196,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * then start kexec boot. */ crash_soft_reset_check(cpu); - cpu_set(crashing_cpu, cpus_in_crash); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); machine_kexec(kexec_crash_image); @@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs) } #else /* ! CONFIG_SMP */ -static inline void crash_kexec_wait_realmode(int cpu) {} static void crash_kexec_prepare_cpus(int cpu) { @@ -253,10 +224,40 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = 10000; + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } + } + mb(); +} +#else +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ + /* * Register a function to be called on shutdown. Only use this if you * can't reset your device in the second kernel. @@ -345,7 +346,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crashing_cpu = smp_processor_id(); crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); - cpu_set(crashing_cpu, cpus_in_crash); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 3307a52d797f..2cc451aaaca7 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -13,84 +13,35 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/threads.h> -#include <linux/percpu.h> +#include <linux/hardirq.h> #include <asm/dbell.h> #include <asm/irq_regs.h> #ifdef CONFIG_SMP -struct doorbell_cpu_info { - unsigned long messages; /* current messages bits */ - unsigned int tag; /* tag value */ -}; - -static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info); - void doorbell_setup_this_cpu(void) { - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + unsigned long tag = mfspr(SPRN_PIR) & 0x3fff; - info->messages = 0; - info->tag = mfspr(SPRN_PIR) & 0x3fff; + smp_muxed_ipi_set_data(smp_processor_id(), tag); } -void doorbell_message_pass(int target, int msg) +void doorbell_cause_ipi(int cpu, unsigned long data) { - struct doorbell_cpu_info *info; - int i; - - if (target < NR_CPUS) { - info = &per_cpu(doorbell_cpu_info, target); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - else if (target == MSG_ALL_BUT_SELF) { - for_each_online_cpu(i) { - if (i == smp_processor_id()) - continue; - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - } - else { /* target == MSG_ALL */ - for_each_online_cpu(i) { - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - } - ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0); - } + ppc_msgsnd(PPC_DBELL, 0, data); } void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - int msg; - /* Warning: regs can be NULL when called from irq enable */ + irq_enter(); - if (!info->messages || (num_online_cpus() < 2)) - goto out; + smp_ipi_demux(); - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &info->messages)) - smp_message_recv(msg); - -out: + irq_exit(); set_irq_regs(old_regs); } - -void doorbell_check_self(void) -{ - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - - if (!info->messages) - return; - - ppc_msgsnd(PPC_DBELL, 0, info->tag); -} - #else /* CONFIG_SMP */ void doorbell_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d82878c4daa6..d834425186ae 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -421,6 +421,12 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + mfspr r25,SPRN_DSCR + std r25,THREAD_DSCR(r3) +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 @@ -462,10 +468,10 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE_NESTED(95) clrrdi r6,r8,40 /* get its 1T ESID */ clrrdi r9,r1,40 /* get current sp 1T ESID */ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95) + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95) FTR_SECTION_ELSE b 2f -ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB) clrldi. r0,r6,2 /* is new ESID c00000000? */ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ cror eq,4*cr1+eq,eq @@ -479,7 +485,7 @@ BEGIN_FTR_SECTION li r9,MMU_SEGSIZE_1T /* insert B field */ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0 -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Update the last bolted SLB. No write barriers are needed * here, provided we only update the current CPU's SLB shadow @@ -491,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ - /* No need to check for CPU_FTR_NO_SLBIE_B here, since when + /* No need to check for MMU_FTR_NO_SLBIE_B here, since when * we have 1TB segments, the only CPUs known to have the errata * only support less than 1TB of system memory and we'll never * actually hit this code path. @@ -522,6 +528,15 @@ BEGIN_FTR_SECTION mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + ld r0,THREAD_DSCR(r4) + cmpd r0,r25 + beq 1f + mtspr SPRN_DSCR,r0 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif /* r3-r13 are destroyed -- Cort */ REST_8GPRS(14, r1) @@ -838,7 +853,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ - mfspr r4,SPRN_SPRG_PACA /* Get PACA */ + GET_PACA(r4) clrldi r4,r4,2 /* convert to realmode address */ bcl 20,31,$+4 @@ -869,7 +884,7 @@ _STATIC(rtas_restore_regs) REST_8GPRS(14, r1) /* Restore the non-volatiles */ REST_10GPRS(22, r1) /* ditto */ - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) ld r4,_CCR(r1) mtcr r4 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9651acc3504a..d24d4400cc79 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -17,6 +17,7 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/thread_info.h> +#include <asm/reg_a2.h> #include <asm/exception-64e.h> #include <asm/bug.h> #include <asm/irqflags.h> @@ -252,9 +253,6 @@ exception_marker: .balign 0x1000 .globl interrupt_base_book3e interrupt_base_book3e: /* fake trap */ - /* Note: If real debug exceptions are supported by the HW, the vector - * below will have to be patched up to point to an appropriate handler - */ EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */ EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */ @@ -271,8 +269,13 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ EXCEPTION_STUB(0x1c0, data_tlb_miss) EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x260, perfmon) EXCEPTION_STUB(0x280, doorbell) EXCEPTION_STUB(0x2a0, doorbell_crit) + EXCEPTION_STUB(0x2c0, guest_doorbell) + EXCEPTION_STUB(0x2e0, guest_doorbell_crit) + EXCEPTION_STUB(0x300, hypercall) + EXCEPTION_STUB(0x320, ehpriv) .globl interrupt_end_book3e interrupt_end_book3e: @@ -454,6 +457,70 @@ interrupt_end_book3e: kernel_dbg_exc: b . /* NYI */ +/* Debug exception as a debug interrupt*/ + START_EXCEPTION(debug_debug); + DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + + /* + * If there is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the DSRR1 value and clearing the debug status. + */ + + mfspr r14,SPRN_DBSR /* check single-step/branch taken */ + andis. r15,r14,DBSR_IC@h + beq+ 1f + + LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e) + cmpld cr0,r10,r14 + cmpld cr1,r10,r15 + blt+ cr0,1f + bge+ cr1,1f + + /* here it looks like we got an inappropriate debug exception. */ + lis r14,DBSR_IC@h /* clear the IC event */ + rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ + mtspr SPRN_DBSR,r14 + mtspr SPRN_DSRR1,r11 + lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */ + ld r1,PACA_EXDBG+EX_R1(r13) + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + mtcr r10 + ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */ + ld r11,PACA_EXDBG+EX_R11(r13) + mfspr r13,SPRN_SPRG_DBG_SCRATCH + rfdi + + /* Normal debug exception */ + /* XXX We only handle coming from userspace for now since we can't + * quite save properly an interrupted kernel state yet + */ +1: andi. r14,r11,MSR_PR; /* check for userspace again */ + beq kernel_dbg_exc; /* if from kernel mode */ + + /* Now we mash up things to make it look like we are coming on a + * normal exception + */ + mfspr r15,SPRN_SPRG_DBG_SCRATCH + mtspr SPRN_SPRG_GEN_SCRATCH,r15 + mfspr r14,SPRN_DBSR + EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + bl .save_nvgprs + bl .DebugException + b .ret_from_except + + MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) + /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) @@ -468,6 +535,11 @@ kernel_dbg_exc: // b ret_from_crit_except b . + MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE) + /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -587,7 +659,12 @@ fast_exception_return: BAD_STACK_TRAMPOLINE(0x000) BAD_STACK_TRAMPOLINE(0x100) BAD_STACK_TRAMPOLINE(0x200) +BAD_STACK_TRAMPOLINE(0x260) +BAD_STACK_TRAMPOLINE(0x2c0) +BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) +BAD_STACK_TRAMPOLINE(0x310) +BAD_STACK_TRAMPOLINE(0x320) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -864,8 +941,23 @@ have_hes: * that will have to be made dependent on whether we are running under * a hypervisor I suppose. */ - ori r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS - mtspr SPRN_MAS0,r3 + + /* BEWARE, MAGIC + * This code is called as an ordinary function on the boot CPU. But to + * avoid duplication, this code is also used in SCOM bringup of + * secondary CPUs. We read the code between the initial_tlb_code_start + * and initial_tlb_code_end labels one instruction at a time and RAM it + * into the new core via SCOM. That doesn't process branches, so there + * must be none between those two labels. It also means if this code + * ever takes any parameters, the SCOM code must also be updated to + * provide them. + */ + .globl a2_tlbinit_code_start +a2_tlbinit_code_start: + + ori r11,r3,MAS0_WQ_ALLWAYS + oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */ + mtspr SPRN_MAS0,r11 lis r3,(MAS1_VALID | MAS1_IPROT)@h ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT mtspr SPRN_MAS1,r3 @@ -879,18 +971,86 @@ have_hes: /* Write the TLB entry */ tlbwe + .globl a2_tlbinit_after_linear_map +a2_tlbinit_after_linear_map: + /* Now we branch the new virtual address mapped by this entry */ LOAD_REG_IMMEDIATE(r3,1f) mtctr r3 bctr 1: /* We are now running at PAGE_OFFSET, clean the TLB of everything - * else (XXX we should scan for bolted crap from the firmware too) + * else (including IPROTed things left by firmware) + * r4 = TLBnCFG + * r3 = current address (more or less) */ + + li r5,0 + mtspr SPRN_MAS6,r5 + tlbsx 0,r3 + + rlwinm r9,r4,0,TLBnCFG_N_ENTRY + rlwinm r10,r4,8,0xff + addi r10,r10,-1 /* Get inner loop mask */ + + li r3,1 + + mfspr r5,SPRN_MAS1 + rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT)) + + mfspr r6,SPRN_MAS2 + rldicr r6,r6,0,51 /* Extract EPN */ + + mfspr r7,SPRN_MAS0 + rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */ + + rlwinm r8,r7,16,0xfff /* Extract ESEL */ + +2: add r4,r3,r8 + and r4,r4,r10 + + rlwimi r7,r4,16,MAS0_ESEL_MASK + + mtspr SPRN_MAS0,r7 + mtspr SPRN_MAS1,r5 + mtspr SPRN_MAS2,r6 + tlbwe + + addi r3,r3,1 + and. r4,r3,r10 + + bne 3f + addis r6,r6,(1<<30)@h +3: + cmpw r3,r9 + blt 2b + + .globl a2_tlbinit_after_iprot_flush +a2_tlbinit_after_iprot_flush: + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP + /* Now establish early debug mappings if applicable */ + /* Restore the MAS0 we used for linear mapping load */ + mtspr SPRN_MAS0,r11 + + lis r3,(MAS1_VALID | MAS1_IPROT)@h + ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) + mtspr SPRN_MAS1,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) + mtspr SPRN_MAS2,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) + mtspr SPRN_MAS7_MAS3,r3 + /* re-use the MAS8 value from the linear mapping */ + tlbwe +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ + PPC_TLBILX(0,0,0) sync isync + .globl a2_tlbinit_code_end +a2_tlbinit_code_end: + /* We translate LR and return */ mflr r3 tovirt(r3,r3) @@ -1040,3 +1200,33 @@ _GLOBAL(__setup_base_ivors) sync blr + +_GLOBAL(setup_perfmon_ivor) + SET_IVOR(35, 0x260) /* Performance Monitor */ + blr + +_GLOBAL(setup_doorbell_ivors) + SET_IVOR(36, 0x280) /* Processor Doorbell */ + SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */ + + /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ + SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ + blr + +_GLOBAL(setup_ehv_ivors) + /* + * We may be running as a guest and lack E.HV even on a chip + * that normally has it. + */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */ + SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */ + blr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index aeb739e18769..a85f4874cba7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -37,23 +37,51 @@ .globl __start_interrupts __start_interrupts: - STD_EXCEPTION_PSERIES(0x100, system_reset) + .globl system_reset_pSeries; +system_reset_pSeries: + HMT_MEDIUM; + DO_KVM 0x100; + SET_SCRATCH0(r13) +#ifdef CONFIG_PPC_P7_NAP +BEGIN_FTR_SECTION + /* Running native on arch 2.06 or later, check if we are + * waking up from nap. We only handle no state loss and + * supervisor state loss. We do -not- handle hypervisor + * state loss at this time. + */ + mfspr r13,SPRN_SRR1 + rlwinm r13,r13,47-31,30,31 + cmpwi cr0,r13,1 + bne 1f + b .power7_wakeup_noloss +1: cmpwi cr0,r13,2 + bne 1f + b .power7_wakeup_loss + /* Total loss of HV state is fatal, we could try to use the + * PIR to locate a PACA, then use an emergency stack etc... + * but for now, let's just stay stuck here + */ +1: cmpwi cr0,r13,3 + beq . +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +#endif /* CONFIG_PPC_P7_NAP */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) . = 0x200 _machine_check_pSeries: HMT_MEDIUM DO_KVM 0x200 - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) . = 0x300 .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM DO_KVM 0x300 - mtspr SPRN_SPRG_SCRATCH0,r13 + SET_SCRATCH0(r13) BEGIN_FTR_SECTION - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) std r9,PACA_EXSLB+EX_R9(r13) std r10,PACA_EXSLB+EX_R10(r13) mfspr r10,SPRN_DAR @@ -67,22 +95,22 @@ BEGIN_FTR_SECTION std r11,PACA_EXGEN+EX_R11(r13) ld r11,PACA_EXSLB+EX_R9(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r12,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r12) std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R9(r13) std r12,PACA_EXGEN+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(data_access_common) + EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD) FTR_SECTION_ELSE - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x380 - mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + SET_SCRATCH0(r13) + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -95,7 +123,7 @@ data_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -113,15 +141,15 @@ data_access_slb_pSeries: bctr #endif - STD_EXCEPTION_PSERIES(0x400, instruction_access) + STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access) . = 0x480 .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x480 - mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + SET_SCRATCH0(r13) + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -134,7 +162,7 @@ instruction_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -147,13 +175,29 @@ instruction_access_slb_pSeries: bctr #endif - MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt) - STD_EXCEPTION_PSERIES(0x600, alignment) - STD_EXCEPTION_PSERIES(0x700, program_check) - STD_EXCEPTION_PSERIES(0x800, fp_unavailable) - MASKABLE_EXCEPTION_PSERIES(0x900, decrementer) - STD_EXCEPTION_PSERIES(0xa00, trap_0a) - STD_EXCEPTION_PSERIES(0xb00, trap_0b) + /* We open code these as we can't have a ". = x" (even with + * x = "." within a feature section + */ + . = 0x500; + .globl hardware_interrupt_pSeries; + .globl hardware_interrupt_hv; +hardware_interrupt_pSeries: +hardware_interrupt_hv: + BEGIN_FTR_SECTION + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) + ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) + + STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) + + MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) + MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer) + + STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) . = 0xc00 .globl system_call_pSeries @@ -165,13 +209,13 @@ BEGIN_FTR_SECTION beq- 1f END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mr r9,r13 - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) mfspr r11,SPRN_SRR0 - ld r12,PACAKBASE(r13) - ld r10,PACAKMSR(r13) - LOAD_HANDLER(r12, system_call_entry) - mtspr SPRN_SRR0,r12 mfspr r12,SPRN_SRR1 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10, system_call_entry) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 rfid b . /* prevent speculative execution */ @@ -183,8 +227,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) rfid /* return to userspace */ b . - STD_EXCEPTION_PSERIES(0xd00, single_step) - STD_EXCEPTION_PSERIES(0xe00, trap_0e) + STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + + /* At 0xe??? we have a bunch of hypervisor exceptions, we branch + * out of line to handle them + */ + . = 0xe00 + b h_data_storage_hv + . = 0xe20 + b h_instr_storage_hv + . = 0xe40 + b emulation_assist_hv + . = 0xe50 + b hmi_exception_hv + . = 0xe60 + b hmi_exception_hv /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the @@ -193,39 +250,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) */ performance_monitor_pSeries_1: . = 0xf00 - DO_KVM 0xf00 b performance_monitor_pSeries altivec_unavailable_pSeries_1: . = 0xf20 - DO_KVM 0xf20 b altivec_unavailable_pSeries vsx_unavailable_pSeries_1: . = 0xf40 - DO_KVM 0xf40 b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error) + STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance) + STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal) + STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) #endif /* CONFIG_CBE_RAS */ . = 0x3000 -/*** pSeries interrupt support ***/ +/*** Out of line interrupts support ***/ + + /* moved from 0xe00 */ + STD_EXCEPTION_HV(., 0xe00, h_data_storage) + STD_EXCEPTION_HV(., 0xe20, h_instr_storage) + STD_EXCEPTION_HV(., 0xe40, emulation_assist) + STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */ /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., performance_monitor) - STD_EXCEPTION_PSERIES(., altivec_unavailable) - STD_EXCEPTION_PSERIES(., vsx_unavailable) + STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -240,17 +300,30 @@ masked_interrupt: rotldi r10,r10,16 mtspr SPRN_SRR1,r10 ld r10,PACA_EXGEN+EX_R10(r13) - mfspr r13,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r13) rfid b . +masked_Hinterrupt: + stb r10,PACAHARDIRQEN(r13) + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + mfspr r10,SPRN_HSRR1 + rldicl r10,r10,48,1 /* clear MSR_EE */ + rotldi r10,r10,16 + mtspr SPRN_HSRR1,r10 + ld r10,PACA_EXGEN+EX_R10(r13) + GET_SCRATCH0(r13) + hrfid + b . + .align 7 do_stab_bolted_pSeries: std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) #ifdef CONFIG_PPC_PSERIES /* @@ -260,15 +333,15 @@ do_stab_bolted_pSeries: .align 7 system_reset_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) .globl machine_check_fwnmi .align 7 machine_check_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) #endif /* CONFIG_PPC_PSERIES */ @@ -282,7 +355,7 @@ slb_miss_user_pseries: std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R11(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r10,SPRG_SCRATCH0 + GET_SCRATCH0(r10) ld r11,PACA_EXSLB+EX_R9(r13) ld r12,PACA_EXSLB+EX_R3(r13) std r10,PACA_EXGEN+EX_R13(r13) @@ -342,6 +415,8 @@ machine_check_common: STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) + STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) #ifdef CONFIG_ALTIVEC @@ -386,9 +461,24 @@ bad_stack: std r12,_XER(r1) SAVE_GPR(0,r1) SAVE_GPR(2,r1) - SAVE_4GPRS(3,r1) - SAVE_2GPRS(7,r1) - SAVE_10GPRS(12,r1) + ld r10,EX_R3(r3) + std r10,GPR3(r1) + SAVE_GPR(4,r1) + SAVE_4GPRS(5,r1) + ld r9,EX_R9(r3) + ld r10,EX_R10(r3) + SAVE_2GPRS(9,r1) + ld r9,EX_R11(r3) + ld r10,EX_R12(r3) + ld r11,EX_R13(r3) + std r9,GPR11(r1) + std r10,GPR12(r1) + std r11,GPR13(r1) +BEGIN_FTR_SECTION + ld r10,EX_CFAR(r3) + std r10,ORIG_GPR3(r1) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + SAVE_8GPRS(14,r1) SAVE_10GPRS(22,r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) @@ -397,6 +487,9 @@ bad_stack: li r12,0 std r12,0(r11) ld r2,PACATOC(r13) + ld r11,exception_marker@toc(r2) + std r12,RESULT(r1) + std r11,STACK_FRAME_OVERHEAD-16(r1) 1: addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_bad_stack b 1b @@ -419,6 +512,19 @@ data_access_common: li r5,0x300 b .do_hash_page /* Try to handle as hpte fault */ + .align 7 + .globl h_data_storage_common +h_data_storage_common: + mfspr r10,SPRN_HDAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_HDSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b .ret_from_except + .align 7 .globl instruction_access_common instruction_access_common: @@ -428,6 +534,8 @@ instruction_access_common: li r5,0x400 b .do_hash_page /* Try to handle as hpte fault */ + STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception) + /* * Here is the common SLB miss user that is used when going to virtual * mode for SLB misses, that is currently not used @@ -750,7 +858,7 @@ _STATIC(do_hash_page) BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? */ bne- do_ste_alloc /* If so handle it */ -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) clrrdi r11,r1,THREAD_SHIFT lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index c5c24beb8387..ba250d505e07 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -805,19 +805,6 @@ _ENTRY(copy_and_flush) blr #ifdef CONFIG_SMP -#ifdef CONFIG_GEMINI - .globl __secondary_start_gemini -__secondary_start_gemini: - mfspr r4,SPRN_HID0 - ori r4,r4,HID0_ICFI - li r3,0 - ori r3,r3,HID0_ICE - andc r4,r4,r3 - mtspr SPRN_HID0,r4 - sync - b __secondary_start -#endif /* CONFIG_GEMINI */ - .globl __secondary_start_mpc86xx __secondary_start_mpc86xx: mfspr r3, SPRN_PIR @@ -890,15 +877,6 @@ __secondary_start: mtspr SPRN_SRR1,r4 SYNC RFI - -_GLOBAL(start_secondary_resume) - /* Reset stack */ - rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - li r3,0 - std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary - b . #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3a319f9c9d3e..ba504099844a 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -147,6 +147,8 @@ __secondary_hold: mtctr r4 mr r3,r24 li r4,0 + /* Make sure that patched code is visible */ + isync bctr #else BUG_OPCODE @@ -216,19 +218,25 @@ generic_secondary_common_init: */ LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ ld r13,0(r13) /* Get base vaddr of paca array */ +#ifndef CONFIG_SMP + addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ + b .kexec_wait /* wait for next kernel if !SMP */ +#else + LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ + lwz r7,0(r7) /* also the max paca allocated */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ beq 2f addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ addi r5,r5,1 - cmpwi r5,NR_CPUS + cmpw r5,r7 /* Check if more pacas exist */ blt 1b mr r3,r24 /* not found, copy phys to r3 */ b .kexec_wait /* next kernel might do better */ -2: mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG */ +2: SET_PACA(r13) #ifdef CONFIG_PPC_BOOK3E addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */ mtspr SPRN_SPRG_TLB_EXFRAME,r12 @@ -236,34 +244,39 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 -3: HMT_LOW - lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ - /* start. */ - -#ifndef CONFIG_SMP - b 3b /* Never go on non-SMP */ -#else - cmpwi 0,r23,0 - beq 3b /* Loop until told to go */ - - sync /* order paca.run and cur_cpu_spec */ /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) ld r23,CPU_SPEC_RESTORE(r23) cmpdi 0,r23,0 - beq 4f + beq 3f ld r23,0(r23) mtctr r23 bctrl -4: /* Create a temp kernel stack for use before relocation is on. */ +3: LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */ + lwarx r4,0,r3 + subi r4,r4,1 + stwcx. r4,0,r3 + bne 3b + isync + +4: HMT_LOW + lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ + /* start. */ + cmpwi 0,r23,0 + beq 4b /* Loop until told to go */ + + sync /* order paca.run and cur_cpu_spec */ + isync /* In case code patching happened */ + + /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) subi r1,r1,STACK_FRAME_OVERHEAD b __secondary_start -#endif +#endif /* SMP */ /* * Turn the MMU off. @@ -534,7 +547,7 @@ _GLOBAL(pmac_secondary_start) ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ - mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ + SET_PACA(r13) /* Save vaddr of paca in an SPRG*/ /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) @@ -645,7 +658,7 @@ _GLOBAL(enable_64b_mode) oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_SF | MSR_ISF)@highest + li r12,(MSR_64BIT | MSR_ISF)@highest sldi r12,r12,48 or r11,r11,r12 mtmsrd r11 diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S new file mode 100644 index 000000000000..f8f0bc7f1d4f --- /dev/null +++ b/arch/powerpc/kernel/idle_power7.S @@ -0,0 +1,97 @@ +/* + * This file contains the power_save function for 970-family CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> + +#undef DEBUG + + .text + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. + */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + +#ifndef CONFIG_SMP + /* Make sure FPU, VSX etc... are flushed as we may lose + * state when going to nap mode + */ + bl .discard_lazy_cpu_state +#endif /* CONFIG_SMP */ + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAHARDIRQEN(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r3 + std r3,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + + /* Magic NAP mode enter sequence */ + std r0,0(r1) + ptesync + ld r0,0(r1) +1: cmp cr0,r0,r0 + bne 1b + PPC_NAP + b . + +_GLOBAL(power7_wakeup_loss) + GET_PACA(r13) + ld r1,PACAR1(r13) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r3,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r3 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +_GLOBAL(power7_wakeup_noloss) + GET_PACA(r13) + ld r1,PACAR1(r13) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 5c1118e31940..ffafaea3d261 100644 --- a/arch/powerpc/platforms/cell/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -17,8 +17,7 @@ #include <asm/machdep.h> #include <asm/pgtable.h> #include <asm/ppc-pci.h> - -#include "io-workarounds.h" +#include <asm/io-workarounds.h> #define IOWA_MAX_BUS 8 @@ -145,7 +144,19 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, return res; } -/* Regist new bus to support workaround */ +/* Enable IO workaround */ +static void __devinit io_workaround_init(void) +{ + static int io_workaround_inited; + + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; +} + +/* Register new bus to support workaround */ void __devinit iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops, int (*initfunc)(struct iowa_bus *, void *), void *data) @@ -153,6 +164,8 @@ void __devinit iowa_register_bus(struct pci_controller *phb, struct iowa_bus *bus; struct device_node *np = phb->dn; + io_workaround_init(); + if (iowa_bus_count >= IOWA_MAX_BUS) { pr_err("IOWA:Too many pci bridges, " "workarounds disabled for %s\n", np->full_name); @@ -162,6 +175,7 @@ void __devinit iowa_register_bus(struct pci_controller *phb, bus = &iowa_busses[iowa_bus_count]; bus->phb = phb; bus->ops = ops; + bus->private = data; if (initfunc) if ((*initfunc)(bus, data)) @@ -172,14 +186,3 @@ void __devinit iowa_register_bus(struct pci_controller *phb, pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); } -/* enable IO workaround */ -void __devinit io_workaround_init(void) -{ - static int io_workaround_inited; - - if (io_workaround_inited) - return; - ppc_pci_io = iowa_pci_io; - ppc_md.ioremap = iowa_ioremap; - io_workaround_inited = 1; -} diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f621b7d2d869..a24d37d4cf51 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -66,7 +66,6 @@ #include <asm/ptrace.h> #include <asm/machdep.h> #include <asm/udbg.h> -#include <asm/dbell.h> #include <asm/smp.h> #ifdef CONFIG_PPC64 @@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en) #if defined(CONFIG_BOOKE) && defined(CONFIG_SMP) /* Check for pending doorbell interrupts and resend to ourself */ - doorbell_check_self(); + if (cpu_has_feature(CPU_FTR_DBELL)) + smp_muxed_ipi_resend(); #endif /* @@ -397,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; void exc_lvl_ctx_init(void) { struct thread_info *tp; - int i, hw_cpu; + int i, cpu_nr; for_each_possible_cpu(i) { - hw_cpu = get_hard_smp_processor_id(i); - memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = critirq_ctx[hw_cpu]; - tp->cpu = i; +#ifdef CONFIG_PPC64 + cpu_nr = i; +#else + cpu_nr = get_hard_smp_processor_id(i); +#endif + memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = critirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; #ifdef CONFIG_BOOKE - memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = dbgirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = dbgirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; - memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = mcheckirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = mcheckirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = HARDIRQ_OFFSET; #endif } @@ -477,20 +481,41 @@ void do_softirq(void) * IRQ controller and virtual interrupts */ +/* The main irq map itself is an array of NR_IRQ entries containing the + * associate host and irq number. An entry with a host of NULL is free. + * An entry can be allocated if it's free, the allocator always then sets + * hwirq first to the host's invalid irq number and then fills ops. + */ +struct irq_map_entry { + irq_hw_number_t hwirq; + struct irq_host *host; +}; + static LIST_HEAD(irq_hosts); static DEFINE_RAW_SPINLOCK(irq_big_lock); -static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); -struct irq_map_entry irq_map[NR_IRQS]; +static struct irq_map_entry irq_map[NR_IRQS]; static unsigned int irq_virq_count = NR_IRQS; static struct irq_host *irq_default_host; +irq_hw_number_t irqd_to_hwirq(struct irq_data *d) +{ + return irq_map[d->irq].hwirq; +} +EXPORT_SYMBOL_GPL(irqd_to_hwirq); + irq_hw_number_t virq_to_hw(unsigned int virq) { return irq_map[virq].hwirq; } EXPORT_SYMBOL_GPL(virq_to_hw); +bool virq_is_host(unsigned int virq, struct irq_host *host) +{ + return irq_map[virq].host == host; +} +EXPORT_SYMBOL_GPL(virq_is_host); + static int default_irq_host_match(struct irq_host *h, struct device_node *np) { return h->of_node != NULL && h->of_node == np; @@ -511,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, /* Allocate structure and revmap table if using linear mapping */ if (revmap_type == IRQ_HOST_MAP_LINEAR) size += revmap_arg * sizeof(unsigned int); - host = zalloc_maybe_bootmem(size, GFP_KERNEL); + host = kzalloc(size, GFP_KERNEL); if (host == NULL) return NULL; @@ -561,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, irq_map[i].host = host; smp_wmb(); - /* Clear norequest flags */ - irq_clear_status_flags(i, IRQ_NOREQUEST); - /* Legacy flags are left to default at this point, * one can then use irq_create_mapping() to * explicitly change them */ ops->map(host, i, i); + + /* Clear norequest flags */ + irq_clear_status_flags(i, IRQ_NOREQUEST); } break; case IRQ_HOST_MAP_LINEAR: @@ -579,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, smp_wmb(); host->revmap_data.linear.revmap = rmap; break; + case IRQ_HOST_MAP_TREE: + INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL); + break; default: break; } @@ -636,8 +664,6 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, goto error; } - irq_clear_status_flags(virq, IRQ_NOREQUEST); - /* map it */ smp_wmb(); irq_map[virq].hwirq = hwirq; @@ -648,6 +674,8 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, goto errdesc; } + irq_clear_status_flags(virq, IRQ_NOREQUEST); + return 0; errdesc: @@ -704,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host, */ virq = irq_find_mapping(host, hwirq); if (virq != NO_IRQ) { - if (host->ops->remap) - host->ops->remap(host, virq, hwirq); pr_debug("irq: -> existing mapping on virq %d\n", virq); return virq; } @@ -786,14 +812,15 @@ void irq_dispose_mapping(unsigned int virq) return; host = irq_map[virq].host; - WARN_ON (host == NULL); - if (host == NULL) + if (WARN_ON(host == NULL)) return; /* Never unmap legacy interrupts */ if (host->revmap_type == IRQ_HOST_MAP_LEGACY) return; + irq_set_status_flags(virq, IRQ_NOREQUEST); + /* remove chip and handler */ irq_set_chip_and_handler(virq, NULL, NULL); @@ -813,13 +840,6 @@ void irq_dispose_mapping(unsigned int virq) host->revmap_data.linear.revmap[hwirq] = NO_IRQ; break; case IRQ_HOST_MAP_TREE: - /* - * Check if radix tree allocated yet, if not then nothing to - * remove. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) - break; mutex_lock(&revmap_trees_mutex); radix_tree_delete(&host->revmap_data.tree, hwirq); mutex_unlock(&revmap_trees_mutex); @@ -830,8 +850,6 @@ void irq_dispose_mapping(unsigned int virq) smp_mb(); irq_map[virq].hwirq = host->inval_irq; - irq_set_status_flags(virq, IRQ_NOREQUEST); - irq_free_descs(virq, 1); /* Free it */ irq_free_virt(virq, 1); @@ -877,16 +895,9 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, struct irq_map_entry *ptr; unsigned int virq; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - - /* - * Check if the radix tree exists and has bee initialized. - * If not, we fallback to slow mode - */ - if (revmap_trees_allocated < 2) + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE)) return irq_find_mapping(host, hwirq); - /* Now try to resolve */ /* * No rcu_read_lock(ing) needed, the ptr returned can't go under us * as it's referencing an entry in the static irq_map table. @@ -909,16 +920,7 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { - - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - - /* - * Check if the radix tree exists yet. - * If not, then the irq will be inserted into the tree when it gets - * initialized. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) + if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE)) return; if (virq != NO_IRQ) { @@ -934,7 +936,8 @@ unsigned int irq_linear_revmap(struct irq_host *host, { unsigned int *revmap; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR); + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR)) + return irq_find_mapping(host, hwirq); /* Check revmap bounds */ if (unlikely(hwirq >= host->revmap_data.linear.size)) @@ -1028,53 +1031,6 @@ int arch_early_irq_init(void) return 0; } -/* We need to create the radix trees late */ -static int irq_late_init(void) -{ - struct irq_host *h; - unsigned int i; - - /* - * No mutual exclusion with respect to accessors of the tree is needed - * here as the synchronization is done via the state variable - * revmap_trees_allocated. - */ - list_for_each_entry(h, &irq_hosts, link) { - if (h->revmap_type == IRQ_HOST_MAP_TREE) - INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL); - } - - /* - * Make sure the radix trees inits are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 1; - - /* - * Insert the reverse mapping for those interrupts already present - * in irq_map[]. - */ - mutex_lock(&revmap_trees_mutex); - for (i = 0; i < irq_virq_count; i++) { - if (irq_map[i].host && - (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE)) - radix_tree_insert(&irq_map[i].host->revmap_data.tree, - irq_map[i].hwirq, &irq_map[i]); - } - mutex_unlock(&revmap_trees_mutex); - - /* - * Make sure the radix trees insertions are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 2; - - return 0; -} -arch_initcall(irq_late_init); - #ifdef CONFIG_VIRQ_DEBUG static int virq_debug_show(struct seq_file *m, void *private) { @@ -1082,10 +1038,11 @@ static int virq_debug_show(struct seq_file *m, void *private) struct irq_desc *desc; const char *p; static const char none[] = "none"; + void *data; int i; - seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", - "chip name", "host name"); + seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", + "chip name", "chip data", "host name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -1098,7 +1055,7 @@ static int virq_debug_show(struct seq_file *m, void *private) struct irq_chip *chip; seq_printf(m, "%5d ", i); - seq_printf(m, "0x%05lx ", virq_to_hw(i)); + seq_printf(m, "0x%05lx ", irq_map[i].hwirq); chip = irq_desc_get_chip(desc); if (chip && chip->name) @@ -1107,6 +1064,9 @@ static int virq_debug_show(struct seq_file *m, void *private) p = none; seq_printf(m, "%-15s ", p); + data = irq_desc_get_chip_data(desc); + seq_printf(m, "0x%16p ", data); + if (irq_map[i].host && irq_map[i].host->of_node) p = irq_map[i].host->of_node->full_name; else diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 42850ee00ada..bd9d35f59cf4 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs) #ifdef CONFIG_SMP void kgdb_roundup_cpus(unsigned long flags) { - smp_send_debugger_break(MSG_ALL_BUT_SELF); + smp_send_debugger_break(); } #endif diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 301db65f05a1..84daabe2fcba 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ -/** - * h_get_mpp - * H_GET_MPP hcall returns info in 7 parms - */ -int h_get_mpp(struct hvcall_mpp_data *mpp_data) -{ - int rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - - rc = plpar_hcall9(H_GET_MPP, retbuf); - - mpp_data->entitled_mem = retbuf[0]; - mpp_data->mapped_mem = retbuf[1]; - - mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; - mpp_data->pool_num = retbuf[2] & 0xffff; - - mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; - mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; - mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; - - mpp_data->pool_size = retbuf[4]; - mpp_data->loan_request = retbuf[5]; - mpp_data->backing_mem = retbuf[6]; - - return rc; -} -EXPORT_SYMBOL(h_get_mpp); struct hvcall_ppp_data { u64 entitlement; @@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m) seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); } +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); + parse_mpp_x_data(m); pseries_cmo_data(m); splpar_dispatch_data(m); diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 094bd9821ad4..998a10028608 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -694,6 +694,17 @@ _GLOBAL(kernel_thread) addi r1,r1,16 blr +#ifdef CONFIG_SMP +_GLOBAL(start_secondary_resume) + /* Reset stack */ + rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r3,0 + stw r3,0(r1) /* Zero the stack frame pointer */ + bl start_secondary + b . +#endif /* CONFIG_SMP */ + /* * This routine is just here to keep GCC happy - sigh... */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 206a321a71d3..e89df59cdc5a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -462,7 +462,8 @@ _GLOBAL(disable_kernel_fp) * wait for the flag to change, indicating this kernel is going away but * the slave code for the next one is at addresses 0 to 100. * - * This is used by all slaves. + * This is used by all slaves, even those that did not find a matching + * paca in the secondary startup code. * * Physical (hardware) cpu id should be in r3. */ @@ -471,10 +472,6 @@ _GLOBAL(kexec_wait) 1: mflr r5 addi r5,r5,kexec_flag-1b - li r4,KEXEC_STATE_REAL_MODE - stb r4,PACAKEXECSTATE(r13) - SYNC - 99: HMT_LOW #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) @@ -499,11 +496,17 @@ kexec_flag: * * get phys id from paca * switch to real mode + * mark the paca as no longer used * join other cpus in kexec_wait(phys_id) */ _GLOBAL(kexec_smp_wait) lhz r3,PACAHWCPUID(r13) bl real_mode + + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + b .kexec_wait /* diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 10f0aadee95b..efeb88184182 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/threads.h> +#include <linux/smp.h> #include <linux/module.h> #include <linux/memblock.h> @@ -156,18 +156,29 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) /* Put the paca pointer into r13 and SPRG_PACA */ void setup_paca(struct paca_struct *new_paca) { + /* Setup r13 */ local_paca = new_paca; - mtspr(SPRN_SPRG_PACA, local_paca); + #ifdef CONFIG_PPC_BOOK3E + /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#else + /* In HV mode, we setup both HPACA and PACA to avoid problems + * if we do a GET_PACA() before the feature fixups have been + * applied + */ + if (cpu_has_feature(CPU_FTR_HVMODE_206)) + mtspr(SPRN_SPRG_HPACA, local_paca); #endif + mtspr(SPRN_SPRG_PACA, local_paca); + } static int __initdata paca_size; void __init allocate_pacas(void) { - int nr_cpus, cpu, limit; + int cpu, limit; /* * We can't take SLB misses on the paca, and we want to access them @@ -179,23 +190,18 @@ void __init allocate_pacas(void) if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); - nr_cpus = NR_CPUS; - /* On iSeries we know we can never have more than 64 cpus */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - nr_cpus = min(64, nr_cpus); - - paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); memset(paca, 0, paca_size); printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", - paca_size, nr_cpus, paca); + paca_size, nr_cpu_ids, paca); - allocate_lppacas(nr_cpus, limit); + allocate_lppacas(nr_cpu_ids, limit); /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_cpu_ids; cpu++) initialise_paca(&paca[cpu], cpu); } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d225d99fe39d..6baabc13306a 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) const u32 *regs; struct pci_dn *pdn; - pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; - memset(pdn, 0, sizeof(*pdn)); dn->data = pdn; pdn->node = dn; pdn->phb = phb; diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ef3ef566235e..7d28f540200c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs); extern int sys_sigreturn(struct pt_regs *regs); EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); @@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(copy_4K_page); -#endif +EXPORT_SYMBOL(copy_page); #if defined(CONFIG_PCI) && defined(CONFIG_PPC32) EXPORT_SYMBOL(isa_io_base); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f74f355a9617..095043d79946 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -702,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk) /* * Copy a thread.. */ +extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ + int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) @@ -755,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, _ALIGN_UP(sizeof(struct thread_info), 16); #ifdef CONFIG_PPC_STD_MMU_64 - if (cpu_has_feature(CPU_FTR_SLB)) { + if (mmu_has_feature(MMU_FTR_SLB)) { unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) << SLB_VSID_SHIFT_1T; else @@ -769,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ksp_vsid = sp_vsid; } #endif /* CONFIG_PPC_STD_MMU_64 */ +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_DSCR)) { + if (current->thread.dscr_inherit) { + p->thread.dscr_inherit = 1; + p->thread.dscr = current->thread.dscr; + } else if (0 != dscr_default) { + p->thread.dscr_inherit = 1; + p->thread.dscr = dscr_default; + } else { + p->thread.dscr_inherit = 0; + p->thread.dscr = 0; + } + } +#endif /* * The PPC64 ABI makes use of a TOC to contain function diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e74fa12afc82..48aeb55faae9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -68,6 +68,7 @@ int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; #endif +static phys_addr_t first_memblock_size; static int __init early_parse_mem(char *p) { @@ -123,18 +124,19 @@ static void __init move_device_tree(void) */ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ + unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_SLB, 0, 0, 2, 0}, - {CPU_FTR_CTRL, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 1, 1, 1}, - {CPU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, + {0, MMU_FTR_SLB, 0, 0, 2, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, }; @@ -166,9 +168,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } } @@ -268,13 +272,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - static int logical_cpuid = 0; char *type = of_get_flat_dt_prop(node, "device_type", NULL); const u32 *prop; const u32 *intserv; int i, nthreads; unsigned long len; - int found = 0; + int found = -1; + int found_thread = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -298,11 +302,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * version 2 of the kexec param format adds the phys cpuid of * booted proc. */ - if (initial_boot_params && initial_boot_params->version >= 2) { - if (intserv[i] == - initial_boot_params->boot_cpuid_phys) { - found = 1; - break; + if (initial_boot_params->version >= 2) { + if (intserv[i] == initial_boot_params->boot_cpuid_phys) { + found = boot_cpu_count; + found_thread = i; } } else { /* @@ -311,23 +314,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * off secondary threads. */ if (of_get_flat_dt_prop(node, - "linux,boot-cpu", NULL) != NULL) { - found = 1; - break; - } + "linux,boot-cpu", NULL) != NULL) + found = boot_cpu_count; } - #ifdef CONFIG_SMP /* logical cpu id is always 0 on UP kernels */ - logical_cpuid++; + boot_cpu_count++; #endif } - if (found) { - DBG("boot cpu: logical %d physical %d\n", logical_cpuid, - intserv[i]); - boot_cpuid = logical_cpuid; - set_hard_smp_processor_id(boot_cpuid, intserv[i]); + if (found >= 0) { + DBG("boot cpu: logical %d physical %d\n", found, + intserv[found_thread]); + boot_cpuid = found; + set_hard_smp_processor_id(found, intserv[found_thread]); /* * PAPR defines "logical" PVR values for cpus that @@ -509,11 +509,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) size = 0x80000000ul - base; } #endif - - /* First MEMBLOCK added, do some special initializations */ - if (memstart_addr == ~(phys_addr_t)0) - setup_initial_memory_limit(base, size); - memstart_addr = min((u64)memstart_addr, base); + /* Keep track of the beginning of memory -and- the size of + * the very first block in the device-tree as it represents + * the RMA on ppc64 server + */ + if (base < memstart_addr) { + memstart_addr = base; + first_memblock_size = size; + } /* Add the chunk to the MEMBLOCK list */ memblock_add(base, size); @@ -698,6 +701,7 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 941ff4dbc567..c016033ba78d 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...) const char *p, *q, *s; va_list args; unsigned long v; + long vs; struct prom_t *_prom = &RELOC(prom); va_start(args, format); @@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...) v = va_arg(args, unsigned long); prom_print_hex(v); break; + case 'd': + ++q; + vs = va_arg(args, int); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); + break; case 'l': ++q; - if (*q == 'u') { /* '%lu' */ + if (*q == 0) + break; + else if (*q == 'x') { + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + } else if (*q == 'u') { /* '%lu' */ ++q; v = va_arg(args, unsigned long); prom_print_dec(v); + } else if (*q == 'd') { /* %ld */ + ++q; + vs = va_arg(args, long); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); } break; } @@ -676,8 +700,10 @@ static void __init early_cmdline_parse(void) #endif /* CONFIG_PCI_MSI */ #ifdef CONFIG_PPC_SMLPAR #define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#define OV5_XCMO 0x40 /* Page Coalescing */ #else #define OV5_CMO 0x00 +#define OV5_XCMO 0x00 #endif #define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ @@ -732,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = { OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, - OV5_CMO, + OV5_CMO | OV5_XCMO, OV5_TYPE1_AFFINITY, 0, 0, diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 2097f2b3cba8..271ff6318eda 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -42,6 +42,7 @@ #include <asm/time.h> #include <asm/mmu.h> #include <asm/topology.h> +#include <asm/pSeries_reconfig.h> struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED @@ -494,7 +495,7 @@ unsigned int rtas_busy_delay(int status) might_sleep(); ms = rtas_busy_delay_time(status); - if (ms) + if (ms && need_resched()) msleep(ms); return ms; @@ -731,6 +732,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w atomic_set(&data->error, rc); start_topology_update(); + pSeries_coalesce_init(); if (wake_when_done) { atomic_set(&data->done, 1); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 21f30cb68077..79fca2651b65 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc) int i; threads_per_core = tpc; - threads_core_mask = CPU_MASK_NONE; + cpumask_clear(&threads_core_mask); /* This implementation only supports power of 2 number of threads * for simplicity and performance @@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc) BUG_ON(tpc != (1 << threads_shift)); for (i = 0; i < tpc; i++) - cpu_set(i, threads_core_mask); + cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", tpc, tpc > 1 ? "s" : ""); @@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc) * cpu_present_mask * * Having the possible map set up early allows us to restrict allocations - * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * of things like irqstacks to nr_cpu_ids rather than NR_CPUS. * * We do not initialize the online map here; cpus set their own bits in * cpu_online_mask as they come up. @@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { const int *intserv; int j, len; @@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void) intserv = &cpu; /* assume logical == phys */ } - for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, intserv[j]); set_cpu_present(cpu, true); @@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void) if (cpu_has_feature(CPU_FTR_SMT)) maxcpus *= nthreads; - if (maxcpus > NR_CPUS) { + if (maxcpus > nr_cpu_ids) { printk(KERN_WARNING "Partition configured for %d cpus, " "operating system maximum is %d.\n", - maxcpus, NR_CPUS); - maxcpus = NR_CPUS; + maxcpus, nr_cpu_ids); + maxcpus = nr_cpu_ids; } else printk(KERN_INFO "Partition configured for %d cpus.\n", maxcpus); @@ -510,7 +510,7 @@ void __init smp_setup_cpu_maps(void) cpu_init_thread_core_maps(nthreads); /* Now that possible cpus are set, set nr_cpu_ids for later use */ - nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; + setup_nr_cpu_ids(); free_unused_pacas(); } @@ -602,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port) * name instead */ if (!np) np = of_find_node_by_name(NULL, "8042"); + if (np) { + of_i8042_kbd_irq = 1; + of_i8042_aux_irq = 12; + } break; case FDC_BASE: /* FDC1 */ np = of_find_node_by_type(NULL, "fdc"); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1d2fbc905303..620d792b52e4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +int __initdata boot_cpu_count; int boot_cpuid_phys; int smp_hw_index[NR_CPUS]; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5a0401fcaebd..a88bf2713d41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -62,6 +62,7 @@ #include <asm/udbg.h> #include <asm/kexec.h> #include <asm/mmu_context.h> +#include <asm/code-patching.h> #include "setup.h" @@ -72,6 +73,7 @@ #endif int boot_cpuid = 0; +int __initdata boot_cpu_count; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions @@ -233,6 +235,7 @@ void early_setup_secondary(void) void smp_release_cpus(void) { unsigned long *ptr; + int i; DBG(" -> smp_release_cpus()\n"); @@ -245,7 +248,16 @@ void smp_release_cpus(void) ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop - PHYSICAL_START); *ptr = __pa(generic_secondary_smp_init); - mb(); + + /* And wait a bit for them to catch up */ + for (i = 0; i < 100000; i++) { + mb(); + HMT_low(); + if (boot_cpu_count == 0) + break; + udelay(1); + } + DBG("boot_cpu_count = %d\n", boot_cpu_count); DBG(" <- smp_release_cpus()\n"); } @@ -423,17 +435,30 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } -static u64 slb0_limit(void) +/* This returns the limit below which memory accesses to the linear + * mapping are guarnateed not to cause a TLB or SLB miss. This is + * used to allocate interrupt or emergency stacks for which our + * exception entry path doesn't deal with being interrupted. + */ +static u64 safe_stack_limit(void) { - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { +#ifdef CONFIG_PPC_BOOK3E + /* Freescale BookE bolts the entire linear mapping */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + return linear_map_top; + /* Other BookE, we assume the first GB is bolted */ + return 1ul << 30; +#else + /* BookS, the first segment is bolted */ + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) return 1UL << SID_SHIFT_1T; - } return 1UL << SID_SHIFT; +#endif } static void __init irqstack_early_init(void) { - u64 limit = slb0_limit(); + u64 limit = safe_stack_limit(); unsigned int i; /* @@ -453,6 +478,9 @@ static void __init irqstack_early_init(void) #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) { + extern unsigned int interrupt_base_book3e; + extern unsigned int exc_debug_debug_book3e; + unsigned int i; for_each_possible_cpu(i) { @@ -463,6 +491,10 @@ static void __init exc_lvl_early_init(void) mcheckirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } + + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, + (unsigned long)&exc_debug_debug_book3e, 0); } #else #define exc_lvl_early_init() @@ -486,7 +518,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(slb0_limit(), ppc64_rma_size); + limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 27c4a4584f80..da989fff19cc 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -381,7 +381,7 @@ badframe: regs, uc, &uc->uc_mcontext); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "rt_sigreturn", (long)uc, regs->nip, regs->link); @@ -469,7 +469,7 @@ badframe: regs, frame, newsp); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cbdbb14be4b0..4a6f2ec7e761 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -95,7 +95,7 @@ int smt_enabled_at_boot = 1; static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; #ifdef CONFIG_PPC64 -void __devinit smp_generic_kick_cpu(int nr) +int __devinit smp_generic_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); @@ -106,37 +106,10 @@ void __devinit smp_generic_kick_cpu(int nr) */ paca[nr].cpu_start = 1; smp_mb(); -} -#endif -void smp_message_recv(int msg) -{ - switch(msg) { - case PPC_MSG_CALL_FUNCTION: - generic_smp_call_function_interrupt(); - break; - case PPC_MSG_RESCHEDULE: - /* we notice need_resched on exit */ - break; - case PPC_MSG_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case PPC_MSG_DEBUGGER_BREAK: - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - break; - } -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); - break; -#endif /* CONFIG_DEBUGGER */ - /* FALLTHROUGH */ - default: - printk("SMP %d: smp_message_recv(): unknown msg %d\n", - smp_processor_id(), msg); - break; - } + return 0; } +#endif static irqreturn_t call_function_action(int irq, void *data) { @@ -146,7 +119,7 @@ static irqreturn_t call_function_action(int irq, void *data) static irqreturn_t reschedule_action(int irq, void *data) { - /* we just need the return path side effect of checking need_resched */ + scheduler_ipi(); return IRQ_HANDLED; } @@ -156,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +irqreturn_t debug_ipi_action(int irq, void *data) { - smp_message_recv(PPC_MSG_DEBUGGER_BREAK); + if (crash_ipi_function_ptr) { + crash_ipi_function_ptr(get_irq_regs()); + return IRQ_HANDLED; + } + +#ifdef CONFIG_DEBUGGER + debugger_ipi(get_irq_regs()); +#endif /* CONFIG_DEBUGGER */ + return IRQ_HANDLED; } @@ -197,6 +178,66 @@ int smp_request_message_ipi(int virq, int msg) return err; } +#ifdef CONFIG_PPC_SMP_MUXED_IPI +struct cpu_messages { + int messages; /* current messages */ + unsigned long data; /* data for cause ipi */ +}; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); + +void smp_muxed_ipi_set_data(int cpu, unsigned long data) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + + info->data = data; +} + +void smp_muxed_ipi_message_pass(int cpu, int msg) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + char *message = (char *)&info->messages; + + message[msg] = 1; + mb(); + smp_ops->cause_ipi(cpu, info->data); +} + +void smp_muxed_ipi_resend(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + + if (info->messages) + smp_ops->cause_ipi(smp_processor_id(), info->data); +} + +irqreturn_t smp_ipi_demux(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + unsigned int all; + + mb(); /* order any irq clear */ + + do { + all = xchg_local(&info->messages, 0); + +#ifdef __BIG_ENDIAN + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) + generic_smp_call_function_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE))) + scheduler_ipi(); + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE))) + generic_smp_call_function_single_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK))) + debug_ipi_action(0, NULL); +#else +#error Unsupported ENDIAN +#endif + } while (info->messages); + + return IRQ_HANDLED; +} +#endif /* CONFIG_PPC_SMP_MUXED_IPI */ + void smp_send_reschedule(int cpu) { if (likely(smp_ops)) @@ -216,11 +257,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); } -#ifdef CONFIG_DEBUGGER -void smp_send_debugger_break(int cpu) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +void smp_send_debugger_break(void) { - if (likely(smp_ops)) - smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); + int cpu; + int me = raw_smp_processor_id(); + + if (unlikely(!smp_ops)) + return; + + for_each_online_cpu(cpu) + if (cpu != me) + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); } #endif @@ -228,9 +276,9 @@ void smp_send_debugger_break(int cpu) void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback && smp_ops) { + if (crash_ipi_callback) { mb(); - smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); + smp_send_debugger_break(); } } #endif @@ -410,8 +458,6 @@ int __cpuinit __cpu_up(unsigned int cpu) { int rc, c; - secondary_ti = current_set[cpu]; - if (smp_ops == NULL || (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; @@ -421,6 +467,8 @@ int __cpuinit __cpu_up(unsigned int cpu) if (rc) return rc; + secondary_ti = current_set[cpu]; + /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ @@ -434,7 +482,11 @@ int __cpuinit __cpu_up(unsigned int cpu) /* wake up cpus */ DBG("smp: kicking cpu %d\n", cpu); - smp_ops->kick_cpu(cpu); + rc = smp_ops->kick_cpu(cpu); + if (rc) { + pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc); + return rc; + } /* * wait to see if the cpu made a callin (is actually up). @@ -507,7 +559,7 @@ int cpu_first_thread_of_core(int core) } EXPORT_SYMBOL_GPL(cpu_first_thread_of_core); -/* Must be called when no change can occur to cpu_present_map, +/* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ static struct device_node *cpu_to_l2cache(int cpu) @@ -608,7 +660,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, ¤t->cpus_allowed); + cpumask_copy(old_mask, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c0d8c2006bf4..f0f2199e64e1 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); + +unsigned long dscr_default = 0; +EXPORT_SYMBOL(dscr_default); + +static ssize_t show_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + +static ssize_t __used store_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int ret = 0; + + ret = sscanf(buf, "%lx", &val); + if (ret != 1) + return -EINVAL; + dscr_default = val; + + return count; +} + +static SYSDEV_CLASS_ATTR(dscr_default, 0600, + show_dscr_default, store_dscr_default); + +static void sysfs_create_dscr_default(void) +{ + int err = 0; + if (cpu_has_feature(CPU_FTR_DSCR)) + err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, + &attr_dscr_default.attr); +} #endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T @@ -617,6 +652,9 @@ static int __init topology_init(void) if (cpu_online(cpu)) register_cpu_online(cpu); } +#ifdef CONFIG_PPC64 + sysfs_create_dscr_default(); +#endif /* CONFIG_PPC64 */ return 0; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5ddb801bc154..b13306b0d925 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) return 1; @@ -199,7 +198,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) } else if (show_unhandled_signals && unhandled_signal(current, signr) && printk_ratelimit()) { - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); } @@ -221,7 +220,7 @@ void system_reset_exception(struct pt_regs *regs) } #ifdef CONFIG_KEXEC - cpu_set(smp_processor_id(), cpus_in_sr); + cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); #endif die("System Reset", regs, SIGABRT); @@ -909,6 +908,26 @@ static int emulate_instruction(struct pt_regs *regs) return emulate_isel(regs, instword); } +#ifdef CONFIG_PPC64 + /* Emulate the mfspr rD, DSCR. */ + if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mfdscr, regs); + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_DSCR); + return 0; + } + /* Emulate the mtspr DSCR, rD. */ + if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mtdscr, regs); + rd = (instword >> 21) & 0x1f; + mtspr(SPRN_DSCR, regs->gpr[rd]); + current->thread.dscr_inherit = 1; + return 0; + } +#endif + return -EINVAL; } @@ -1506,6 +1525,10 @@ struct ppc_emulated ppc_emulated = { #ifdef CONFIG_VSX WARN_EMULATED_SETUP(vsx), #endif +#ifdef CONFIG_PPC64 + WARN_EMULATED_SETUP(mfdscr), + WARN_EMULATED_SETUP(mtdscr), +#endif }; u32 ppc_warn_emulated; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index e39cad83c884..23d65abbedce 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,6 +62,8 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) + udbg_init_wsp(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index baa33a7517bc..6837f839ab78 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <asm/udbg.h> #include <asm/io.h> +#include <asm/reg_a2.h> extern u8 real_readb(volatile u8 __iomem *addr); extern void real_writeb(u8 data, volatile u8 __iomem *addr); @@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void) udbg_getc_poll = NULL; } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP +static void udbg_wsp_flush(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + } +} + +static void udbg_wsp_putc(char c) +{ + if (udbg_comport) { + if (c == '\n') + udbg_wsp_putc('\r'); + udbg_wsp_flush(); + writeb(c, &udbg_comport->thr); eieio(); + } +} + +static int udbg_wsp_getc(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_DR) == 0) + ; /* wait for char */ + return readb(&udbg_comport->rbr); + } + return -1; +} + +static int udbg_wsp_getc_poll(void) +{ + if (udbg_comport) + if (readb(&udbg_comport->lsr) & LSR_DR) + return readb(&udbg_comport->rbr); + return -1; +} + +void __init udbg_init_wsp(void) +{ + udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT; + + udbg_init_uart(udbg_comport, 57600, 50000000); + + udbg_putc = udbg_wsp_putc; + udbg_flush = udbg_wsp_flush; + udbg_getc = udbg_wsp_getc; + udbg_getc_poll = udbg_wsp_getc_poll; +} +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 9de6f396cf85..4d5a3edff49e 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -102,7 +102,7 @@ _GLOBAL(giveup_altivec) MTMSRD(r5) /* enable use of VMX now */ isync PPC_LCMPI 0,r3,0 - beqlr- /* if no previous owner, done */ + beqlr /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r5,0 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 74d0e7421143..da3a1225c0ac 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 65ea083a5b27..549bb2c9a47a 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); return emulated; } @@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); return emulated; } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c961de40c676..0f95b5cce033 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -236,7 +236,7 @@ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) { - return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); + return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); } void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 2b9c9088d00e..1a1b34487e71 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -35,9 +35,7 @@ #if defined(CONFIG_PPC_BOOK3S_64) -#define LOAD_SHADOW_VCPU(reg) \ - mfspr reg, SPRN_SPRG_PACA - +#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg) #define SHADOW_VCPU_OFF PACA_KVM_SVCPU #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) #define FUNC(name) GLUE(.,name) @@ -72,7 +70,7 @@ .global kvmppc_trampoline_\intno kvmppc_trampoline_\intno: - mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */ + SET_SCRATCH0(r13) /* Save r13 */ /* * First thing to do is to find out if we're coming @@ -91,7 +89,7 @@ kvmppc_trampoline_\intno: lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) mtcr r12 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) - mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ + GET_SCRATCH0(r13) /* r13 = original r13 */ b kvmppc_resume_\intno /* Get back original handler */ /* Now we know we're handling a KVM guest */ @@ -114,6 +112,9 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL +#ifdef CONFIG_PPC_BOOK3S_64 +INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL_HV +#endif INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL @@ -158,7 +159,7 @@ kvmppc_handler_skip_ins: lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) mtcr r12 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) - mfspr r13, SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r13) /* And get back into the code */ RFI diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 7c52ed0b7051..451264274b8c 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -155,14 +155,20 @@ kvmppc_handler_trampoline_exit: PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) /* Save guest PC and MSR */ - mfsrr0 r3 + andi. r0,r12,0x2 + beq 1f + mfspr r3,SPRN_HSRR0 + mfspr r4,SPRN_HSRR1 + andi. r12,r12,0x3ffd + b 2f +1: mfsrr0 r3 mfsrr1 r4 - +2: PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13) PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13) /* Get scratch'ed off registers */ - mfspr r9, SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r9) PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ef76acb455c3..8462b3a1c1c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; @@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + vcpu->arch.tcr = sregs->u.e.tcr; + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + vcpu->arch.dec = sregs->u.e.dec; + + kvmppc_emulate_dec(vcpu); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + /* + * FIXME: existing KVM timer handling is incomplete. + * TSR cannot be read by the guest, and its value in + * vcpu->arch is always zero. For now, just handle + * the case where the caller is trying to inject a + * decrementer interrupt. + */ + + if ((sregs->u.e.tsr & TSR_DIS) && + (vcpu->arch.tcr & TCR_DIE)) + kvmppc_core_queue_dec(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = 0; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != 0) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, sregs); + return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 1cc471faac2d..b58ccae95904 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -380,7 +380,6 @@ lightweight_exit: * because host interrupt handlers would get confused. */ lwz r1, VCPU_GPR(r1)(r4) - /* XXX handle USPRG0 */ /* Host interrupt handlers may have clobbered these guest-readable * SPRGs, so we need to reload them here with the guest's values. */ lwz r3, VCPU_SPRG4(r4) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e3768ee9b595..318dbc61ba44 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Registers init */ vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ vcpu->vcpu_id = 0; @@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu_e500->mas0; + sregs->u.e.mas1 = vcpu_e500->mas1; + sregs->u.e.mas2 = vcpu_e500->mas2; + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas4 = vcpu_e500->mas4; + sregs->u.e.mas6 = vcpu_e500->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu_e500->mas0 = sregs->u.e.mas0; + vcpu_e500->mas1 = sregs->u.e.mas1; + vcpu_e500->mas2 = sregs->u.e.mas2; + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas4 = sregs->u.e.mas4; + vcpu_e500->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 8e3edfbc9634..69cd665a0caf 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, <yu.liu@freescale.com> * @@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_PID: - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = spr_val; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: vcpu_e500->pid[1] = spr_val; break; @@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; case SPRN_HID1: kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + case SPRN_SVR: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; case SPRN_MMUCSR0: kvmppc_set_gpr(vcpu, rt, 0); break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d6d6d47a75a9..b18fe353397d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com * @@ -24,6 +24,7 @@ #include "../mm/mmu_decl.h" #include "e500_tlb.h" #include "trace.h" +#include "timing.h" #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) @@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) vcpu_e500->mas7 = 0; } + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); return EMULATE_DONE; } @@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) write_host_tlbe(vcpu_e500, stlbsel, sesel); } + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); return EMULATE_DONE; } @@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, return -1; } +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = pid; +} + void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { struct tlbe *tlbe; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c64fd2909bb2..141dce3c6810 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + return vcpu->arch.dec - jd; +} + /* XXX to do: * lhax * lhaux @@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_DEC: { - u64 jd = get_tb() - vcpu->arch.dec_jiffies; - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug("mfDEC: %x - %llx = %lx\n", - vcpu->arch.dec, jd, - kvmppc_get_gpr(vcpu, rt)); + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); break; } default: @@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; } + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); break; case OP_31_XOP_STHX: @@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) printk("mtspr: unknown spr %x\n", sprn); break; } + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); break; case OP_31_XOP_DCBI: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 99758460efde..616dd516ca1f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else case KVM_CAP_PPC_SEGSTATE: +#endif case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: @@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + return 0; } @@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { +#ifdef CONFIG_BOOKE + /* + * vrsave (formerly usprg0) isn't used by Linux, but may + * be used by the guest. + * + * On non-booke this is associated with Altivec and + * is handled by code in book3s.c. + */ + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_core_vcpu_put(vcpu); +#ifdef CONFIG_BOOKE + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index a021f5827a33..319177df9587 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) { int i; - /* pause guest execution to avoid concurrent updates */ - mutex_lock(&vcpu->mutex); + /* Take a lock to avoid concurrent updates */ + mutex_lock(&vcpu->arch.exit_timing_lock); vcpu->arch.last_exit_type = 0xDEAD; for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { @@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) vcpu->arch.timing_exit.tv64 = 0; vcpu->arch.timing_last_enter.tv64 = 0; - mutex_unlock(&vcpu->mutex); + mutex_unlock(&vcpu->arch.exit_timing_lock); } static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) @@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) return; } + mutex_lock(&vcpu->arch.exit_timing_lock); + vcpu->arch.timing_count_type[type]++; /* sum */ @@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) vcpu->arch.timing_min_duration[type] = duration; if (unlikely(duration > vcpu->arch.timing_max_duration[type])) vcpu->arch.timing_max_duration[type] = duration; + + mutex_unlock(&vcpu->arch.exit_timing_lock); } void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) @@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) { struct kvm_vcpu *vcpu = m->private; int i; + u64 min, max, sum, sum_quad; seq_printf(m, "%s", "type count min max sum sum_squared\n"); + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + + min = vcpu->arch.timing_min_duration[i]; + do_div(min, tb_ticks_per_usec); + max = vcpu->arch.timing_max_duration[i]; + do_div(max, tb_ticks_per_usec); + sum = vcpu->arch.timing_sum_duration[i]; + do_div(sum, tb_ticks_per_usec); + sum_quad = vcpu->arch.timing_sum_quad_duration[i]; + do_div(sum_quad, tb_ticks_per_usec); + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", kvm_exit_names[i], vcpu->arch.timing_count_type[i], - vcpu->arch.timing_min_duration[i], - vcpu->arch.timing_max_duration[i], - vcpu->arch.timing_sum_duration[i], - vcpu->arch.timing_sum_quad_duration[i]); + min, + max, + sum, + sum_quad); + } return 0; } diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index f53e09c7dac7..13b676c20d12 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -6,14 +6,6 @@ #include <asm/system.h> -void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask) -{ - if (mem_init_done) - return kmalloc(size, mask); - else - return alloc_bootmem(size); -} - void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 4d4eeb900486..53dcb6b1b708 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -6,6 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <asm/page.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> @@ -15,9 +16,9 @@ PPC64_CACHES: .tc ppc64_caches[TC],ppc64_caches .section ".text" - -_GLOBAL(copy_4K_page) - li r5,4096 /* 4K page size */ +_GLOBAL(copy_page) + lis r5,PAGE_SIZE@h + ori r5,r5,PAGE_SIZE@l BEGIN_FTR_SECTION ld r10,PPC64_CACHES@toc(r2) lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */ diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c index deac4d30daf4..e91615abae66 100644 --- a/arch/powerpc/lib/devres.c +++ b/arch/powerpc/lib/devres.c @@ -9,11 +9,11 @@ #include <linux/device.h> /* devres_*(), devm_ioremap_release() */ #include <linux/gfp.h> -#include <linux/io.h> /* ioremap_flags() */ +#include <linux/io.h> /* ioremap_prot() */ #include <linux/module.h> /* EXPORT_SYMBOL() */ /** - * devm_ioremap_prot - Managed ioremap_flags() + * devm_ioremap_prot - Managed ioremap_prot() * @dev: Generic device to remap IO address for * @offset: BUS offset to map * @size: Size of map @@ -31,7 +31,7 @@ void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, if (!ptr) return NULL; - addr = ioremap_flags(offset, size, flags); + addr = ioremap_prot(offset, size, flags); if (addr) { *ptr = addr; devres_add(dev, ptr); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index ae5189ab0049..9a52349874ee 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/ptrace.h> +#include <linux/prefetch.h> #include <asm/sstep.h> #include <asm/processor.h> #include <asm/uaccess.h> @@ -45,6 +46,18 @@ extern int do_stxvd2x(int rn, unsigned long ea); #endif /* + * Emulate the truncation of 64 bit values in 32-bit mode. + */ +static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) +{ +#ifdef __powerpc64__ + if ((msr & MSR_64BIT) == 0) + val &= 0xffffffffUL; +#endif + return val; +} + +/* * Determine whether a conditional branch instruction would branch. */ static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs) @@ -90,11 +103,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs if (instr & 0x04000000) /* update forms */ regs->gpr[ra] = ea; } -#ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) - ea &= 0xffffffffUL; -#endif - return ea; + + return truncate_if_32bit(regs->msr, ea); } #ifdef __powerpc64__ @@ -113,9 +123,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg if ((instr & 3) == 1) /* update forms */ regs->gpr[ra] = ea; } - if (!(regs->msr & MSR_SF)) - ea &= 0xffffffffUL; - return ea; + + return truncate_if_32bit(regs->msr, ea); } #endif /* __powerpc64 */ @@ -136,11 +145,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs if (do_update) /* update forms */ regs->gpr[ra] = ea; } -#ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) - ea &= 0xffffffffUL; -#endif - return ea; + + return truncate_if_32bit(regs->msr, ea); } /* @@ -466,7 +472,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int rd) regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); #ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) + if (!(regs->msr & MSR_64BIT)) val = (int) val; #endif if (val < 0) @@ -487,7 +493,7 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd, ++val; regs->gpr[rd] = val; #ifdef __powerpc64__ - if (!(regs->msr & MSR_SF)) { + if (!(regs->msr & MSR_64BIT)) { val = (unsigned int) val; val1 = (unsigned int) val1; } @@ -570,8 +576,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if ((instr & 2) == 0) imm += regs->nip; regs->nip += 4; - if ((regs->msr & MSR_SF) == 0) - regs->nip &= 0xffffffffUL; + regs->nip = truncate_if_32bit(regs->msr, regs->nip); if (instr & 1) regs->link = regs->nip; if (branch_taken(instr, regs)) @@ -604,13 +609,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) imm -= 0x04000000; if ((instr & 2) == 0) imm += regs->nip; - if (instr & 1) { - regs->link = regs->nip + 4; - if ((regs->msr & MSR_SF) == 0) - regs->link &= 0xffffffffUL; - } - if ((regs->msr & MSR_SF) == 0) - imm &= 0xffffffffUL; + if (instr & 1) + regs->link = truncate_if_32bit(regs->msr, regs->nip + 4); + imm = truncate_if_32bit(regs->msr, imm); regs->nip = imm; return 1; case 19: @@ -618,11 +619,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 16: /* bclr */ case 528: /* bcctr */ imm = (instr & 0x400)? regs->ctr: regs->link; - regs->nip += 4; - if ((regs->msr & MSR_SF) == 0) { - regs->nip &= 0xffffffffUL; - imm &= 0xffffffffUL; - } + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + imm = truncate_if_32bit(regs->msr, imm); if (instr & 1) regs->link = regs->nip; if (branch_taken(instr, regs)) @@ -1616,11 +1614,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 0; /* invoke DSI if -EFAULT? */ } instr_done: - regs->nip += 4; -#ifdef __powerpc64__ - if ((regs->msr & MSR_SF) == 0) - regs->nip &= 0xffffffffUL; -#endif + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); return 1; logical_done: diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 5b7dd4ea02b5..a242b5d7cbe4 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -118,7 +118,7 @@ _GLOBAL(__hash_page_4K) BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Calc va and put it in r29 */ rldicr r29,r5,28,63-28 rldicl r3,r3,0,36 @@ -401,7 +401,7 @@ _GLOBAL(__hash_page_4K) BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Calc va and put it in r29 */ rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */ rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */ @@ -715,7 +715,7 @@ BEGIN_FTR_SECTION andi. r0,r31,_PAGE_NO_CACHE /* If so, bail out and refault as a 4k page */ bne- ht64_bail_ok -END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE) /* Prepare new PTE value (turn access RW into DIRTY, then * add BUSY and ACCESSED) */ @@ -736,7 +736,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Calc va and put it in r29 */ rldicr r29,r5,28,63-28 rldicl r3,r3,0,36 diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 784a400e0781..dfd764896db0 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -50,9 +50,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) case MMU_PAGE_4K: va &= ~0xffful; va |= ssize << 8; - asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), - %2) - : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206) + asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) + : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) : "memory"); break; default: @@ -61,9 +60,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va |= penc << 12; va |= ssize << 8; va |= 1; /* L */ - asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), - %2) - : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206) + asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) + : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) : "memory"); break; } @@ -98,8 +96,8 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize) static inline void tlbie(unsigned long va, int psize, int ssize, int local) { - unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL); - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (use_local) use_local = mmu_psize_defs[psize].tlbiel; @@ -503,7 +501,7 @@ static void native_flush_hash_range(unsigned long number, int local) } pte_iterate_hashed_end(); } - if (cpu_has_feature(CPU_FTR_TLBIEL) && + if (mmu_has_feature(MMU_FTR_TLBIEL) && mmu_psize_defs[psize].tlbiel && local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { @@ -517,7 +515,7 @@ static void native_flush_hash_range(unsigned long number, int local) } asm volatile("ptesync":::"memory"); } else { - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 58a022d0f463..26b2872b3d00 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -53,6 +53,7 @@ #include <asm/sections.h> #include <asm/spu.h> #include <asm/udbg.h> +#include <asm/code-patching.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -258,11 +259,11 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, for (; size >= 4; size -= 4, ++prop) { if (prop[0] == 40) { DBG("1T segment support detected\n"); - cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT; + cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT; return 1; } } - cur_cpu_spec->cpu_features &= ~CPU_FTR_NO_SLBIE_B; + cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B; return 0; } @@ -288,7 +289,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, if (prop != NULL) { DBG("Page sizes from device-tree:\n"); size /= 4; - cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE); + cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); while(size > 0) { unsigned int shift = prop[0]; unsigned int slbenc = prop[1]; @@ -316,7 +317,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, break; case 0x18: idx = MMU_PAGE_16M; - cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE; + cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; break; case 0x22: idx = MMU_PAGE_16G; @@ -411,7 +412,7 @@ static void __init htab_init_page_sizes(void) * Not in the device-tree, let's fallback on known size * list for 16M capable GP & GR */ - if (cpu_has_feature(CPU_FTR_16M_PAGE)) + if (mmu_has_feature(MMU_FTR_16M_PAGE)) memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); found: @@ -441,7 +442,7 @@ static void __init htab_init_page_sizes(void) mmu_vmalloc_psize = MMU_PAGE_64K; if (mmu_linear_psize == MMU_PAGE_4K) mmu_linear_psize = MMU_PAGE_64K; - if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) { + if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) { /* * Don't use 64k pages for ioremap on pSeries, since * that would stop us accessing the HEA ethernet. @@ -547,15 +548,7 @@ int remove_section_mapping(unsigned long start, unsigned long end) } #endif /* CONFIG_MEMORY_HOTPLUG */ -static inline void make_bl(unsigned int *insn_addr, void *func) -{ - unsigned long funcp = *((unsigned long *)func); - int offset = funcp - (unsigned long)insn_addr; - - *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc)); - flush_icache_range((unsigned long)insn_addr, 4+ - (unsigned long)insn_addr); -} +#define FUNCTION_TEXT(A) ((*(unsigned long *)(A))) static void __init htab_finish_init(void) { @@ -570,16 +563,33 @@ static void __init htab_finish_init(void) extern unsigned int *ht64_call_hpte_remove; extern unsigned int *ht64_call_hpte_updatepp; - make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert); - make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert); - make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove); - make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp); + patch_branch(ht64_call_hpte_insert1, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(ht64_call_hpte_insert2, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(ht64_call_hpte_remove, + FUNCTION_TEXT(ppc_md.hpte_remove), + BRANCH_SET_LINK); + patch_branch(ht64_call_hpte_updatepp, + FUNCTION_TEXT(ppc_md.hpte_updatepp), + BRANCH_SET_LINK); + #endif /* CONFIG_PPC_HAS_HASH_64K */ - make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); - make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); - make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); - make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp); + patch_branch(htab_call_hpte_insert1, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(htab_call_hpte_insert2, + FUNCTION_TEXT(ppc_md.hpte_insert), + BRANCH_SET_LINK); + patch_branch(htab_call_hpte_remove, + FUNCTION_TEXT(ppc_md.hpte_remove), + BRANCH_SET_LINK); + patch_branch(htab_call_hpte_updatepp, + FUNCTION_TEXT(ppc_md.hpte_updatepp), + BRANCH_SET_LINK); } static void __init htab_initialize(void) @@ -598,7 +608,7 @@ static void __init htab_initialize(void) /* Initialize page sizes */ htab_init_page_sizes(); - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { mmu_kernel_ssize = MMU_SEGSIZE_1T; mmu_highuser_ssize = MMU_SEGSIZE_1T; printk(KERN_INFO "Using 1TB segments\n"); @@ -739,7 +749,7 @@ void __init early_init_mmu(void) /* Initialize stab / SLB management except on iSeries */ - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); else if (!firmware_has_feature(FW_FEATURE_ISERIES)) stab_initialize(get_paca()->stab_real); @@ -756,7 +766,7 @@ void __cpuinit early_init_mmu_secondary(void) * in real mode on pSeries and we want a virtual address on * iSeries anyway */ - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); else stab_initialize(get_paca()->stab_addr); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9bb249c3046e..0b9a5c1901b9 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -529,7 +529,7 @@ static int __init hugetlbpage_init(void) { int psize; - if (!cpu_has_feature(CPU_FTR_16M_PAGE)) + if (!mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 2535828aa84b..3bafc3deca6d 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -20,9 +20,205 @@ #include <linux/idr.h> #include <linux/module.h> #include <linux/gfp.h> +#include <linux/slab.h> #include <asm/mmu_context.h> +#ifdef CONFIG_PPC_ICSWX +/* + * The processor and its L2 cache cause the icswx instruction to + * generate a COP_REQ transaction on PowerBus. The transaction has + * no address, and the processor does not perform an MMU access + * to authenticate the transaction. The command portion of the + * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and + * the coprocessor Process ID (PID), which the coprocessor compares + * to the authorized LPID and PID held in the coprocessor, to determine + * if the process is authorized to generate the transaction. + * The data of the COP_REQ transaction is 128-byte or less and is + * placed in cacheable memory on a 128-byte cache line boundary. + * + * The task to use a coprocessor should use use_cop() to allocate + * a coprocessor PID before executing icswx instruction. use_cop() + * also enables the coprocessor context switching. Drop_cop() is + * used to free the coprocessor PID. + * + * Example: + * Host Fabric Interface (HFI) is a PowerPC network coprocessor. + * Each HFI have multiple windows. Each HFI window serves as a + * network device sending to and receiving from HFI network. + * HFI immediate send function uses icswx instruction. The immediate + * send function allows small (single cache-line) packets be sent + * without using the regular HFI send FIFO and doorbell, which are + * much slower than immediate send. + * + * For each task intending to use HFI immediate send, the HFI driver + * calls use_cop() to obtain a coprocessor PID for the task. + * The HFI driver then allocate a free HFI window and save the + * coprocessor PID to the HFI window to allow the task to use the + * HFI window. + * + * The HFI driver repeatedly creates immediate send packets and + * issues icswx instruction to send data through the HFI window. + * The HFI compares the coprocessor PID in the CPU PID register + * to the PID held in the HFI window to determine if the transaction + * is allowed. + * + * When the task to release the HFI window, the HFI driver calls + * drop_cop() to release the coprocessor PID. + */ + +#define COP_PID_NONE 0 +#define COP_PID_MIN (COP_PID_NONE + 1) +#define COP_PID_MAX (0xFFFF) + +static DEFINE_SPINLOCK(mmu_context_acop_lock); +static DEFINE_IDA(cop_ida); + +void switch_cop(struct mm_struct *next) +{ + mtspr(SPRN_PID, next->context.cop_pid); + mtspr(SPRN_ACOP, next->context.acop); +} + +static int new_cop_pid(struct ida *ida, int min_id, int max_id, + spinlock_t *lock) +{ + int index; + int err; + +again: + if (!ida_pre_get(ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(lock); + err = ida_get_new_above(ida, min_id, &index); + spin_unlock(lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > max_id) { + spin_lock(lock); + ida_remove(ida, index); + spin_unlock(lock); + return -ENOMEM; + } + + return index; +} + +static void sync_cop(void *arg) +{ + struct mm_struct *mm = arg; + + if (mm == current->active_mm) + switch_cop(current->active_mm); +} + +/** + * Start using a coprocessor. + * @acop: mask of coprocessor to be used. + * @mm: The mm the coprocessor to associate with. Most likely current mm. + * + * Return a positive PID if successful. Negative errno otherwise. + * The returned PID will be fed to the coprocessor to determine if an + * icswx transaction is authenticated. + */ +int use_cop(unsigned long acop, struct mm_struct *mm) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_ICSWX)) + return -ENODEV; + + if (!mm || !acop) + return -EINVAL; + + /* We need to make sure mm_users doesn't change */ + down_read(&mm->mmap_sem); + spin_lock(mm->context.cop_lockp); + + if (mm->context.cop_pid == COP_PID_NONE) { + ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX, + &mmu_context_acop_lock); + if (ret < 0) + goto out; + + mm->context.cop_pid = ret; + } + mm->context.acop |= acop; + + sync_cop(mm); + + /* + * If this is a threaded process then there might be other threads + * running. We need to send an IPI to force them to pick up any + * change in PID and ACOP. + */ + if (atomic_read(&mm->mm_users) > 1) + smp_call_function(sync_cop, mm, 1); + + ret = mm->context.cop_pid; + +out: + spin_unlock(mm->context.cop_lockp); + up_read(&mm->mmap_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(use_cop); + +/** + * Stop using a coprocessor. + * @acop: mask of coprocessor to be stopped. + * @mm: The mm the coprocessor associated with. + */ +void drop_cop(unsigned long acop, struct mm_struct *mm) +{ + int free_pid = COP_PID_NONE; + + if (!cpu_has_feature(CPU_FTR_ICSWX)) + return; + + if (WARN_ON_ONCE(!mm)) + return; + + /* We need to make sure mm_users doesn't change */ + down_read(&mm->mmap_sem); + spin_lock(mm->context.cop_lockp); + + mm->context.acop &= ~acop; + + if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) { + free_pid = mm->context.cop_pid; + mm->context.cop_pid = COP_PID_NONE; + } + + sync_cop(mm); + + /* + * If this is a threaded process then there might be other threads + * running. We need to send an IPI to force them to pick up any + * change in PID and ACOP. + */ + if (atomic_read(&mm->mm_users) > 1) + smp_call_function(sync_cop, mm, 1); + + if (free_pid != COP_PID_NONE) { + spin_lock(&mmu_context_acop_lock); + ida_remove(&cop_ida, free_pid); + spin_unlock(&mmu_context_acop_lock); + } + + spin_unlock(mm->context.cop_lockp); + up_read(&mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(drop_cop); + +#endif /* CONFIG_PPC_ICSWX */ + static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); @@ -31,7 +227,6 @@ static DEFINE_IDA(mmu_context_ida); * Each segment contains 2^28 bytes. Each context maps 2^44 bytes, * so we can support 2^19-1 contexts (19 == 35 + 28 - 44). */ -#define NO_CONTEXT 0 #define MAX_CONTEXT ((1UL << 19) - 1) int __init_new_context(void) @@ -79,6 +274,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) slice_set_user_psize(mm, mmu_virtual_psize); subpage_prot_init_new_context(mm); mm->context.id = index; +#ifdef CONFIG_PPC_ICSWX + mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL); + if (!mm->context.cop_lockp) { + __destroy_context(index); + subpage_prot_free(mm); + mm->context.id = MMU_NO_CONTEXT; + return -ENOMEM; + } + spin_lock_init(mm->context.cop_lockp); +#endif /* CONFIG_PPC_ICSWX */ return 0; } @@ -93,7 +298,12 @@ EXPORT_SYMBOL_GPL(__destroy_context); void destroy_context(struct mm_struct *mm) { +#ifdef CONFIG_PPC_ICSWX + drop_cop(mm->context.acop, mm); + kfree(mm->context.cop_lockp); + mm->context.cop_lockp = NULL; +#endif /* CONFIG_PPC_ICSWX */ __destroy_context(mm->context.id); subpage_prot_free(mm); - mm->context.id = NO_CONTEXT; + mm->context.id = MMU_NO_CONTEXT; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index c0aab52da3a5..336807de550e 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -338,12 +338,14 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, return NOTIFY_OK; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); @@ -407,7 +409,17 @@ void __init mmu_context_init(void) } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; last_context = 65535; - } else { + } else +#ifdef CONFIG_PPC_BOOK3E_MMU + if (mmu_has_feature(MMU_FTR_TYPE_3E)) { + u32 mmucfg = mfspr(SPRN_MMUCFG); + u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK) + >> MMUCFG_PIDSIZE_SHIFT; + first_context = 1; + last_context = (1UL << (pid_bits + 1)) - 1; + } else +#endif + { first_context = 1; last_context = 255; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5ec1dad2a19d..2164006fe170 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -311,14 +311,13 @@ EXPORT_SYMBOL_GPL(of_node_to_nid); static int __init find_min_common_depth(void) { int depth; - struct device_node *rtas_root; struct device_node *chosen; + struct device_node *root; const char *vec5; - rtas_root = of_find_node_by_path("/rtas"); - - if (!rtas_root) - return -1; + root = of_find_node_by_path("/rtas"); + if (!root) + root = of_find_node_by_path("/"); /* * This property is a set of 32-bit integers, each representing @@ -332,7 +331,7 @@ static int __init find_min_common_depth(void) * NUMA boundary and the following are progressively less significant * boundaries. There can be more than one level of NUMA. */ - distance_ref_points = of_get_property(rtas_root, + distance_ref_points = of_get_property(root, "ibm,associativity-reference-points", &distance_ref_points_depth); @@ -376,11 +375,11 @@ static int __init find_min_common_depth(void) distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; } - of_node_put(rtas_root); + of_node_put(root); return depth; err: - of_node_put(rtas_root); + of_node_put(root); return -1; } @@ -1453,7 +1452,7 @@ int arch_update_cpu_topology(void) unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; struct sys_device *sysdev; - for_each_cpu_mask(cpu, cpu_associativity_changes_mask) { + for_each_cpu(cpu,&cpu_associativity_changes_mask) { vphn_get_associativity(cpu, associativity); nid = associativity_to_nid(associativity); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 8dc41c0157fe..51f87956f8f8 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -133,7 +133,15 @@ ioremap(phys_addr_t addr, unsigned long size) EXPORT_SYMBOL(ioremap); void __iomem * -ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) +ioremap_wc(phys_addr_t addr, unsigned long size) +{ + return __ioremap_caller(addr, size, _PAGE_NO_CACHE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iomem * +ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { /* writeable implies dirty for kernel addresses */ if (flags & _PAGE_RW) @@ -152,7 +160,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); } -EXPORT_SYMBOL(ioremap_flags); +EXPORT_SYMBOL(ioremap_prot); void __iomem * __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 88927a05cdc2..6e595f6496d4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -255,7 +255,17 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size) return __ioremap_caller(addr, size, flags, caller); } -void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, +void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) +{ + unsigned long flags = _PAGE_NO_CACHE; + void *caller = __builtin_return_address(0); + + if (ppc_md.ioremap) + return ppc_md.ioremap(addr, size, flags, caller); + return __ioremap_caller(addr, size, flags, caller); +} + +void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { void *caller = __builtin_return_address(0); @@ -311,7 +321,8 @@ void iounmap(volatile void __iomem *token) } EXPORT_SYMBOL(ioremap); -EXPORT_SYMBOL(ioremap_flags); +EXPORT_SYMBOL(ioremap_wc); +EXPORT_SYMBOL(ioremap_prot); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(__ioremap_at); EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 1d98ecc8eecd..e22276cb67a4 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -24,6 +24,7 @@ #include <asm/firmware.h> #include <linux/compiler.h> #include <asm/udbg.h> +#include <asm/code-patching.h> extern void slb_allocate_realmode(unsigned long ea); @@ -166,7 +167,7 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) int esid_1t_count; /* System is not 1T segment size capable. */ - if (!cpu_has_feature(CPU_FTR_1T_SEGMENT)) + if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) return (GET_ESID(addr1) == GET_ESID(addr2)); esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) + @@ -201,7 +202,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) */ hard_irq_disable(); offset = get_paca()->slb_cache_ptr; - if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && + if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { int i; asm volatile("isync" : : : "memory"); @@ -249,9 +250,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) static inline void patch_slb_encoding(unsigned int *insn_addr, unsigned int immed) { - *insn_addr = (*insn_addr & 0xffff0000) | immed; - flush_icache_range((unsigned long)insn_addr, 4+ - (unsigned long)insn_addr); + int insn = (*insn_addr & 0xffff0000) | immed; + patch_instruction(insn_addr, insn); } void slb_set_size(u16 size) diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 95ce35581696..ef653dc95b65 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -58,7 +58,7 @@ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 BEGIN_FTR_SECTION b slb_finish_load -END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T 1: @@ -87,7 +87,7 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) 6: BEGIN_FTR_SECTION b slb_finish_load -END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T 0: /* user address: proto-VSID = context << 15 | ESID. First check @@ -138,11 +138,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) ld r9,PACACONTEXTID(r13) BEGIN_FTR_SECTION cmpldi r10,0x1000 -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) rldimi r10,r9,USER_ESID_BITS,0 BEGIN_FTR_SECTION bge slb_finish_load_1T -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) b slb_finish_load 8: /* invalid EA */ diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 446a01842a73..41e31642a86a 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -243,7 +243,7 @@ void __init stabs_alloc(void) { int cpu; - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) return; for_each_possible_cpu(cpu) { diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index aa46e9d1e771..19395f18b1db 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -87,7 +87,7 @@ static void __cpuinit smp_iss4xx_setup_cpu(int cpu) mpic_setup_this_cpu(); } -static void __cpuinit smp_iss4xx_kick_cpu(int cpu) +static int __cpuinit smp_iss4xx_kick_cpu(int cpu) { struct device_node *cpunode = of_get_cpu_node(cpu, NULL); const u64 *spin_table_addr_prop; @@ -104,7 +104,7 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu) NULL); if (spin_table_addr_prop == NULL) { pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu); - return; + return -ENOENT; } /* Assume it's mapped as part of the linear mapping. This is a bit @@ -117,6 +117,8 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu) smp_wmb(); spin_table[1] = __pa(start_secondary_47x); mb(); + + return 0; } static struct smp_ops_t iss_smp_ops = { diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index cfc4b2009982..9f09319352c0 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -61,7 +61,7 @@ irq_to_pic_bit(unsigned int irq) static void cpld_mask_irq(struct irq_data *d) { - unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); out_8(pic_mask, @@ -71,7 +71,7 @@ cpld_mask_irq(struct irq_data *d) static void cpld_unmask_irq(struct irq_data *d) { - unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); out_8(pic_mask, @@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, status |= (ignore | mask); if (status == 0xff) - return NO_IRQ_IGNORE; + return NO_IRQ; cpld_irq = ffz(status) + offset; @@ -109,14 +109,14 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) { irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); - if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { + if (irq != NO_IRQ) { generic_handle_irq(irq); return; } irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, &cpld_regs->misc_mask); - if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { + if (irq != NO_IRQ) { generic_handle_irq(irq); return; } diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 57a6a349e932..96f85e5e0cd3 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -56,7 +56,7 @@ static void media5200_irq_unmask(struct irq_data *d) spin_lock_irqsave(&media5200_irq.lock, flags); val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); - val |= 1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq); + val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)); out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); spin_unlock_irqrestore(&media5200_irq.lock, flags); } @@ -68,7 +68,7 @@ static void media5200_irq_mask(struct irq_data *d) spin_lock_irqsave(&media5200_irq.lock, flags); val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); - val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq)); + val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d))); out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); spin_unlock_irqrestore(&media5200_irq.lock, flags); } diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 1dd15400f6f0..1a9a49570579 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -157,48 +157,30 @@ static inline void io_be_clrbit(u32 __iomem *addr, int bitno) */ static void mpc52xx_extirq_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&intr->ctrl, 11 - l2irq); } static void mpc52xx_extirq_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->ctrl, 11 - l2irq); } static void mpc52xx_extirq_ack(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->ctrl, 27-l2irq); } static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type) { u32 ctrl_reg, type; - int irq; - int l2irq; + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; void *handler = handle_level_irq; - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - - pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, irq, l2irq, flow_type); + pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, + (int) irqd_to_hwirq(d), l2irq, flow_type); switch (flow_type) { case IRQF_TRIGGER_HIGH: type = 0; break; @@ -237,23 +219,13 @@ static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type) static void mpc52xx_main_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->main_mask, 16 - l2irq); } static void mpc52xx_main_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&intr->main_mask, 16 - l2irq); } @@ -270,23 +242,13 @@ static struct irq_chip mpc52xx_main_irqchip = { */ static void mpc52xx_periph_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&intr->per_mask, 31 - l2irq); } static void mpc52xx_periph_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&intr->per_mask, 31 - l2irq); } @@ -303,34 +265,19 @@ static struct irq_chip mpc52xx_periph_irqchip = { */ static void mpc52xx_sdma_mask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_setbit(&sdma->IntMask, l2irq); } static void mpc52xx_sdma_unmask(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; io_be_clrbit(&sdma->IntMask, l2irq); } static void mpc52xx_sdma_ack(struct irq_data *d) { - int irq; - int l2irq; - - irq = irq_map[d->irq].hwirq; - l2irq = irq & MPC52xx_IRQ_L2_MASK; - + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; out_be32(&sdma->IntPend, 1 << l2irq); } @@ -539,7 +486,7 @@ void __init mpc52xx_init_irq(void) unsigned int mpc52xx_get_irq(void) { u32 status; - int irq = NO_IRQ_IGNORE; + int irq; status = in_be32(&intr->enc_status); if (status & 0x00000400) { /* critical */ @@ -562,6 +509,8 @@ unsigned int mpc52xx_get_irq(void) } else { irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET); } + } else { + return NO_IRQ; } return irq_linear_revmap(mpc52xx_irqhost, irq); diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 4a4eb6ffa12f..8ccf9ed62fe2 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -42,7 +42,7 @@ struct pq2ads_pci_pic { static void pq2ads_pci_mask_irq(struct irq_data *d) { struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d); - int irq = NUM_IRQS - virq_to_hw(d->irq) - 1; + int irq = NUM_IRQS - irqd_to_hwirq(d) - 1; if (irq != -1) { unsigned long flags; @@ -58,7 +58,7 @@ static void pq2ads_pci_mask_irq(struct irq_data *d) static void pq2ads_pci_unmask_irq(struct irq_data *d) { struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d); - int irq = NUM_IRQS - virq_to_hw(d->irq) - 1; + int irq = NUM_IRQS - irqd_to_hwirq(d) - 1; if (irq != -1) { unsigned long flags; @@ -112,16 +112,8 @@ static int pci_pic_host_map(struct irq_host *h, unsigned int virq, return 0; } -static void pci_host_unmap(struct irq_host *h, unsigned int virq) -{ - /* remove chip and handler */ - irq_set_chip_data(virq, NULL); - irq_set_chip(virq, NULL); -} - static struct irq_host_ops pci_pic_host_ops = { .map = pci_pic_host_map, - .unmap = pci_host_unmap, }; int __init pq2ads_pci_init_irq(void) diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 188272934cfb..104faa8aa23c 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; +static struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct resource res; struct pmc_type *type; int ret = 0; - if (!ofdev->dev.of_match) + match = of_match_device(pmc_match, &ofdev->dev); + if (!match) return -EINVAL; - type = ofdev->dev.of_match->data; + type = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 0d00ff9d05a0..d6a93a10c0f5 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -41,7 +41,7 @@ extern void __early_start(void); #define NUM_BOOT_ENTRY 8 #define SIZE_BOOT_ENTRY (NUM_BOOT_ENTRY * sizeof(u32)) -static void __init +static int __init smp_85xx_kick_cpu(int nr) { unsigned long flags; @@ -60,7 +60,7 @@ smp_85xx_kick_cpu(int nr) if (cpu_rel_addr == NULL) { printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr); - return; + return -ENOENT; } /* @@ -107,6 +107,8 @@ smp_85xx_kick_cpu(int nr) iounmap(bptr_vaddr); pr_debug("waited %d msecs for CPU #%d.\n", n, nr); + + return 0; } static void __init @@ -233,8 +235,10 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.message_pass = smp_mpic_message_pass; } - if (cpu_has_feature(CPU_FTR_DBELL)) - smp_85xx_ops.message_pass = doorbell_message_pass; + if (cpu_has_feature(CPU_FTR_DBELL)) { + smp_85xx_ops.message_pass = smp_muxed_ipi_message_pass; + smp_85xx_ops.cause_ipi = doorbell_cause_ipi; + } BUG_ON(!smp_85xx_ops.message_pass); diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index db864623b4ae..12cb9bb2cc68 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -48,8 +48,6 @@ static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = { [8] = {0, IRQ_TYPE_LEVEL_HIGH}, }; -#define socrates_fpga_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock); static void __iomem *socrates_fpga_pic_iobase; @@ -110,11 +108,9 @@ void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc) static void socrates_fpga_pic_ack(struct irq_data *d) { unsigned long flags; - unsigned int hwirq, irq_line; + unsigned int irq_line, hwirq = irqd_to_hwirq(d); uint32_t mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -127,12 +123,10 @@ static void socrates_fpga_pic_ack(struct irq_data *d) static void socrates_fpga_pic_mask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -145,12 +139,10 @@ static void socrates_fpga_pic_mask(struct irq_data *d) static void socrates_fpga_pic_mask_ack(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -164,12 +156,10 @@ static void socrates_fpga_pic_mask_ack(struct irq_data *d) static void socrates_fpga_pic_unmask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -182,12 +172,10 @@ static void socrates_fpga_pic_unmask(struct irq_data *d) static void socrates_fpga_pic_eoi(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int irq_line; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - irq_line = fpga_irqs[hwirq].irq_line; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) @@ -201,12 +189,10 @@ static int socrates_fpga_pic_set_type(struct irq_data *d, unsigned int flow_type) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); int polarity; u32 mask; - hwirq = socrates_fpga_irq_to_hw(d->irq); - if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE) return -EINVAL; diff --git a/arch/powerpc/platforms/86xx/gef_pic.c b/arch/powerpc/platforms/86xx/gef_pic.c index 0beec7d5566b..94594e58594c 100644 --- a/arch/powerpc/platforms/86xx/gef_pic.c +++ b/arch/powerpc/platforms/86xx/gef_pic.c @@ -46,8 +46,6 @@ #define GEF_PIC_CPU0_MCP_MASK GEF_PIC_MCP_MASK(0) #define GEF_PIC_CPU1_MCP_MASK GEF_PIC_MCP_MASK(1) -#define gef_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static DEFINE_RAW_SPINLOCK(gef_pic_lock); @@ -113,11 +111,9 @@ void gef_pic_cascade(unsigned int irq, struct irq_desc *desc) static void gef_pic_mask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); u32 mask; - hwirq = gef_irq_to_hw(d->irq); - raw_spin_lock_irqsave(&gef_pic_lock, flags); mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0)); mask &= ~(1 << hwirq); @@ -136,11 +132,9 @@ static void gef_pic_mask_ack(struct irq_data *d) static void gef_pic_unmask(struct irq_data *d) { unsigned long flags; - unsigned int hwirq; + unsigned int hwirq = irqd_to_hwirq(d); u32 mask; - hwirq = gef_irq_to_hw(d->irq); - raw_spin_lock_irqsave(&gef_pic_lock, flags); mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0)); mask |= (1 << hwirq); diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 018cc67be426..a896511690c2 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -66,7 +66,7 @@ static void __init mpc8610_suspend_init(void) return; } - ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9/wakeup", NULL); + ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL); if (ret) { pr_err("%s: can't request pixis event IRQ: %d\n", __func__, ret); @@ -105,45 +105,77 @@ machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices); #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) -static u32 get_busfreq(void) -{ - struct device_node *node; - - u32 fs_busfreq = 0; - node = of_find_node_by_type(NULL, "cpu"); - if (node) { - unsigned int size; - const unsigned int *prop = - of_get_property(node, "bus-frequency", &size); - if (prop) - fs_busfreq = *prop; - of_node_put(node); - }; - return fs_busfreq; -} +/* + * DIU Area Descriptor + * + * The MPC8610 reference manual shows the bits of the AD register in + * little-endian order, which causes the BLUE_C field to be split into two + * parts. To simplify the definition of the MAKE_AD() macro, we define the + * fields in big-endian order and byte-swap the result. + * + * So even though the registers don't look like they're in the + * same bit positions as they are on the P1022, the same value is written to + * the AD register on the MPC8610 and on the P1022. + */ +#define AD_BYTE_F 0x10000000 +#define AD_ALPHA_C_MASK 0x0E000000 +#define AD_ALPHA_C_SHIFT 25 +#define AD_BLUE_C_MASK 0x01800000 +#define AD_BLUE_C_SHIFT 23 +#define AD_GREEN_C_MASK 0x00600000 +#define AD_GREEN_C_SHIFT 21 +#define AD_RED_C_MASK 0x00180000 +#define AD_RED_C_SHIFT 19 +#define AD_PALETTE 0x00040000 +#define AD_PIXEL_S_MASK 0x00030000 +#define AD_PIXEL_S_SHIFT 16 +#define AD_COMP_3_MASK 0x0000F000 +#define AD_COMP_3_SHIFT 12 +#define AD_COMP_2_MASK 0x00000F00 +#define AD_COMP_2_SHIFT 8 +#define AD_COMP_1_MASK 0x000000F0 +#define AD_COMP_1_SHIFT 4 +#define AD_COMP_0_MASK 0x0000000F +#define AD_COMP_0_SHIFT 0 + +#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \ + cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \ + (blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \ + (red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \ + (c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \ + (c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT)) unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel, int monitor_port) { static const unsigned long pixelformat[][3] = { - {0x88882317, 0x88083218, 0x65052119}, - {0x88883316, 0x88082219, 0x65053118}, + { + MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8), + MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0), + MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0) + }, + { + MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8), + MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0), + MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0) + }, }; - unsigned int pix_fmt, arch_monitor; + unsigned int arch_monitor; + /* The DVI port is mis-wired on revision 1 of this board. */ arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1; - /* DVI port for board version 0x01 */ - - if (bits_per_pixel == 32) - pix_fmt = pixelformat[arch_monitor][0]; - else if (bits_per_pixel == 24) - pix_fmt = pixelformat[arch_monitor][1]; - else if (bits_per_pixel == 16) - pix_fmt = pixelformat[arch_monitor][2]; - else - pix_fmt = pixelformat[1][0]; - - return pix_fmt; + + switch (bits_per_pixel) { + case 32: + return pixelformat[arch_monitor][0]; + case 24: + return pixelformat[arch_monitor][1]; + case 16: + return pixelformat[arch_monitor][2]; + default: + pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel); + return 0; + } } void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base) @@ -190,8 +222,7 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock) } /* Pixel Clock configuration */ - pr_debug("DIU: Bus Frequency = %d\n", get_busfreq()); - speed_ccb = get_busfreq(); + speed_ccb = fsl_get_sys_freq(); /* Calculate the pixel clock with the smallest error */ /* calculate the following in steps to avoid overflow */ diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index eacea0e3fcc8..af09baee22cb 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -56,7 +56,7 @@ smp_86xx_release_core(int nr) } -static void __init +static int __init smp_86xx_kick_cpu(int nr) { unsigned int save_vector; @@ -65,7 +65,7 @@ smp_86xx_kick_cpu(int nr) unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100); if (nr < 0 || nr >= NR_CPUS) - return; + return -ENOENT; pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr); @@ -92,6 +92,8 @@ smp_86xx_kick_cpu(int nr) local_irq_restore(flags); pr_debug("wait CPU #%d for %d msecs.\n", nr, n); + + return 0; } diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 9ecce995dd4b..1e121088826f 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -150,7 +150,7 @@ void __init mpc8xx_calibrate_decr(void) */ cpu = of_find_node_by_type(NULL, "cpu"); virq= irq_of_parse_and_map(cpu, 0); - irq = irq_map[virq].hwirq; + irq = virq_to_hw(virq); sys_tmr2 = immr_map(im_sit); out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) | diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index f7b07720aa30..f970ca2b180c 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -20,6 +20,7 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +source "arch/powerpc/platforms/wsp/Kconfig" config KVM_GUEST bool "KVM Guest support" @@ -56,16 +57,19 @@ config UDBG_RTAS_CONSOLE depends on PPC_RTAS default n +config PPC_SMP_MUXED_IPI + bool + help + Select this opton if your platform supports SMP and your + interrupt controller provides less than 4 interrupts to each + cpu. This will enable the generic code to multiplex the 4 + messages on to one ipi. + config PPC_UDBG_BEAT bool "BEAT based debug console" depends on PPC_CELLEB default n -config XICS - depends on PPC_PSERIES - bool - default y - config IPIC bool default n @@ -147,14 +151,27 @@ config PPC_970_NAP bool default n +config PPC_P7_NAP + bool + default n + config PPC_INDIRECT_IO bool select GENERIC_IOMAP - default n + +config PPC_INDIRECT_PIO + bool + select PPC_INDIRECT_IO + +config PPC_INDIRECT_MMIO + bool + select PPC_INDIRECT_IO + +config PPC_IO_WORKAROUNDS + bool config GENERIC_IOMAP bool - default n source "drivers/cpufreq/Kconfig" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 111138c55f9c..2165b65876f9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -73,6 +73,7 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" select PPC_FPU # Make it a choice ? + select PPC_SMP_MUXED_IPI endchoice @@ -107,6 +108,10 @@ config POWER4 depends on PPC64 && PPC_BOOK3S def_bool y +config PPC_A2 + bool + depends on PPC_BOOK3E_64 + config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -174,6 +179,7 @@ config FSL_BOOKE config PPC_FSL_BOOK3E bool select FSL_EMB_PERFMON + select PPC_SMP_MUXED_IPI default y if FSL_BOOKE config PTE_64BIT @@ -226,6 +232,24 @@ config VSX If in doubt, say Y here. +config PPC_ICSWX + bool "Support for PowerPC icswx coprocessor instruction" + depends on POWER4 + default n + ---help--- + + This option enables kernel support for the PowerPC Initiate + Coprocessor Store Word (icswx) coprocessor instruction on POWER7 + or newer processors. + + This option is only useful if you have a processor that supports + the icswx coprocessor instruction. It does not have any effect + on processors without the icswx coprocessor instruction. + + This option slightly increases kernel memory usage. + + If in doubt, say N here. + config SPE bool "SPE Support" depends on E200 || (E500 && !PPC_E500MC) diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index fdb9f0b0d7a8..73e2116cfeed 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ +obj-$(CONFIG_PPC_WSP) += wsp/ diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 81239ebed83f..67d5009b4e86 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -6,7 +6,8 @@ config PPC_CELL_COMMON bool select PPC_CELL select PPC_DCR_MMIO - select PPC_INDIRECT_IO + select PPC_INDIRECT_PIO + select PPC_INDIRECT_MMIO select PPC_NATIVE select PPC_RTAS select IRQ_EDGE_EOI_HANDLER @@ -15,6 +16,7 @@ config PPC_CELL_NATIVE bool select PPC_CELL_COMMON select MPIC + select PPC_IO_WORKAROUNDS select IBM_NEW_EMAC_EMAC4 select IBM_NEW_EMAC_RGMII select IBM_NEW_EMAC_ZMII #test only diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 83fafe922641..a4a89350bcfc 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \ - pmu.o io-workarounds.o spider-pci.o + pmu.o spider-pci.o obj-$(CONFIG_CBE_RAS) += ras.o obj-$(CONFIG_CBE_THERM) += cbe_thermal.o @@ -39,11 +39,10 @@ obj-y += celleb_setup.o \ celleb_pci.o celleb_scc_epci.o \ celleb_scc_pciex.o \ celleb_scc_uhc.o \ - io-workarounds.o spider-pci.o \ - beat.o beat_htab.o beat_hvCall.o \ - beat_interrupt.o beat_iommu.o + spider-pci.o beat.o beat_htab.o \ + beat_hvCall.o beat_interrupt.o \ + beat_iommu.o -obj-$(CONFIG_SMP) += beat_smp.o obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index bb5ebf8fa80b..ac06903e136a 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -113,7 +113,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) pr_devel("axon_msi: woff %x roff %x msi %x\n", write_offset, msic->read_offset, msi); - if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) { + if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) { generic_handle_irq(msi); msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); } else { @@ -320,6 +320,7 @@ static struct irq_chip msic_irq_chip = { static int msic_host_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hw) { + irq_set_chip_data(virq, h->host_data); irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); return 0; diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 4cb9e147c307..55015e1f6939 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -148,16 +148,6 @@ static int beatic_pic_host_map(struct irq_host *h, unsigned int virq, } /* - * Update binding hardware IRQ number (hw) and Virtuql - * IRQ number (virq). This is called only once for a given mapping. - */ -static void beatic_pic_host_remap(struct irq_host *h, unsigned int virq, - irq_hw_number_t hw) -{ - beat_construct_and_connect_irq_plug(virq, hw); -} - -/* * Translate device-tree interrupt spec to irq_hw_number_t style (ulong), * to pass away to irq_create_mapping(). * @@ -184,7 +174,6 @@ static int beatic_pic_host_match(struct irq_host *h, struct device_node *np) static struct irq_host_ops beatic_pic_host_ops = { .map = beatic_pic_host_map, - .remap = beatic_pic_host_remap, .unmap = beatic_pic_host_unmap, .xlate = beatic_pic_host_xlate, .match = beatic_pic_host_match, @@ -257,22 +246,6 @@ void __init beatic_init_IRQ(void) irq_set_default_host(beatic_host); } -#ifdef CONFIG_SMP - -/* Nullified to compile with SMP mode */ -void beatic_setup_cpu(int cpu) -{ -} - -void beatic_cause_IPI(int cpu, int mesg) -{ -} - -void beatic_request_IPIs(void) -{ -} -#endif /* CONFIG_SMP */ - void beatic_deinit_IRQ(void) { int i; diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h index b470fd0051f1..a7e52f91a078 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.h +++ b/arch/powerpc/platforms/cell/beat_interrupt.h @@ -24,9 +24,6 @@ extern void beatic_init_IRQ(void); extern unsigned int beatic_get_irq(void); -extern void beatic_cause_IPI(int cpu, int mesg); -extern void beatic_request_IPIs(void); -extern void beatic_setup_cpu(int); extern void beatic_deinit_IRQ(void); #endif diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c deleted file mode 100644 index 26efc204c47f..000000000000 --- a/arch/powerpc/platforms/cell/beat_smp.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * SMP support for Celleb platform. (Incomplete) - * - * (C) Copyright 2006 TOSHIBA CORPORATION - * - * This code is based on arch/powerpc/platforms/cell/smp.c: - * Dave Engebretsen, Peter Bergner, and - * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com - * Plus various changes from other IBM teams... - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/threads.h> -#include <linux/cpu.h> - -#include <asm/irq.h> -#include <asm/smp.h> -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "beat_interrupt.h" - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* - * The primary thread of each non-boot processor is recorded here before - * smp init. - */ -/* static cpumask_t of_spin_map; */ - -/** - * smp_startup_cpu() - start the given cpu - * - * At boot time, there is nothing to do for primary threads which were - * started from Open Firmware. For anything else, call RTAS with the - * appropriate start location. - * - * Returns: - * 0 - failure - * 1 - success - */ -static inline int __devinit smp_startup_cpu(unsigned int lcpu) -{ - return 0; -} - -static void smp_beatic_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - beatic_cause_IPI(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - beatic_cause_IPI(i, msg); - } - } -} - -static int __init smp_beatic_probe(void) -{ - return cpus_weight(cpu_possible_map); -} - -static void __devinit smp_beatic_setup_cpu(int cpu) -{ - beatic_setup_cpu(cpu); -} - -static void __devinit smp_celleb_kick_cpu(int nr) -{ - BUG_ON(nr < 0 || nr >= NR_CPUS); - - if (!smp_startup_cpu(nr)) - return; -} - -static int smp_celleb_cpu_bootable(unsigned int nr) -{ - return 1; -} -static struct smp_ops_t bpa_beatic_smp_ops = { - .message_pass = smp_beatic_message_pass, - .probe = smp_beatic_probe, - .kick_cpu = smp_celleb_kick_cpu, - .setup_cpu = smp_beatic_setup_cpu, - .cpu_bootable = smp_celleb_cpu_bootable, -}; - -/* This is called very early */ -void __init smp_init_celleb(void) -{ - DBG(" -> smp_init_celleb()\n"); - - smp_ops = &bpa_beatic_smp_ops; - - DBG(" <- smp_init_celleb()\n"); -} diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index dbc338f187a2..f3917e7a5b44 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -45,8 +45,8 @@ static struct cbe_thread_map unsigned int cbe_id; } cbe_thread_map[NR_CPUS]; -static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE }; -static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE; +static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} }; +static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE }; static struct cbe_regs_map *cbe_find_map(struct device_node *np) { @@ -159,7 +159,8 @@ EXPORT_SYMBOL_GPL(cbe_cpu_to_node); u32 cbe_node_to_cpu(int node) { - return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t)); + return cpumask_first(&cbe_local_mask[node]); + } EXPORT_SYMBOL_GPL(cbe_node_to_cpu); @@ -268,9 +269,9 @@ void __init cbe_regs_init(void) thread->regs = map; thread->cbe_id = cbe_id; map->be_node = thread->be_node; - cpu_set(i, cbe_local_mask[cbe_id]); + cpumask_set_cpu(i, &cbe_local_mask[cbe_id]); if(thread->thread_id == 0) - cpu_set(i, cbe_first_online_cpu); + cpumask_set_cpu(i, &cbe_first_online_cpu); } } diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 404d1fc04d59..5822141aa63f 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -41,7 +41,6 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> -#include "io-workarounds.h" #include "celleb_pci.h" #define MAX_PCI_DEVICES 32 @@ -320,7 +319,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node, size = 256; config = &private->fake_config[devno][fn]; - *config = alloc_maybe_bootmem(size, GFP_KERNEL); + *config = zalloc_maybe_bootmem(size, GFP_KERNEL); if (*config == NULL) { printk(KERN_ERR "PCI: " "not enough memory for fake configuration space\n"); @@ -331,7 +330,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node, size = sizeof(struct celleb_pci_resource); res = &private->res[devno][fn]; - *res = alloc_maybe_bootmem(size, GFP_KERNEL); + *res = zalloc_maybe_bootmem(size, GFP_KERNEL); if (*res == NULL) { printk(KERN_ERR "PCI: not enough memory for resource data space\n"); @@ -432,7 +431,7 @@ static int __init phb_set_bus_ranges(struct device_node *dev, static void __init celleb_alloc_private_mem(struct pci_controller *hose) { hose->private_data = - alloc_maybe_bootmem(sizeof(struct celleb_pci_private), + zalloc_maybe_bootmem(sizeof(struct celleb_pci_private), GFP_KERNEL); } @@ -469,18 +468,6 @@ static struct of_device_id celleb_phb_match[] __initdata = { }, }; -static int __init celleb_io_workaround_init(struct pci_controller *phb, - struct celleb_phb_spec *phb_spec) -{ - if (phb_spec->ops) { - iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init, - phb_spec->iowa_data); - io_workaround_init(); - } - - return 0; -} - int __init celleb_setup_phb(struct pci_controller *phb) { struct device_node *dev = phb->dn; @@ -500,7 +487,11 @@ int __init celleb_setup_phb(struct pci_controller *phb) if (rc) return 1; - return celleb_io_workaround_init(phb, phb_spec); + if (phb_spec->ops) + iowa_register_bus(phb, phb_spec->ops, + phb_spec->iowa_init, + phb_spec->iowa_data); + return 0; } int celleb_pci_probe_mode(struct pci_bus *bus) diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h index 4cba1523ec50..a801fcc5f389 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.h +++ b/arch/powerpc/platforms/cell/celleb_pci.h @@ -26,8 +26,9 @@ #include <asm/pci-bridge.h> #include <asm/prom.h> #include <asm/ppc-pci.h> +#include <asm/io-workarounds.h> -#include "io-workarounds.h" +struct iowa_bus; struct celleb_phb_spec { int (*setup)(struct device_node *, struct pci_controller *); diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index e53845579770..d58d9bae4b9b 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -128,10 +128,6 @@ static void __init celleb_setup_arch_beat(void) spu_management_ops = &spu_management_of_ops; #endif -#ifdef CONFIG_SMP - smp_init_celleb(); -#endif - celleb_setup_arch_common(); } diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 44cfd1bef89b..449c08c15862 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -196,8 +196,20 @@ static irqreturn_t iic_ipi_action(int irq, void *dev_id) { int ipi = (int)(long)dev_id; - smp_message_recv(ipi); - + switch(ipi) { + case PPC_MSG_CALL_FUNCTION: + generic_smp_call_function_interrupt(); + break; + case PPC_MSG_RESCHEDULE: + scheduler_ipi(); + break; + case PPC_MSG_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; + case PPC_MSG_DEBUGGER_BREAK: + debug_ipi_action(0, NULL); + break; + } return IRQ_HANDLED; } static void iic_request_ipi(int ipi, const char *name) diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c index d31c594cfdf3..51e290126bc1 100644 --- a/arch/powerpc/platforms/cell/qpace_setup.c +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -42,7 +42,6 @@ #include "interrupt.h" #include "pervasive.h" #include "ras.h" -#include "io-workarounds.h" static void qpace_show_cpuinfo(struct seq_file *m) { diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index fd57bfe00edf..c73cf4c43fc2 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -51,11 +51,11 @@ #include <asm/udbg.h> #include <asm/mpic.h> #include <asm/cell-regs.h> +#include <asm/io-workarounds.h> #include "interrupt.h" #include "pervasive.h" #include "ras.h" -#include "io-workarounds.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -136,8 +136,6 @@ static int __devinit cell_setup_phb(struct pci_controller *phb) iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, (void *)SPIDER_PCI_REG_BASE); - io_workaround_init(); - return 0; } diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index f774530075b7..d176e6148e3f 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -77,7 +77,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) unsigned int pcpu; int start_cpu; - if (cpu_isset(lcpu, of_spin_map)) + if (cpumask_test_cpu(lcpu, &of_spin_map)) /* Already started by OF and sitting in spin loop */ return 1; @@ -103,27 +103,11 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) return 1; } -static void smp_iic_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - iic_cause_IPI(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - iic_cause_IPI(i, msg); - } - } -} - static int __init smp_iic_probe(void) { iic_request_IPIs(); - return cpus_weight(cpu_possible_map); + return cpumask_weight(cpu_possible_mask); } static void __devinit smp_cell_setup_cpu(int cpu) @@ -137,12 +121,12 @@ static void __devinit smp_cell_setup_cpu(int cpu) mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); } -static void __devinit smp_cell_kick_cpu(int nr) +static int __devinit smp_cell_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); if (!smp_startup_cpu(nr)) - return; + return -ENOENT; /* * The processor is currently spinning, waiting for the @@ -150,6 +134,8 @@ static void __devinit smp_cell_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca[nr].cpu_start = 1; + + return 0; } static int smp_cell_cpu_bootable(unsigned int nr) @@ -166,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr) return 1; } static struct smp_ops_t bpa_iic_smp_ops = { - .message_pass = smp_iic_message_pass, + .message_pass = iic_cause_IPI, .probe = smp_iic_probe, .kick_cpu = smp_cell_kick_cpu, .setup_cpu = smp_cell_setup_cpu, @@ -186,13 +172,12 @@ void __init smp_init_cell(void) if (cpu_has_feature(CPU_FTR_SMT)) { for_each_present_cpu(i) { if (cpu_thread_in_core(i) == 0) - cpu_set(i, of_spin_map); + cpumask_set_cpu(i, &of_spin_map); } - } else { - of_spin_map = cpu_present_map; - } + } else + cpumask_copy(&of_spin_map, cpu_present_mask); - cpu_clear(boot_cpuid, of_spin_map); + cpumask_clear_cpu(boot_cpuid, &of_spin_map); /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index ca7731c0b595..f1f7878893f3 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -27,8 +27,7 @@ #include <asm/ppc-pci.h> #include <asm/pci-bridge.h> - -#include "io-workarounds.h" +#include <asm/io-workarounds.h> #define SPIDER_PCI_DISABLE_PREFETCH diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index c5cf50e6b45a..442c28c00f88 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -68,9 +68,9 @@ struct spider_pic { }; static struct spider_pic spider_pics[SPIDER_CHIP_COUNT]; -static struct spider_pic *spider_virq_to_pic(unsigned int virq) +static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d) { - return irq_map[virq].host->host_data; + return irq_data_get_irq_chip_data(d); } static void __iomem *spider_get_irq_config(struct spider_pic *pic, @@ -81,24 +81,24 @@ static void __iomem *spider_get_irq_config(struct spider_pic *pic, static void spider_unmask_irq(struct irq_data *d) { - struct spider_pic *pic = spider_virq_to_pic(d->irq); - void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq); + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); out_be32(cfg, in_be32(cfg) | 0x30000000u); } static void spider_mask_irq(struct irq_data *d) { - struct spider_pic *pic = spider_virq_to_pic(d->irq); - void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq); + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); out_be32(cfg, in_be32(cfg) & ~0x30000000u); } static void spider_ack_irq(struct irq_data *d) { - struct spider_pic *pic = spider_virq_to_pic(d->irq); - unsigned int src = irq_map[d->irq].hwirq; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int src = irqd_to_hwirq(d); /* Reset edge detection logic if necessary */ @@ -116,8 +116,8 @@ static void spider_ack_irq(struct irq_data *d) static int spider_set_irq_type(struct irq_data *d, unsigned int type) { unsigned int sense = type & IRQ_TYPE_SENSE_MASK; - struct spider_pic *pic = spider_virq_to_pic(d->irq); - unsigned int hw = irq_map[d->irq].hwirq; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int hw = irqd_to_hwirq(d); void __iomem *cfg = spider_get_irq_config(pic, hw); u32 old_mask; u32 ic; @@ -171,6 +171,7 @@ static struct irq_chip spider_pic = { static int spider_host_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hw) { + irq_set_chip_data(virq, h->host_data); irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq); /* Set default irq type */ diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index acfaccea5f4f..3675da73623f 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -32,6 +32,7 @@ #include <linux/io.h> #include <linux/mutex.h> #include <linux/linux_logo.h> +#include <linux/syscore_ops.h> #include <asm/spu.h> #include <asm/spu_priv1.h> #include <asm/spu_csa.h> @@ -521,18 +522,8 @@ void spu_init_channels(struct spu *spu) } EXPORT_SYMBOL_GPL(spu_init_channels); -static int spu_shutdown(struct sys_device *sysdev) -{ - struct spu *spu = container_of(sysdev, struct spu, sysdev); - - spu_free_irqs(spu); - spu_destroy_spu(spu); - return 0; -} - static struct sysdev_class spu_sysdev_class = { .name = "spu", - .shutdown = spu_shutdown, }; int spu_add_sysdev_attr(struct sysdev_attribute *attr) @@ -797,6 +788,22 @@ static inline void crash_register_spus(struct list_head *list) } #endif +static void spu_shutdown(void) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + spu_free_irqs(spu); + spu_destroy_spu(spu); + } + mutex_unlock(&spu_full_list_mutex); +} + +static struct syscore_ops spu_syscore_ops = { + .shutdown = spu_shutdown, +}; + static int __init init_spu_base(void) { int i, ret = 0; @@ -830,6 +837,7 @@ static int __init init_spu_base(void) crash_register_spus(&spu_full_list); mutex_unlock(&spu_full_list_mutex); spu_add_sysdev_attr(&attr_stat); + register_syscore_ops(&spu_syscore_ops); spu_init_affinity(); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 65203857b0ce..32cb4e66d2cd 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx) * runqueue. The context will be rescheduled on the proper node * if it is timesliced or preempted. */ - ctx->cpus_allowed = current->cpus_allowed; + cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current)); /* Save the current cpu id for spu interrupt routing. */ ctx->last_ran = raw_smp_processor_id(); diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index 02cafecc90e3..a800122e4dda 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -30,10 +30,12 @@ #include <asm/mpic.h> #include <asm/rtas.h> -static void __devinit smp_chrp_kick_cpu(int nr) +static int __devinit smp_chrp_kick_cpu(int nr) { *(unsigned long *)KERNELBASE = nr; asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory"); + + return 0; } static void __devinit smp_chrp_setup_cpu(int cpu_nr) diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index 12aa62b6f227..f61a2dd96b99 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -48,7 +48,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); u32 mask = 1 << irq; @@ -59,7 +59,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d) static void flipper_pic_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); /* this is at least needed for RSW */ @@ -68,7 +68,7 @@ static void flipper_pic_ack(struct irq_data *d) static void flipper_pic_mask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); clrbits32(io_base + FLIPPER_IMR, 1 << irq); @@ -76,7 +76,7 @@ static void flipper_pic_mask(struct irq_data *d) static void flipper_pic_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); setbits32(io_base + FLIPPER_IMR, 1 << irq); @@ -107,12 +107,6 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq, return 0; } -static void flipper_pic_unmap(struct irq_host *h, unsigned int irq) -{ - irq_set_chip_data(irq, NULL); - irq_set_chip(irq, NULL); -} - static int flipper_pic_match(struct irq_host *h, struct device_node *np) { return 1; @@ -121,7 +115,6 @@ static int flipper_pic_match(struct irq_host *h, struct device_node *np) static struct irq_host_ops flipper_irq_host_ops = { .map = flipper_pic_map, - .unmap = flipper_pic_unmap, .match = flipper_pic_match, }; diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 2bdddfc9d520..e4919170c6bc 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -43,7 +43,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); u32 mask = 1 << irq; @@ -53,7 +53,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d) static void hlwd_pic_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); out_be32(io_base + HW_BROADWAY_ICR, 1 << irq); @@ -61,7 +61,7 @@ static void hlwd_pic_ack(struct irq_data *d) static void hlwd_pic_mask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq); @@ -69,7 +69,7 @@ static void hlwd_pic_mask(struct irq_data *d) static void hlwd_pic_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void __iomem *io_base = irq_data_get_irq_chip_data(d); setbits32(io_base + HW_BROADWAY_IMR, 1 << irq); @@ -100,15 +100,8 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq, return 0; } -static void hlwd_pic_unmap(struct irq_host *h, unsigned int irq) -{ - irq_set_chip_data(irq, NULL); - irq_set_chip(irq, NULL); -} - static struct irq_host_ops hlwd_irq_host_ops = { .map = hlwd_pic_map, - .unmap = hlwd_pic_unmap, }; static unsigned int __hlwd_pic_get_irq(struct irq_host *h) diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig index e5bc9f75d474..b57cda3a0817 100644 --- a/arch/powerpc/platforms/iseries/Kconfig +++ b/arch/powerpc/platforms/iseries/Kconfig @@ -1,7 +1,9 @@ config PPC_ISERIES bool "IBM Legacy iSeries" depends on PPC64 && PPC_BOOK3S - select PPC_INDIRECT_IO + select PPC_SMP_MUXED_IPI + select PPC_INDIRECT_PIO + select PPC_INDIRECT_MMIO select PPC_PCI_CHOICE if EXPERT menu "iSeries device drivers" diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S index 32a56c6dfa72..29c02f36b32f 100644 --- a/arch/powerpc/platforms/iseries/exception.S +++ b/arch/powerpc/platforms/iseries/exception.S @@ -31,6 +31,7 @@ #include <asm/thread_info.h> #include <asm/ptrace.h> #include <asm/cputable.h> +#include <asm/mmu.h> #include "exception.h" @@ -60,29 +61,31 @@ system_reset_iSeries: /* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */ /* In the UP case we'll yield() later, and we will not access the paca anyway */ #ifdef CONFIG_SMP -1: +iSeries_secondary_wait_paca: HMT_LOW LOAD_REG_ADDR(r23, __secondary_hold_spinloop) ld r23,0(r23) - sync - LOAD_REG_ADDR(r3,current_set) - sldi r28,r24,3 /* get current_set[cpu#] */ - ldx r3,r3,r28 - addi r1,r3,THREAD_SIZE - subi r1,r1,STACK_FRAME_OVERHEAD - cmpwi 0,r23,0 /* Keep poking the Hypervisor until */ - bne 2f /* we're released */ - /* Let the Hypervisor know we are alive */ + cmpdi 0,r23,0 + bne 2f /* go on when the master is ready */ + + /* Keep poking the Hypervisor until we're released */ /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ lis r3,0x8002 rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ li r0,-1 /* r0=-1 indicates a Hypervisor call */ sc /* Invoke the hypervisor via a system call */ - b 1b -#endif + b iSeries_secondary_wait_paca 2: + HMT_MEDIUM + sync + + LOAD_REG_ADDR(r3, nr_cpu_ids) /* get number of pacas allocated */ + lwz r3,0(r3) /* nr_cpus= or NR_CPUS can limit */ + cmpld 0,r24,r3 /* is our cpu number allocated? */ + bge iSeries_secondary_yield /* no, yield forever */ + /* Load our paca now that it's been allocated */ LOAD_REG_ADDR(r13, paca) ld r13,0(r13) @@ -93,10 +96,24 @@ system_reset_iSeries: ori r23,r23,MSR_RI mtmsrd r23 /* RI on */ - HMT_LOW -#ifdef CONFIG_SMP +iSeries_secondary_smp_loop: lbz r23,PACAPROCSTART(r13) /* Test if this processor * should start */ + cmpwi 0,r23,0 + bne 3f /* go on when we are told */ + + HMT_LOW + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + mfspr r13,SPRN_SPRG_PACA /* Put r13 back ???? */ + b iSeries_secondary_smp_loop /* wait for signal to start */ + +3: + HMT_MEDIUM sync LOAD_REG_ADDR(r3,current_set) sldi r28,r24,3 /* get current_set[cpu#] */ @@ -104,27 +121,22 @@ system_reset_iSeries: addi r1,r3,THREAD_SIZE subi r1,r1,STACK_FRAME_OVERHEAD - cmpwi 0,r23,0 - beq iSeries_secondary_smp_loop /* Loop until told to go */ b __secondary_start /* Loop until told to go */ -iSeries_secondary_smp_loop: - /* Let the Hypervisor know we are alive */ - /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ - lis r3,0x8002 - rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ -#else /* CONFIG_SMP */ +#endif /* CONFIG_SMP */ + +iSeries_secondary_yield: /* Yield the processor. This is required for non-SMP kernels which are running on multi-threaded machines. */ + HMT_LOW lis r3,0x8000 rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */ addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */ li r4,0 /* "yield timed" */ li r5,-1 /* "yield forever" */ -#endif /* CONFIG_SMP */ li r0,-1 /* r0=-1 indicates a Hypervisor call */ sc /* Invoke the hypervisor via a system call */ mfspr r13,SPRN_SPRG_PACA /* Put r13 back ???? */ - b 2b /* If SMP not configured, secondaries + b iSeries_secondary_yield /* If SMP not configured, secondaries * loop forever */ /*** ISeries-LPAR interrupt handlers ***/ @@ -157,7 +169,7 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE EXCEPTION_PROLOG_1(PACA_EXGEN) EXCEPTION_PROLOG_ISERIES_1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) b data_access_common .do_stab_bolted_iSeries: diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index 52a6889832c7..b2103453eb01 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -42,7 +42,6 @@ #include "irq.h" #include "pci.h" #include "call_pci.h" -#include "smp.h" #ifdef CONFIG_PCI @@ -171,7 +170,7 @@ static void iseries_enable_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); /* The IRQ has already been locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -188,7 +187,7 @@ static unsigned int iseries_startup_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); bus = REAL_IRQ_TO_BUS(rirq); function = REAL_IRQ_TO_FUNC(rirq); @@ -234,7 +233,7 @@ static void iseries_shutdown_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); /* irq should be locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -257,7 +256,7 @@ static void iseries_disable_IRQ(struct irq_data *d) { u32 bus, dev_id, function, mask; const u32 sub_bus = 0; - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); /* The IRQ has already been locked by the caller */ bus = REAL_IRQ_TO_BUS(rirq); @@ -271,7 +270,7 @@ static void iseries_disable_IRQ(struct irq_data *d) static void iseries_end_IRQ(struct irq_data *d) { - unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq; + unsigned int rirq = (unsigned int)irqd_to_hwirq(d); HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq), (REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq)); @@ -316,7 +315,7 @@ unsigned int iSeries_get_irq(void) #ifdef CONFIG_SMP if (get_lppaca()->int_dword.fields.ipi_cnt) { get_lppaca()->int_dword.fields.ipi_cnt = 0; - iSeries_smp_message_recv(); + smp_ipi_demux(); } #endif /* CONFIG_SMP */ if (hvlpevent_is_pending()) diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 2946ae10fbfd..c25a0815c26b 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -249,7 +249,7 @@ static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array, unsigned long i; unsigned long mem_blocks = 0; - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array, max_entries); else @@ -634,7 +634,7 @@ static int __init iseries_probe(void) hpte_init_iSeries(); /* iSeries does not support 16M pages */ - cur_cpu_spec->cpu_features &= ~CPU_FTR_16M_PAGE; + cur_cpu_spec->mmu_features &= ~MMU_FTR_16M_PAGE; return 1; } @@ -685,6 +685,11 @@ void * __init iSeries_early_setup(void) powerpc_firmware_features |= FW_FEATURE_ISERIES; powerpc_firmware_features |= FW_FEATURE_LPAR; +#ifdef CONFIG_SMP + /* On iSeries we know we can never have more than 64 cpus */ + nr_cpu_ids = max(nr_cpu_ids, 64); +#endif + iSeries_fixup_klimit(); /* diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c index 6c6029914dbc..e3265adde5d3 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -42,57 +42,23 @@ #include <asm/cputable.h> #include <asm/system.h> -#include "smp.h" - -static unsigned long iSeries_smp_message[NR_CPUS]; - -void iSeries_smp_message_recv(void) -{ - int cpu = smp_processor_id(); - int msg; - - if (num_online_cpus() < 2) - return; - - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &iSeries_smp_message[cpu])) - smp_message_recv(msg); -} - -static inline void smp_iSeries_do_message(int cpu, int msg) +static void smp_iSeries_cause_ipi(int cpu, unsigned long data) { - set_bit(msg, &iSeries_smp_message[cpu]); HvCall_sendIPI(&(paca[cpu])); } -static void smp_iSeries_message_pass(int target, int msg) -{ - int i; - - if (target < NR_CPUS) - smp_iSeries_do_message(target, msg); - else { - for_each_online_cpu(i) { - if ((target == MSG_ALL_BUT_SELF) && - (i == smp_processor_id())) - continue; - smp_iSeries_do_message(i, msg); - } - } -} - static int smp_iSeries_probe(void) { return cpumask_weight(cpu_possible_mask); } -static void smp_iSeries_kick_cpu(int nr) +static int smp_iSeries_kick_cpu(int nr) { BUG_ON((nr < 0) || (nr >= NR_CPUS)); /* Verify that our partition has a processor nr */ if (lppaca_of(nr).dyn_proc_status >= 2) - return; + return -ENOENT; /* The processor is currently spinning, waiting * for the cpu_start field to become non-zero @@ -100,6 +66,8 @@ static void smp_iSeries_kick_cpu(int nr) * continue on to secondary_start in iSeries_head.S */ paca[nr].cpu_start = 1; + + return 0; } static void __devinit smp_iSeries_setup_cpu(int nr) @@ -107,7 +75,8 @@ static void __devinit smp_iSeries_setup_cpu(int nr) } static struct smp_ops_t iSeries_smp_ops = { - .message_pass = smp_iSeries_message_pass, + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = smp_iSeries_cause_ipi, .probe = smp_iSeries_probe, .kick_cpu = smp_iSeries_kick_cpu, .setup_cpu = smp_iSeries_setup_cpu, diff --git a/arch/powerpc/platforms/iseries/smp.h b/arch/powerpc/platforms/iseries/smp.h deleted file mode 100644 index d501f7de01e7..000000000000 --- a/arch/powerpc/platforms/iseries/smp.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _PLATFORMS_ISERIES_SMP_H -#define _PLATFORMS_ISERIES_SMP_H - -extern void iSeries_smp_message_recv(void); - -#endif /* _PLATFORMS_ISERIES_SMP_H */ diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index 1e1a0873e1dd..1afd10f67858 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -18,4 +18,13 @@ config PPC_PMAC64 select PPC_970_NAP default y - +config PPC_PMAC32_PSURGE + bool "Support for powersurge upgrade cards" if EXPERT + depends on SMP && PPC32 && PPC_PMAC + select PPC_SMP_MUXED_IPI + default y + help + The powersurge cpu boards can be used in the generation + of powermacs that have a socket for an upgradeable cpu card, + including the 7500, 8500, 9500, 9600. Support exists for + both dual and quad socket upgrade cards. diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 023f24086a0a..9089b0421191 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -21,7 +21,7 @@ #include <linux/signal.h> #include <linux/pci.h> #include <linux/interrupt.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/adb.h> #include <linux/pmu.h> #include <linux/module.h> @@ -84,7 +84,7 @@ static void __pmac_retrigger(unsigned int irq_nr) static void pmac_mask_and_ack_irq(struct irq_data *d) { - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; unsigned long flags; @@ -106,7 +106,7 @@ static void pmac_mask_and_ack_irq(struct irq_data *d) static void pmac_ack_irq(struct irq_data *d) { - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; unsigned long flags; @@ -152,7 +152,7 @@ static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) static unsigned int pmac_startup_irq(struct irq_data *d) { unsigned long flags; - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; @@ -169,7 +169,7 @@ static unsigned int pmac_startup_irq(struct irq_data *d) static void pmac_mask_irq(struct irq_data *d) { unsigned long flags; - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); raw_spin_lock_irqsave(&pmac_pic_lock, flags); __clear_bit(src, ppc_cached_irq_mask); @@ -180,7 +180,7 @@ static void pmac_mask_irq(struct irq_data *d) static void pmac_unmask_irq(struct irq_data *d) { unsigned long flags; - unsigned int src = irq_map[d->irq].hwirq; + unsigned int src = irqd_to_hwirq(d); raw_spin_lock_irqsave(&pmac_pic_lock, flags); __set_bit(src, ppc_cached_irq_mask); @@ -193,7 +193,7 @@ static int pmac_retrigger(struct irq_data *d) unsigned long flags; raw_spin_lock_irqsave(&pmac_pic_lock, flags); - __pmac_retrigger(irq_map[d->irq].hwirq); + __pmac_retrigger(irqd_to_hwirq(d)); raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); return 1; } @@ -239,15 +239,12 @@ static unsigned int pmac_pic_get_irq(void) unsigned long bits = 0; unsigned long flags; -#ifdef CONFIG_SMP - void psurge_smp_message_recv(void); - - /* IPI's are a hack on the powersurge -- Cort */ - if ( smp_processor_id() != 0 ) { - psurge_smp_message_recv(); - return NO_IRQ_IGNORE; /* ignore, already handled */ +#ifdef CONFIG_PPC_PMAC32_PSURGE + /* IPI's are a hack on the powersurge -- Cort */ + if (smp_processor_id() != 0) { + return psurge_secondary_virq; } -#endif /* CONFIG_SMP */ +#endif /* CONFIG_PPC_PMAC32_PSURGE */ raw_spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_real_irqs; (irq -= 32) >= 0; ) { int i = irq >> 5; @@ -677,7 +674,7 @@ not_found: return viaint; } -static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state) +static int pmacpic_suspend(void) { int viaint = pmacpic_find_viaint(); @@ -698,7 +695,7 @@ static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state) return 0; } -static int pmacpic_resume(struct sys_device *sysdev) +static void pmacpic_resume(void) { int i; @@ -709,39 +706,19 @@ static int pmacpic_resume(struct sys_device *sysdev) for (i = 0; i < max_real_irqs; ++i) if (test_bit(i, sleep_save_mask)) pmac_unmask_irq(irq_get_irq_data(i)); - - return 0; } -#endif /* CONFIG_PM && CONFIG_PPC32 */ - -static struct sysdev_class pmacpic_sysclass = { - .name = "pmac_pic", -}; - -static struct sys_device device_pmacpic = { - .id = 0, - .cls = &pmacpic_sysclass, -}; - -static struct sysdev_driver driver_pmacpic = { -#if defined(CONFIG_PM) && defined(CONFIG_PPC32) - .suspend = &pmacpic_suspend, - .resume = &pmacpic_resume, -#endif /* CONFIG_PM && CONFIG_PPC32 */ +static struct syscore_ops pmacpic_syscore_ops = { + .suspend = pmacpic_suspend, + .resume = pmacpic_resume, }; -static int __init init_pmacpic_sysfs(void) +static int __init init_pmacpic_syscore(void) { -#ifdef CONFIG_PPC32 - if (max_irqs == 0) - return -ENODEV; -#endif - printk(KERN_DEBUG "Registering pmac pic with sysfs...\n"); - sysdev_class_register(&pmacpic_sysclass); - sysdev_register(&device_pmacpic); - sysdev_driver_register(&pmacpic_sysclass, &driver_pmacpic); + register_syscore_ops(&pmacpic_syscore_ops); return 0; } -machine_subsys_initcall(powermac, init_pmacpic_sysfs); +machine_subsys_initcall(powermac, init_pmacpic_syscore); + +#endif /* CONFIG_PM && CONFIG_PPC32 */ diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h deleted file mode 100644 index d622a8345aaa..000000000000 --- a/arch/powerpc/platforms/powermac/pic.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __PPC_PLATFORMS_PMAC_PIC_H -#define __PPC_PLATFORMS_PMAC_PIC_H - -#include <linux/irq.h> - -extern struct irq_chip pmac_pic; - -extern void pmac_pic_init(void); -extern int pmac_get_irq(void); - -#endif /* __PPC_PLATFORMS_PMAC_PIC_H */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 20468f49aec0..8327cce2bdb0 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -33,6 +33,7 @@ extern void pmac_setup_pci_dma(void); extern void pmac_check_ht_link(void); extern void pmac_setup_smp(void); +extern int psurge_secondary_virq; extern void low_cpu_die(void) __attribute__((noreturn)); extern int pmac_nvram_init(void); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bc5f0dc6ae1e..db092d7c4c5b 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -70,7 +70,7 @@ static void (*pmac_tb_freeze)(int freeze); static u64 timebase; static int tb_req; -#ifdef CONFIG_PPC32 +#ifdef CONFIG_PPC_PMAC32_PSURGE /* * Powersurge (old powermac SMP) support. @@ -124,6 +124,10 @@ static volatile u32 __iomem *psurge_start; /* what sort of powersurge board we have */ static int psurge_type = PSURGE_NONE; +/* irq for secondary cpus to report */ +static struct irq_host *psurge_host; +int psurge_secondary_virq; + /* * Set and clear IPIs for powersurge. */ @@ -156,51 +160,52 @@ static inline void psurge_clr_ipi(int cpu) /* * On powersurge (old SMP powermac architecture) we don't have * separate IPIs for separate messages like openpic does. Instead - * we have a bitmap for each processor, where a 1 bit means that - * the corresponding message is pending for that processor. - * Ideally each cpu's entry would be in a different cache line. + * use the generic demux helpers * -- paulus. */ -static unsigned long psurge_smp_message[NR_CPUS]; - -void psurge_smp_message_recv(void) +static irqreturn_t psurge_ipi_intr(int irq, void *d) { - int cpu = smp_processor_id(); - int msg; + psurge_clr_ipi(smp_processor_id()); + smp_ipi_demux(); - /* clear interrupt */ - psurge_clr_ipi(cpu); - - if (num_online_cpus() < 2) - return; + return IRQ_HANDLED; +} - /* make sure there is a message there */ - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &psurge_smp_message[cpu])) - smp_message_recv(msg); +static void smp_psurge_cause_ipi(int cpu, unsigned long data) +{ + psurge_set_ipi(cpu); } -irqreturn_t psurge_primary_intr(int irq, void *d) +static int psurge_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw) { - psurge_smp_message_recv(); - return IRQ_HANDLED; + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq); + + return 0; } -static void smp_psurge_message_pass(int target, int msg) +struct irq_host_ops psurge_host_ops = { + .map = psurge_host_map, +}; + +static int psurge_secondary_ipi_init(void) { - int i; + int rc = -ENOMEM; - if (num_online_cpus() < 2) - return; + psurge_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0, + &psurge_host_ops, 0); - for_each_online_cpu(i) { - if (target == MSG_ALL - || (target == MSG_ALL_BUT_SELF && i != smp_processor_id()) - || target == i) { - set_bit(msg, &psurge_smp_message[i]); - psurge_set_ipi(i); - } - } + if (psurge_host) + psurge_secondary_virq = irq_create_direct_mapping(psurge_host); + + if (psurge_secondary_virq) + rc = request_irq(psurge_secondary_virq, psurge_ipi_intr, + IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); + + if (rc) + pr_err("Failed to setup secondary cpu IPI\n"); + + return rc; } /* @@ -311,6 +316,9 @@ static int __init smp_psurge_probe(void) ncpus = 2; } + if (psurge_secondary_ipi_init()) + return 1; + psurge_start = ioremap(PSURGE_START, 4); psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); @@ -329,7 +337,7 @@ static int __init smp_psurge_probe(void) return ncpus; } -static void __init smp_psurge_kick_cpu(int nr) +static int __init smp_psurge_kick_cpu(int nr) { unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8; unsigned long a, flags; @@ -394,11 +402,13 @@ static void __init smp_psurge_kick_cpu(int nr) psurge_set_ipi(1); if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); + + return 0; } static struct irqaction psurge_irqaction = { - .handler = psurge_primary_intr, - .flags = IRQF_DISABLED, + .handler = psurge_ipi_intr, + .flags = IRQF_DISABLED|IRQF_PERCPU, .name = "primary IPI", }; @@ -437,14 +447,15 @@ void __init smp_psurge_give_timebase(void) /* PowerSurge-style Macs */ struct smp_ops_t psurge_smp_ops = { - .message_pass = smp_psurge_message_pass, + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = smp_psurge_cause_ipi, .probe = smp_psurge_probe, .kick_cpu = smp_psurge_kick_cpu, .setup_cpu = smp_psurge_setup_cpu, .give_timebase = smp_psurge_give_timebase, .take_timebase = smp_psurge_take_timebase, }; -#endif /* CONFIG_PPC32 - actually powersurge support */ +#endif /* CONFIG_PPC_PMAC32_PSURGE */ /* * Core 99 and later support @@ -791,14 +802,14 @@ static int __init smp_core99_probe(void) return ncpus; } -static void __devinit smp_core99_kick_cpu(int nr) +static int __devinit smp_core99_kick_cpu(int nr) { unsigned int save_vector; unsigned long target, flags; unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100); if (nr < 0 || nr > 3) - return; + return -ENOENT; if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); @@ -830,6 +841,8 @@ static void __devinit smp_core99_kick_cpu(int nr) local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); + + return 0; } static void __devinit smp_core99_setup_cpu(int cpu_nr) @@ -1002,7 +1015,7 @@ void __init pmac_setup_smp(void) of_node_put(np); smp_ops = &core99_smp_ops; } -#ifdef CONFIG_PPC32 +#ifdef CONFIG_PPC_PMAC32_PSURGE else { /* We have to set bits in cpu_possible_mask here since the * secondary CPU(s) aren't in the device tree. Various @@ -1015,7 +1028,7 @@ void __init pmac_setup_smp(void) set_cpu_possible(cpu, true); smp_ops = &psurge_smp_ops; } -#endif /* CONFIG_PPC32 */ +#endif /* CONFIG_PPC_PMAC32_PSURGE */ #ifdef CONFIG_HOTPLUG_CPU ppc_md.cpu_die = pmac_cpu_die; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index f2f6413b81d3..600ed2c0ed59 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -197,7 +197,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet, result = irq_set_chip_data(*virq, pd); if (result) { - pr_debug("%s:%d: set_irq_chip_data failed\n", + pr_debug("%s:%d: irq_set_chip_data failed\n", __func__, __LINE__); goto fail_set; } @@ -659,11 +659,6 @@ static void __maybe_unused _dump_mask(struct ps3_private *pd, static void dump_bmp(struct ps3_private* pd) {}; #endif /* defined(DEBUG) */ -static void ps3_host_unmap(struct irq_host *h, unsigned int virq) -{ - irq_set_chip_data(virq, NULL); -} - static int ps3_host_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hwirq) { @@ -683,7 +678,6 @@ static int ps3_host_match(struct irq_host *h, struct device_node *np) static struct irq_host_ops ps3_host_ops = { .map = ps3_host_map, - .unmap = ps3_host_unmap, .match = ps3_host_match, }; diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 51ffde40af2b..4c44794faac0 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -39,7 +39,7 @@ #define MSG_COUNT 4 static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs); -static void do_message_pass(int target, int msg) +static void ps3_smp_message_pass(int cpu, int msg) { int result; unsigned int virq; @@ -49,28 +49,12 @@ static void do_message_pass(int target, int msg) return; } - virq = per_cpu(ps3_ipi_virqs, target)[msg]; + virq = per_cpu(ps3_ipi_virqs, cpu)[msg]; result = ps3_send_event_locally(virq); if (result) DBG("%s:%d: ps3_send_event_locally(%d, %d) failed" - " (%d)\n", __func__, __LINE__, target, msg, result); -} - -static void ps3_smp_message_pass(int target, int msg) -{ - int cpu; - - if (target < NR_CPUS) - do_message_pass(target, msg); - else if (target == MSG_ALL_BUT_SELF) { - for_each_online_cpu(cpu) - if (cpu != smp_processor_id()) - do_message_pass(cpu, msg); - } else { - for_each_online_cpu(cpu) - do_message_pass(cpu, msg); - } + " (%d)\n", __func__, __LINE__, cpu, msg, result); } static int ps3_smp_probe(void) diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 39a472e9e80f..375a9f92158d 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -197,7 +197,7 @@ static void spu_unmap(struct spu *spu) * The current HV requires the spu shadow regs to be mapped with the * PTE page protection bits set as read-only (PP=3). This implementation * uses the low level __ioremap() to bypass the page protection settings - * inforced by ioremap_flags() to get the needed PTE bits set for the + * inforced by ioremap_prot() to get the needed PTE bits set for the * shadow regs. */ @@ -214,7 +214,7 @@ static int __init setup_areas(struct spu *spu) goto fail_ioremap; } - spu->local_store = (__force void *)ioremap_flags(spu->local_store_phys, + spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys, LS_SIZE, _PAGE_NO_CACHE); if (!spu->local_store) { diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 5b3da4b4ea79..71af4c5d6c05 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -3,7 +3,10 @@ config PPC_PSERIES bool "IBM pSeries & new (POWER5-based) iSeries" select MPIC select PCI_MSI - select XICS + select PPC_XICS + select PPC_ICP_NATIVE + select PPC_ICP_HV + select PPC_ICS_RTAS select PPC_I8259 select PPC_RTAS select PPC_RTAS_DAEMON @@ -47,6 +50,24 @@ config SCANLOG tristate "Scanlog dump interface" depends on RTAS_PROC && PPC_PSERIES +config IO_EVENT_IRQ + bool "IO Event Interrupt support" + depends on PPC_PSERIES + default y + help + Select this option, if you want to enable support for IO Event + interrupts. IO event interrupt is a mechanism provided by RTAS + to return information about hardware error and non-error events + which may need OS attention. RTAS returns events for multiple + event types and scopes. Device drivers can register their handlers + to receive events. + + This option will only enable the IO event platform code. You + will still need to enable or compile the actual drivers + that use this infrastruture to handle IO event interrupts. + + Say Y if you are unsure. + config LPARCFG bool "LPAR Configuration Data" depends on PPC_PSERIES || PPC_ISERIES diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index fc5237810ece..3556e402cbf5 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -5,7 +5,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_KEXEC) += kexec.o @@ -22,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o +obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o ifeq ($(CONFIG_PPC_PSERIES),y) obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index c371bc06434b..e9190073bb97 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7; /* - * Size of per-cpu log buffers. Default is just under 16 pages worth. + * Size of per-cpu log buffers. Firmware requires that the buffer does + * not cross a 4k boundary. */ -static int dtl_buf_entries = (16 * 85); - +static int dtl_buf_entries = N_DISPATCH_LOG; #ifdef CONFIG_VIRT_CPU_ACCOUNTING struct dtl_ring { @@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl) /* Register our dtl buffer with the hypervisor. The HV expects the * buffer size to be passed in the second word of the buffer */ - ((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry); + ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; hwcpu = get_hard_smp_processor_id(dtl->cpu); addr = __pa(dtl->buf); @@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl) long int rc; struct dtl_entry *buf = NULL; + if (!dtl_cache) + return -ENOMEM; + /* only allow one reader */ if (dtl->buf) return -EBUSY; n_entries = dtl_buf_entries; - buf = kmalloc_node(n_entries * sizeof(struct dtl_entry), - GFP_KERNEL, cpu_to_node(dtl->cpu)); + buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu)); if (!buf) { printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", __func__, dtl->cpu); @@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl) spin_unlock(&dtl->lock); if (rc) - kfree(buf); + kmem_cache_free(dtl_cache, buf); return rc; } @@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl) { spin_lock(&dtl->lock); dtl_stop(dtl); - kfree(dtl->buf); + kmem_cache_free(dtl_cache, dtl->buf); dtl->buf = NULL; dtl->buf_entries = 0; spin_unlock(&dtl->lock); @@ -365,7 +367,7 @@ static int dtl_init(void) event_mask_file = debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask); - buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600, + buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries); if (!event_mask_file || !buf_entries_file) { diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 89649173d3a3..46b55cf563e3 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -93,6 +93,7 @@ static int ibm_slot_error_detail; static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_bridge; +static int ibm_configure_pe; int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); @@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) pci_regs_buf[0] = 0; rtas_pci_enable(pdn, EEH_THAW_MMIO); + rtas_configure_bridge(pdn); + eeh_restore_bars(pdn); loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); @@ -448,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } +void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) +{ + struct device_node *dn; + + for_each_child_of_node(parent, dn) { + if (PCI_DN(dn)) { + + struct pci_dev *dev = PCI_DN(dn)->pcidev; + + if (dev && dev->driver) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); + } + } +} + +void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) +{ + struct pci_dev *dev; + dn = find_device_pe(dn); + + /* Back up one, since config addrs might be shared */ + if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + dn = dn->parent; + + dev = PCI_DN(dn)->pcidev; + if (dev) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); +} + /** * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze * @dn device node @@ -692,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) if (pdn->eeh_pe_config_addr) config_addr = pdn->eeh_pe_config_addr; - rc = rtas_call(ibm_set_slot_reset,4,1, NULL, + rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), state); - if (rc) - printk (KERN_WARNING "EEH: Unable to reset the failed slot," - " (%d) #RST=%d dn=%s\n", - rc, state, pdn->node->full_name); + + /* Fundamental-reset not supported on this PE, try hot-reset */ + if (rc == -8 && state == 3) { + rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), 1); + if (rc) + printk(KERN_WARNING + "EEH: Unable to reset the failed slot," + " #RST=%d dn=%s\n", + rc, pdn->node->full_name); + } } /** @@ -736,18 +781,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat /** * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second * @pdn: pci device node to be reset. - * - * Return 0 if success, else a non-zero value. */ static void __rtas_set_slot_reset(struct pci_dn *pdn) { - struct pci_dev *dev = pdn->pcidev; + unsigned int freset = 0; - /* Determine type of EEH reset required by device, - * default hot reset or fundamental reset - */ - if (dev && dev->needs_freset) + /* Determine type of EEH reset required for + * Partitionable Endpoint, a hot-reset (1) + * or a fundamental reset (3). + * A fundamental reset required by any device under + * Partitionable Endpoint trumps hot-reset. + */ + eeh_set_pe_freset(pdn->node, &freset); + + if (freset) rtas_pci_slot_reset(pdn, 3); else rtas_pci_slot_reset(pdn, 1); @@ -895,13 +943,20 @@ rtas_configure_bridge(struct pci_dn *pdn) { int config_addr; int rc; + int token; /* Use PE configuration address, if present */ config_addr = pdn->eeh_config_addr; if (pdn->eeh_pe_config_addr) config_addr = pdn->eeh_pe_config_addr; - rc = rtas_call(ibm_configure_bridge,3,1, NULL, + /* Use new configure-pe function, if supported */ + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) + token = ibm_configure_pe; + else + token = ibm_configure_bridge; + + rc = rtas_call(token, 3, 1, NULL, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); @@ -1077,6 +1132,7 @@ void __init eeh_init(void) ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); + ibm_configure_pe = rtas_token("ibm,configure-pe"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) return; diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index b8d70f5d9aa9..1b6cb10589e0 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - const char *location, *pci_str, *drv_str; + const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; frozen_dn = find_device_pe(event->dn); if (!frozen_dn) { @@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) frozen_pdn = PCI_DN(frozen_dn); frozen_pdn->eeh_freeze_count++; - if (frozen_pdn->pcidev) { - pci_str = pci_name (frozen_pdn->pcidev); - drv_str = pcid_name (frozen_pdn->pcidev); - } else { - pci_str = eeh_pci_name(event->dev); - drv_str = pcid_name (event->dev); - } + pci_str = eeh_pci_name(event->dev); + drv_str = pcid_name(event->dev); if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; @@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", frozen_pdn->eeh_freeze_count); + + if (frozen_pdn->pcidev) { + bus_pci_str = pci_name(frozen_pdn->pcidev); + bus_drv_str = pcid_name(frozen_pdn->pcidev); + printk(KERN_WARNING + "EEH: Bus location=%s driver=%s pci addr=%s\n", + location, bus_drv_str, bus_pci_str); + } + printk(KERN_WARNING - "EEH: location=%s driver=%s pci addr=%s\n", + "EEH: Device location=%s driver=%s pci addr=%s\n", location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index ef8c45489e20..46f13a3c5d09 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -19,6 +19,7 @@ */ #include <linux/kernel.h> +#include <linux/interrupt.h> #include <linux/delay.h> #include <linux/cpu.h> #include <asm/system.h> @@ -28,7 +29,7 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/pSeries_reconfig.h> -#include "xics.h" +#include <asm/xics.h> #include "plpar_wrappers.h" #include "offline_states.h" @@ -280,7 +281,7 @@ static int pseries_add_processor(struct device_node *np) } for_each_cpu(cpu, tmp) { - BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask)); + BUG_ON(cpu_present(cpu)); set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, *intserv++); } diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c new file mode 100644 index 000000000000..c829e6067d54 --- /dev/null +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -0,0 +1,231 @@ +/* + * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/list.h> +#include <linux/notifier.h> + +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/irq.h> +#include <asm/io_event_irq.h> + +#include "pseries.h" + +/* + * IO event interrupt is a mechanism provided by RTAS to return + * information about hardware error and non-error events. Device + * drivers can register their event handlers to receive events. + * Device drivers are expected to use atomic_notifier_chain_register() + * and atomic_notifier_chain_unregister() to register and unregister + * their event handlers. Since multiple IO event types and scopes + * share an IO event interrupt, the event handlers are called one + * by one until the IO event is claimed by one of the handlers. + * The event handlers are expected to return NOTIFY_OK if the + * event is handled by the event handler or NOTIFY_DONE if the + * event does not belong to the handler. + * + * Usage: + * + * Notifier function: + * #include <asm/io_event_irq.h> + * int event_handler(struct notifier_block *nb, unsigned long val, void *data) { + * p = (struct pseries_io_event_sect_data *) data; + * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE; + * : + * : + * return NOTIFY_OK; + * } + * struct notifier_block event_nb = { + * .notifier_call = event_handler, + * } + * + * Registration: + * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb); + * + * Unregistration: + * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb); + */ + +ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list); +EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list); + +static int ioei_check_exception_token; + +/* pSeries event log format */ + +/* Two bytes ASCII section IDs */ +#define PSERIES_ELOG_SECT_ID_PRIV_HDR (('P' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_USER_HDR (('U' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC (('P' << 8) | 'S') +#define PSERIES_ELOG_SECT_ID_EXTENDED_UH (('E' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_FAILING_MTMS (('M' << 8) | 'T') +#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC (('S' << 8) | 'S') +#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR (('D' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_FW_ERROR (('S' << 8) | 'W') +#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID (('L' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID (('L' << 8) | 'R') +#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M') +#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E') +#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I') +#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') + +/* Vendor specific Platform Event Log Format, Version 6, section header */ +struct pseries_elog_section { + uint16_t id; /* 0x00 2-byte ASCII section ID */ + uint16_t length; /* 0x02 Section length in bytes */ + uint8_t version; /* 0x04 Section version */ + uint8_t subtype; /* 0x05 Section subtype */ + uint16_t creator_component; /* 0x06 Creator component ID */ + uint8_t data[]; /* 0x08 Start of section data */ +}; + +static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; + +/** + * Find data portion of a specific section in RTAS extended event log. + * @elog: RTAS error/event log. + * @sect_id: secsion ID. + * + * Return: + * pointer to the section data of the specified section + * NULL if not found + */ +static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog, + uint16_t sect_id) +{ + struct rtas_ext_event_log_v6 *xelog = + (struct rtas_ext_event_log_v6 *) elog->buffer; + struct pseries_elog_section *sect; + unsigned char *p, *log_end; + + /* Check that we understand the format */ + if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) || + xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || + xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM) + return NULL; + + log_end = elog->buffer + elog->extended_log_length; + p = xelog->vendor_log; + while (p < log_end) { + sect = (struct pseries_elog_section *)p; + if (sect->id == sect_id) + return sect; + p += sect->length; + } + return NULL; +} + +/** + * Find the data portion of an IO Event section from event log. + * @elog: RTAS error/event log. + * + * Return: + * pointer to a valid IO event section data. NULL if not found. + */ +static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) +{ + struct pseries_elog_section *sect; + + /* We should only ever get called for io-event interrupts, but if + * we do get called for another type then something went wrong so + * make some noise about it. + * RTAS_TYPE_IO only exists in extended event log version 6 or later. + * No need to check event log version. + */ + if (unlikely(elog->type != RTAS_TYPE_IO)) { + printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d", + elog->type); + return NULL; + } + + sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT); + if (unlikely(!sect)) { + printk_once(KERN_WARNING "io_event_irq: RTAS extended event " + "log does not contain an IO Event section. " + "Could be a bug in system firmware!\n"); + return NULL; + } + return (struct pseries_io_event *) §->data; +} + +/* + * PAPR: + * - check-exception returns the first found error or event and clear that + * error or event so it is reported once. + * - Each interrupt returns one event. If a plateform chooses to report + * multiple events through a single interrupt, it must ensure that the + * interrupt remains asserted until check-exception has been used to + * process all out-standing events for that interrupt. + * + * Implementation notes: + * - Events must be processed in the order they are returned. Hence, + * sequential in nature. + * - The owner of an event is determined by combinations of scope, + * event type, and sub-type. There is no easy way to pre-sort clients + * by scope or event type alone. For example, Torrent ISR route change + * event is reported with scope 0x00 (Not Applicatable) rather than + * 0x3B (Torrent-hub). It is better to let the clients to identify + * who owns the the event. + */ + +static irqreturn_t ioei_interrupt(int irq, void *dev_id) +{ + struct pseries_io_event *event; + int rtas_rc; + + for (;;) { + rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), + RTAS_IO_EVENTS, 1 /* Time Critical */, + __pa(ioei_rtas_buf), + RTAS_DATA_BUF_SIZE); + if (rtas_rc != 0) + break; + + event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf); + if (!event) + continue; + + atomic_notifier_call_chain(&pseries_ioei_notifier_list, + 0, event); + } + return IRQ_HANDLED; +} + +static int __init ioei_init(void) +{ + struct device_node *np; + + ioei_check_exception_token = rtas_token("check-exception"); + if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) { + pr_warning("IO Event IRQ not supported on this system !\n"); + return -ENODEV; + } + np = of_find_node_by_path("/event-sources/ibm,io-events"); + if (np) { + request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT"); + of_node_put(np); + } else { + pr_err("io_event_irq: No ibm,io-events on system! " + "IO Event interrupt disabled.\n"); + return -ENODEV; + } + return 0; +} +machine_subsys_initcall(pseries, ioei_init); + diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6d5412a18b26..01faab9456ca 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -659,15 +659,18 @@ static void remove_ddw(struct device_node *np) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddr_avail; + const u32 *ddw_avail; u64 liobn; int len, ret; - ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len); + ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); - if (!win64 || !ddr_avail || len < 3 * sizeof(u32)) + if (!win64) return; + if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + goto delprop; + dwp = win64->value; liobn = (u64)be32_to_cpu(dwp->liobn); @@ -681,28 +684,29 @@ static void remove_ddw(struct device_node *np) pr_debug("%s successfully cleared tces in window.\n", np->full_name); - ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn); + ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); if (ret) pr_warning("%s: failed to remove direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddr_avail[2], liobn); + np->full_name, ret, ddw_avail[2], liobn); else pr_debug("%s: successfully removed direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddr_avail[2], liobn); -} + np->full_name, ret, ddw_avail[2], liobn); +delprop: + ret = prom_remove_property(np, win64); + if (ret) + pr_warning("%s: failed to remove direct window property: %d\n", + np->full_name, ret); +} -static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn) +static u64 find_existing_ddw(struct device_node *pdn) { - struct device_node *dn; - struct pci_dn *pcidn; struct direct_window *window; const struct dynamic_dma_window_prop *direct64; u64 dma_addr = 0; - dn = pci_device_to_OF_node(dev); - pcidn = PCI_DN(dn); spin_lock(&direct_window_list_lock); /* check if we already created a window and dupe that config if so */ list_for_each_entry(window, &direct_window_list, list) { @@ -717,36 +721,40 @@ static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node * return dma_addr; } -static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn) +static int find_existing_ddw_windows(void) { - struct device_node *dn; - struct pci_dn *pcidn; int len; + struct device_node *pdn; struct direct_window *window; const struct dynamic_dma_window_prop *direct64; - u64 dma_addr = 0; - dn = pci_device_to_OF_node(dev); - pcidn = PCI_DN(dn); - direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); - if (direct64) { + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + for_each_node_with_property(pdn, DIRECT64_PROPNAME) { + direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); + if (!direct64) + continue; + window = kzalloc(sizeof(*window), GFP_KERNEL); - if (!window) { + if (!window || len < sizeof(struct dynamic_dma_window_prop)) { + kfree(window); remove_ddw(pdn); - } else { - window->device = pdn; - window->prop = direct64; - spin_lock(&direct_window_list_lock); - list_add(&window->list, &direct_window_list); - spin_unlock(&direct_window_list_lock); - dma_addr = direct64->dma_base; + continue; } + + window->device = pdn; + window->prop = direct64; + spin_lock(&direct_window_list_lock); + list_add(&window->list, &direct_window_list); + spin_unlock(&direct_window_list_lock); } - return dma_addr; + return 0; } +machine_arch_initcall(pseries, find_existing_ddw_windows); -static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, +static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_query_response *query) { struct device_node *dn; @@ -767,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, if (pcidn->eeh_pe_config_addr) cfg_addr = pcidn->eeh_pe_config_addr; buid = pcidn->phb->buid; - ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query, + ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" - " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid), + " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), BUID_LO(buid), ret); return ret; } -static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, +static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_create_response *create, int page_shift, int window_shift) { @@ -800,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr, + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " - "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1], + "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); @@ -831,18 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddr_avail); + const u32 *uninitialized_var(ddw_avail); struct direct_window *window; - struct property *uninitialized_var(win64); + struct property *win64; struct dynamic_dma_window_prop *ddwprop; mutex_lock(&direct_window_init_mutex); - dma_addr = dupe_ddw_if_already_created(dev, pdn); - if (dma_addr != 0) - goto out_unlock; - - dma_addr = dupe_ddw_if_kexec(dev, pdn); + dma_addr = find_existing_ddw(pdn); if (dma_addr != 0) goto out_unlock; @@ -854,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddr_avail || len < 3 * sizeof(u32)) + ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); + if (!ddw_avail || len < 3 * sizeof(u32)) goto out_unlock; /* @@ -865,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * of page sizes: supported and supported for migrate-dma. */ dn = pci_device_to_OF_node(dev); - ret = query_ddw(dev, ddr_avail, &query); + ret = query_ddw(dev, ddw_avail, &query); if (ret != 0) goto out_unlock; @@ -907,13 +911,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) } win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); + win64->length = sizeof(*ddwprop); if (!win64->name || !win64->value) { dev_info(&dev->dev, "couldn't allocate property name and value\n"); goto out_free_prop; } - ret = create_ddw(dev, ddr_avail, &create, page_shift, len); + ret = create_ddw(dev, ddw_avail, &create, page_shift, len); if (ret != 0) goto out_free_prop; @@ -1021,13 +1026,16 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) const void *dma_window = NULL; u64 dma_offset; - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + if (!dev->dma_mask) return -EIO; + if (!dev_is_pci(dev)) + goto check_mask; + + pdev = to_pci_dev(dev); + /* only attempt to use a new window if 64-bit DMA is requested */ if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) { - pdev = to_pci_dev(dev); - dn = pci_device_to_OF_node(pdev); dev_dbg(dev, "node is %s\n", dn->full_name); @@ -1054,12 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) } } - /* fall-through to iommu ops */ - if (!ddw_enabled) { - dev_info(dev, "Using 32-bit DMA via iommu\n"); + /* fall back on iommu ops, restore table pointer with ops */ + if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) { + dev_info(dev, "Restoring 32-bit DMA via iommu\n"); set_dma_ops(dev, &dma_iommu_ops); + pci_dma_dev_setup_pSeriesLP(pdev); } +check_mask: + if (!dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; return 0; } diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 77d38a5e2ff9..54cf3a4aa16b 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -7,15 +7,18 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/kernel.h> +#include <linux/interrupt.h> + #include <asm/machdep.h> #include <asm/page.h> #include <asm/firmware.h> #include <asm/kexec.h> #include <asm/mpic.h> +#include <asm/xics.h> #include <asm/smp.h> #include "pseries.h" -#include "xics.h" #include "plpar_wrappers.h" static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index ca5d5898d320..39e6e0a7b2fa 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, /* Make pHyp happy */ if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~_PAGE_COHERENT; + if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) + flags |= H_COALESCE_CAND; lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { @@ -573,7 +575,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long i, pix, rc; unsigned long flags = 0; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long va; unsigned long hash, index, shift, hidx, slot; @@ -771,3 +773,47 @@ out: local_irq_restore(flags); } #endif + +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; +} +EXPORT_SYMBOL(h_get_mpp); + +int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; + + rc = plpar_hcall9(H_GET_MPP_X, retbuf); + + mpp_x_data->coalesced_bytes = retbuf[0]; + mpp_x_data->pool_coalesced_bytes = retbuf[1]; + mpp_x_data->pool_purr_cycles = retbuf[2]; + mpp_x_data->pool_spurr_cycles = retbuf[3]; + + return rc; +} diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index d9801117124b..4bf21207d7d3 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len, lbuf[1]); } -static inline long plpar_eoi(unsigned long xirr) -{ - return plpar_hcall_norets(H_EOI, xirr); -} - -static inline long plpar_cppr(unsigned long cppr) -{ - return plpar_hcall_norets(H_CPPR, cppr); -} - -static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr) -{ - return plpar_hcall_norets(H_IPI, servernum, mfrr); -} - -static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_XIRR, retbuf, cppr); - - *xirr_ret = retbuf[0]; - - return rc; -} - #endif /* _PSERIES_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index c55d7ad9c648..086d2ae4e06a 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, - irq_map[irq].hwirq, + virq_to_hw(irq), RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, critical, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, - irq_map[irq].hwirq, + virq_to_hw(irq), RTAS_INTERNAL_ERROR, 1 /*Time Critical */, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) struct rtas_error_log *h, *errhdr = NULL; if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3\n"); + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); return NULL; } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6c42cfde8415..593acceeff96 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -53,9 +53,9 @@ #include <asm/irq.h> #include <asm/time.h> #include <asm/nvram.h> -#include "xics.h" #include <asm/pmc.h> #include <asm/mpic.h> +#include <asm/xics.h> #include <asm/ppc-pci.h> #include <asm/i8259.h> #include <asm/udbg.h> @@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void) mpic_assign_isu(mpic, n, isuaddr); } + /* Setup top-level get_irq */ + ppc_md.get_irq = mpic_get_irq; + /* All ISUs are setup, complete initialization */ mpic_init(mpic); @@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void) static void __init pseries_xics_init_IRQ(void) { - xics_init_IRQ(); + xics_init(); pseries_setup_i8259_cascade(); } @@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void) if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); ppc_md.init_IRQ = pseries_mpic_init_IRQ; - ppc_md.get_irq = mpic_get_irq; setup_kexec_cpu_down_mpic(); smp_init_pseries_mpic(); return; @@ -276,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +struct kmem_cache *dtl_cache; + #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Allocate space for the dispatch trace log for all possible cpus @@ -287,18 +291,12 @@ static int alloc_dispatch_logs(void) int cpu, ret; struct paca_struct *pp; struct dtl_entry *dtl; - struct kmem_cache *dtl_cache; if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, - DISPATCH_LOG_BYTES, 0, NULL); - if (!dtl_cache) { - pr_warn("Failed to create dispatch trace log buffer cache\n"); - pr_warn("Stolen time statistics will be unreliable\n"); + if (!dtl_cache) return 0; - } for_each_possible_cpu(cpu) { pp = &paca[cpu]; @@ -332,10 +330,27 @@ static int alloc_dispatch_logs(void) return 0; } - -early_initcall(alloc_dispatch_logs); +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +static inline int alloc_dispatch_logs(void) +{ + return 0; +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +static int alloc_dispatch_log_kmem_cache(void) +{ + dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, NULL); + if (!dtl_cache) { + pr_warn("Failed to create dispatch trace log buffer cache\n"); + pr_warn("Stolen time statistics will be unreliable\n"); + return 0; + } + + return alloc_dispatch_logs(); +} +early_initcall(alloc_dispatch_log_kmem_cache); + static void __init pSeries_setup_arch(void) { /* Discover PIC type and setup ppc_md accordingly */ @@ -403,6 +418,16 @@ static int pseries_set_xdabr(unsigned long dabr) #define CMO_CHARACTERISTICS_TOKEN 44 #define CMO_MAXLENGTH 1026 +void pSeries_coalesce_init(void) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data)) + powerpc_firmware_features |= FW_FEATURE_XCMO; + else + powerpc_firmware_features &= ~FW_FEATURE_XCMO; +} + /** * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) @@ -472,6 +497,7 @@ void pSeries_cmo_feature_init(void) pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); powerpc_firmware_features |= FW_FEATURE_CMO; + pSeries_coalesce_init(); } else pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index a509c5292a67..fbffd7e47ab8 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -44,10 +44,11 @@ #include <asm/mpic.h> #include <asm/vdso_datapage.h> #include <asm/cputhreads.h> +#include <asm/mpic.h> +#include <asm/xics.h> #include "plpar_wrappers.h" #include "pseries.h" -#include "xics.h" #include "offline_states.h" @@ -136,7 +137,6 @@ out: return 1; } -#ifdef CONFIG_XICS static void __devinit smp_xics_setup_cpu(int cpu) { if (cpu != boot_cpuid) @@ -151,14 +151,13 @@ static void __devinit smp_xics_setup_cpu(int cpu) set_default_offline_state(cpu); #endif } -#endif /* CONFIG_XICS */ -static void __devinit smp_pSeries_kick_cpu(int nr) +static int __devinit smp_pSeries_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); if (!smp_startup_cpu(nr)) - return; + return -ENOENT; /* * The processor is currently spinning, waiting for the @@ -180,6 +179,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr) "Ret= %ld\n", nr, rc); } #endif + + return 0; } static int smp_pSeries_cpu_bootable(unsigned int nr) @@ -197,23 +198,22 @@ static int smp_pSeries_cpu_bootable(unsigned int nr) return 1; } -#ifdef CONFIG_MPIC + static struct smp_ops_t pSeries_mpic_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_mpic_setup_cpu, }; -#endif -#ifdef CONFIG_XICS + static struct smp_ops_t pSeries_xics_smp_ops = { - .message_pass = smp_xics_message_pass, - .probe = smp_xics_probe, + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ + .probe = xics_smp_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_xics_setup_cpu, .cpu_bootable = smp_pSeries_cpu_bootable, }; -#endif /* This is called very early */ static void __init smp_init_pseries(void) @@ -245,14 +245,12 @@ static void __init smp_init_pseries(void) pr_debug(" <- smp_init_pSeries()\n"); } -#ifdef CONFIG_MPIC void __init smp_init_pseries_mpic(void) { smp_ops = &pSeries_mpic_smp_ops; smp_init_pseries(); } -#endif void __init smp_init_pseries_xics(void) { diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c deleted file mode 100644 index d6901334d66e..000000000000 --- a/arch/powerpc/platforms/pseries/xics.c +++ /dev/null @@ -1,949 +0,0 @@ -/* - * arch/powerpc/platforms/pseries/xics.c - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/types.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/radix-tree.h> -#include <linux/cpu.h> -#include <linux/msi.h> -#include <linux/of.h> -#include <linux/percpu.h> - -#include <asm/firmware.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/smp.h> -#include <asm/rtas.h> -#include <asm/hvcall.h> -#include <asm/machdep.h> - -#include "xics.h" -#include "plpar_wrappers.h" - -static struct irq_host *xics_host; - -#define XICS_IPI 2 -#define XICS_IRQ_SPURIOUS 0 - -/* Want a priority other than 0. Various HW issues require this. */ -#define DEFAULT_PRIORITY 5 - -/* - * Mark IPIs as higher priority so we can take them inside interrupts that - * arent marked IRQF_DISABLED - */ -#define IPI_PRIORITY 4 - -/* The least favored priority */ -#define LOWEST_PRIORITY 0xFF - -/* The number of priorities defined above */ -#define MAX_NUM_PRIORITIES 3 - -static unsigned int default_server = 0xFF; -static unsigned int default_distrib_server = 0; -static unsigned int interrupt_server_size = 8; - -/* RTAS service tokens */ -static int ibm_get_xive; -static int ibm_set_xive; -static int ibm_int_on; -static int ibm_int_off; - -struct xics_cppr { - unsigned char stack[MAX_NUM_PRIORITIES]; - int index; -}; - -static DEFINE_PER_CPU(struct xics_cppr, xics_cppr); - -/* Direct hardware low level accessors */ - -/* The part of the interrupt presentation layer that we care about */ -struct xics_ipl { - union { - u32 word; - u8 bytes[4]; - } xirr_poll; - union { - u32 word; - u8 bytes[4]; - } xirr; - u32 dummy; - union { - u32 word; - u8 bytes[4]; - } qirr; -}; - -static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; - -static inline unsigned int direct_xirr_info_get(void) -{ - int cpu = smp_processor_id(); - - return in_be32(&xics_per_cpu[cpu]->xirr.word); -} - -static inline void direct_xirr_info_set(unsigned int value) -{ - int cpu = smp_processor_id(); - - out_be32(&xics_per_cpu[cpu]->xirr.word, value); -} - -static inline void direct_cppr_info(u8 value) -{ - int cpu = smp_processor_id(); - - out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value); -} - -static inline void direct_qirr_info(int n_cpu, u8 value) -{ - out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); -} - - -/* LPAR low level accessors */ - -static inline unsigned int lpar_xirr_info_get(unsigned char cppr) -{ - unsigned long lpar_rc; - unsigned long return_value; - - lpar_rc = plpar_xirr(&return_value, cppr); - if (lpar_rc != H_SUCCESS) - panic(" bad return code xirr - rc = %lx\n", lpar_rc); - return (unsigned int)return_value; -} - -static inline void lpar_xirr_info_set(unsigned int value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_eoi(value); - if (lpar_rc != H_SUCCESS) - panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc, - value); -} - -static inline void lpar_cppr_info(u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_cppr(value); - if (lpar_rc != H_SUCCESS) - panic("bad return code cppr - rc = %lx\n", lpar_rc); -} - -static inline void lpar_qirr_info(int n_cpu , u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value); - if (lpar_rc != H_SUCCESS) - panic("bad return code qirr - rc = %lx\n", lpar_rc); -} - - -/* Interface to generic irq subsystem */ - -#ifdef CONFIG_SMP -/* - * For the moment we only implement delivery to all cpus or one cpu. - * - * If the requested affinity is cpu_all_mask, we set global affinity. - * If not we set it to the first cpu in the mask, even if multiple cpus - * are set. This is so things like irqbalance (which set core and package - * wide affinities) do the right thing. - */ -static int get_irq_server(unsigned int virq, const struct cpumask *cpumask, - unsigned int strict_check) -{ - - if (!distribute_irqs) - return default_server; - - if (!cpumask_subset(cpu_possible_mask, cpumask)) { - int server = cpumask_first_and(cpu_online_mask, cpumask); - - if (server < nr_cpu_ids) - return get_hard_smp_processor_id(server); - - if (strict_check) - return -1; - } - - /* - * Workaround issue with some versions of JS20 firmware that - * deliver interrupts to cpus which haven't been started. This - * happens when using the maxcpus= boot option. - */ - if (cpumask_equal(cpu_online_mask, cpu_present_mask)) - return default_distrib_server; - - return default_server; -} -#else -#define get_irq_server(virq, cpumask, strict_check) (default_server) -#endif - -static void xics_unmask_irq(struct irq_data *d) -{ - unsigned int hwirq; - int call_status; - int server; - - pr_devel("xics: unmask virq %d\n", d->irq); - - hwirq = (unsigned int)irq_map[d->irq].hwirq; - pr_devel(" -> map to hwirq 0x%x\n", hwirq); - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - return; - - server = get_irq_server(d->irq, d->affinity, 0); - - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server, - DEFAULT_PRIORITY); - if (call_status != 0) { - printk(KERN_ERR - "%s: ibm_set_xive irq %u server %x returned %d\n", - __func__, hwirq, server, call_status); - return; - } - - /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", - __func__, hwirq, call_status); - return; - } -} - -static unsigned int xics_startup(struct irq_data *d) -{ - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (d->msi_desc) - unmask_msi_irq(d); - - /* unmask it */ - xics_unmask_irq(d); - return 0; -} - -static void xics_mask_real_irq(unsigned int hwirq) -{ - int call_status; - - if (hwirq == XICS_IPI) - return; - - call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", - __func__, hwirq, call_status); - return; - } - - /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, - default_server, 0xff); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", - __func__, hwirq, call_status); - return; - } -} - -static void xics_mask_irq(struct irq_data *d) -{ - unsigned int hwirq; - - pr_devel("xics: mask virq %d\n", d->irq); - - hwirq = (unsigned int)irq_map[d->irq].hwirq; - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - return; - xics_mask_real_irq(hwirq); -} - -static void xics_mask_unknown_vec(unsigned int vec) -{ - printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec); - xics_mask_real_irq(vec); -} - -static inline unsigned int xics_xirr_vector(unsigned int xirr) -{ - /* - * The top byte is the old cppr, to be restored on EOI. - * The remaining 24 bits are the vector. - */ - return xirr & 0x00ffffff; -} - -static void push_cppr(unsigned int vec) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) - return; - - if (vec == XICS_IPI) - os_cppr->stack[++os_cppr->index] = IPI_PRIORITY; - else - os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY; -} - -static unsigned int xics_get_irq_direct(void) -{ - unsigned int xirr = direct_xirr_info_get(); - unsigned int vec = xics_xirr_vector(xirr); - unsigned int irq; - - if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; - - irq = irq_radix_revmap_lookup(xics_host, vec); - if (likely(irq != NO_IRQ)) { - push_cppr(vec); - return irq; - } - - /* We don't have a linux mapping, so have rtas mask it. */ - xics_mask_unknown_vec(vec); - - /* We might learn about it later, so EOI it */ - direct_xirr_info_set(xirr); - return NO_IRQ; -} - -static unsigned int xics_get_irq_lpar(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]); - unsigned int vec = xics_xirr_vector(xirr); - unsigned int irq; - - if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; - - irq = irq_radix_revmap_lookup(xics_host, vec); - if (likely(irq != NO_IRQ)) { - push_cppr(vec); - return irq; - } - - /* We don't have a linux mapping, so have RTAS mask it. */ - xics_mask_unknown_vec(vec); - - /* We might learn about it later, so EOI it */ - lpar_xirr_info_set(xirr); - return NO_IRQ; -} - -static unsigned char pop_cppr(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - if (WARN_ON(os_cppr->index < 1)) - return LOWEST_PRIORITY; - - return os_cppr->stack[--os_cppr->index]; -} - -static void xics_eoi_direct(struct irq_data *d) -{ - unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq; - - iosync(); - direct_xirr_info_set((pop_cppr() << 24) | hwirq); -} - -static void xics_eoi_lpar(struct irq_data *d) -{ - unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq; - - iosync(); - lpar_xirr_info_set((pop_cppr() << 24) | hwirq); -} - -static int -xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force) -{ - unsigned int hwirq; - int status; - int xics_status[2]; - int irq_server; - - hwirq = (unsigned int)irq_map[d->irq].hwirq; - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - return -1; - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq); - - if (status) { - printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", - __func__, hwirq, status); - return -1; - } - - irq_server = get_irq_server(d->irq, cpumask, 1); - if (irq_server == -1) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - printk(KERN_WARNING - "%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); - return -1; - } - - status = rtas_call(ibm_set_xive, 3, 1, NULL, - hwirq, irq_server, xics_status[1]); - - if (status) { - printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", - __func__, hwirq, status); - return -1; - } - - return 0; -} - -static struct irq_chip xics_pic_direct = { - .name = "XICS", - .irq_startup = xics_startup, - .irq_mask = xics_mask_irq, - .irq_unmask = xics_unmask_irq, - .irq_eoi = xics_eoi_direct, - .irq_set_affinity = xics_set_affinity -}; - -static struct irq_chip xics_pic_lpar = { - .name = "XICS", - .irq_startup = xics_startup, - .irq_mask = xics_mask_irq, - .irq_unmask = xics_unmask_irq, - .irq_eoi = xics_eoi_lpar, - .irq_set_affinity = xics_set_affinity -}; - - -/* Interface to arch irq controller subsystem layer */ - -/* Points to the irq_chip we're actually using */ -static struct irq_chip *xics_irq_chip; - -static int xics_host_match(struct irq_host *h, struct device_node *node) -{ - /* IBM machines have interrupt parents of various funky types for things - * like vdevices, events, etc... The trick we use here is to match - * everything here except the legacy 8259 which is compatible "chrp,iic" - */ - return !of_device_is_compatible(node, "chrp,iic"); -} - -static int xics_host_map(struct irq_host *h, unsigned int virq, - irq_hw_number_t hw) -{ - pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); - - /* Insert the interrupt mapping into the radix tree for fast lookup */ - irq_radix_revmap_insert(xics_host, virq, hw); - - irq_set_status_flags(virq, IRQ_LEVEL); - irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq); - return 0; -} - -static int xics_host_xlate(struct irq_host *h, struct device_node *ct, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_flags) - -{ - /* Current xics implementation translates everything - * to level. It is not technically right for MSIs but this - * is irrelevant at this point. We might get smarter in the future - */ - *out_hwirq = intspec[0]; - *out_flags = IRQ_TYPE_LEVEL_LOW; - - return 0; -} - -static struct irq_host_ops xics_host_ops = { - .match = xics_host_match, - .map = xics_host_map, - .xlate = xics_host_xlate, -}; - -static void __init xics_init_host(void) -{ - if (firmware_has_feature(FW_FEATURE_LPAR)) - xics_irq_chip = &xics_pic_lpar; - else - xics_irq_chip = &xics_pic_direct; - - xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops, - XICS_IRQ_SPURIOUS); - BUG_ON(xics_host == NULL); - irq_set_default_host(xics_host); -} - - -/* Inter-processor interrupt support */ - -#ifdef CONFIG_SMP -/* - * XICS only has a single IPI, so encode the messages per CPU - */ -static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message); - -static inline void smp_xics_do_message(int cpu, int msg) -{ - unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); - - set_bit(msg, tgt); - mb(); - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_qirr_info(cpu, IPI_PRIORITY); - else - direct_qirr_info(cpu, IPI_PRIORITY); -} - -void smp_xics_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - smp_xics_do_message(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - smp_xics_do_message(i, msg); - } - } -} - -static irqreturn_t xics_ipi_dispatch(int cpu) -{ - unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); - - mb(); /* order mmio clearing qirr */ - while (*tgt) { - if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) { - smp_message_recv(PPC_MSG_CALL_FUNCTION); - } - if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) { - smp_message_recv(PPC_MSG_RESCHEDULE); - } - if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) { - smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE); - } -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) - if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) { - smp_message_recv(PPC_MSG_DEBUGGER_BREAK); - } -#endif - } - return IRQ_HANDLED; -} - -static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id) -{ - int cpu = smp_processor_id(); - - direct_qirr_info(cpu, 0xff); - - return xics_ipi_dispatch(cpu); -} - -static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id) -{ - int cpu = smp_processor_id(); - - lpar_qirr_info(cpu, 0xff); - - return xics_ipi_dispatch(cpu); -} - -static void xics_request_ipi(void) -{ - unsigned int ipi; - int rc; - - ipi = irq_create_mapping(xics_host, XICS_IPI); - BUG_ON(ipi == NO_IRQ); - - /* - * IPIs are marked IRQF_DISABLED as they must run with irqs - * disabled - */ - irq_set_handler(ipi, handle_percpu_irq); - if (firmware_has_feature(FW_FEATURE_LPAR)) - rc = request_irq(ipi, xics_ipi_action_lpar, - IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); - else - rc = request_irq(ipi, xics_ipi_action_direct, - IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); - BUG_ON(rc); -} - -int __init smp_xics_probe(void) -{ - xics_request_ipi(); - - return cpumask_weight(cpu_possible_mask); -} - -#endif /* CONFIG_SMP */ - - -/* Initialization */ - -static void xics_update_irq_servers(void) -{ - int i, j; - struct device_node *np; - u32 ilen; - const u32 *ireg; - u32 hcpuid; - - /* Find the server numbers for the boot cpu. */ - np = of_get_cpu_node(boot_cpuid, NULL); - BUG_ON(!np); - - ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen); - if (!ireg) { - of_node_put(np); - return; - } - - i = ilen / sizeof(int); - hcpuid = get_hard_smp_processor_id(boot_cpuid); - - /* Global interrupt distribution server is specified in the last - * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last - * entry fom this property for current boot cpu id and use it as - * default distribution server - */ - for (j = 0; j < i; j += 2) { - if (ireg[j] == hcpuid) { - default_server = hcpuid; - default_distrib_server = ireg[j+1]; - } - } - - of_node_put(np); -} - -static void __init xics_map_one_cpu(int hw_id, unsigned long addr, - unsigned long size) -{ - int i; - - /* This may look gross but it's good enough for now, we don't quite - * have a hard -> linux processor id matching. - */ - for_each_possible_cpu(i) { - if (!cpu_present(i)) - continue; - if (hw_id == get_hard_smp_processor_id(i)) { - xics_per_cpu[i] = ioremap(addr, size); - return; - } - } -} - -static void __init xics_init_one_node(struct device_node *np, - unsigned int *indx) -{ - unsigned int ilen; - const u32 *ireg; - - /* This code does the theorically broken assumption that the interrupt - * server numbers are the same as the hard CPU numbers. - * This happens to be the case so far but we are playing with fire... - * should be fixed one of these days. -BenH. - */ - ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL); - - /* Do that ever happen ? we'll know soon enough... but even good'old - * f80 does have that property .. - */ - WARN_ON(ireg == NULL); - if (ireg) { - /* - * set node starting index for this node - */ - *indx = *ireg; - } - ireg = of_get_property(np, "reg", &ilen); - if (!ireg) - panic("xics_init_IRQ: can't find interrupt reg property"); - - while (ilen >= (4 * sizeof(u32))) { - unsigned long addr, size; - - /* XXX Use proper OF parsing code here !!! */ - addr = (unsigned long)*ireg++ << 32; - ilen -= sizeof(u32); - addr |= *ireg++; - ilen -= sizeof(u32); - size = (unsigned long)*ireg++ << 32; - ilen -= sizeof(u32); - size |= *ireg++; - ilen -= sizeof(u32); - xics_map_one_cpu(*indx, addr, size); - (*indx)++; - } -} - -void __init xics_init_IRQ(void) -{ - struct device_node *np; - u32 indx = 0; - int found = 0; - const u32 *isize; - - ppc64_boot_msg(0x20, "XICS Init"); - - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); - - for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") { - found = 1; - if (firmware_has_feature(FW_FEATURE_LPAR)) { - of_node_put(np); - break; - } - xics_init_one_node(np, &indx); - } - if (found == 0) - return; - - /* get the bit size of server numbers */ - found = 0; - - for_each_compatible_node(np, NULL, "ibm,ppc-xics") { - isize = of_get_property(np, "ibm,interrupt-server#-size", NULL); - - if (!isize) - continue; - - if (!found) { - interrupt_server_size = *isize; - found = 1; - } else if (*isize != interrupt_server_size) { - printk(KERN_WARNING "XICS: " - "mismatched ibm,interrupt-server#-size\n"); - interrupt_server_size = max(*isize, - interrupt_server_size); - } - } - - xics_update_irq_servers(); - xics_init_host(); - - if (firmware_has_feature(FW_FEATURE_LPAR)) - ppc_md.get_irq = xics_get_irq_lpar; - else - ppc_md.get_irq = xics_get_irq_direct; - - xics_setup_cpu(); - - ppc64_boot_msg(0x21, "XICS Done"); -} - -/* Cpu startup, shutdown, and hotplug */ - -static void xics_set_cpu_priority(unsigned char cppr) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - /* - * we only really want to set the priority when there's - * just one cppr value on the stack - */ - WARN_ON(os_cppr->index != 0); - - os_cppr->stack[0] = cppr; - - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_cppr_info(cppr); - else - direct_cppr_info(cppr); - iosync(); -} - -/* Have the calling processor join or leave the specified global queue */ -static void xics_set_cpu_giq(unsigned int gserver, unsigned int join) -{ - int index; - int status; - - if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL)) - return; - - index = (1UL << interrupt_server_size) - 1 - gserver; - - status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join); - - WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n", - GLOBAL_INTERRUPT_QUEUE, index, join, status); -} - -void xics_setup_cpu(void) -{ - xics_set_cpu_priority(LOWEST_PRIORITY); - - xics_set_cpu_giq(default_distrib_server, 1); -} - -void xics_teardown_cpu(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - int cpu = smp_processor_id(); - - /* - * we have to reset the cppr index to 0 because we're - * not going to return from the IPI - */ - os_cppr->index = 0; - xics_set_cpu_priority(0); - - /* Clear any pending IPI request */ - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_qirr_info(cpu, 0xff); - else - direct_qirr_info(cpu, 0xff); -} - -void xics_kexec_teardown_cpu(int secondary) -{ - xics_teardown_cpu(); - - /* - * we take the ipi irq but and never return so we - * need to EOI the IPI, but want to leave our priority 0 - * - * should we check all the other interrupts too? - * should we be flagging idle loop instead? - * or creating some task to be scheduled? - */ - - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_xirr_info_set((0x00 << 24) | XICS_IPI); - else - direct_xirr_info_set((0x00 << 24) | XICS_IPI); - - /* - * Some machines need to have at least one cpu in the GIQ, - * so leave the master cpu in the group. - */ - if (secondary) - xics_set_cpu_giq(default_distrib_server, 0); -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* Interrupts are disabled. */ -void xics_migrate_irqs_away(void) -{ - int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id(); - int virq; - - /* If we used to be the default server, move to the new "boot_cpuid" */ - if (hw_cpu == default_server) - xics_update_irq_servers(); - - /* Reject any interrupt that was queued to us... */ - xics_set_cpu_priority(0); - - /* Remove ourselves from the global interrupt queue */ - xics_set_cpu_giq(default_distrib_server, 0); - - /* Allow IPIs again... */ - xics_set_cpu_priority(DEFAULT_PRIORITY); - - for_each_irq(virq) { - struct irq_desc *desc; - struct irq_chip *chip; - unsigned int hwirq; - int xics_status[2]; - int status; - unsigned long flags; - - /* We can't set affinity on ISA interrupts */ - if (virq < NUM_ISA_INTERRUPTS) - continue; - if (irq_map[virq].host != xics_host) - continue; - hwirq = (unsigned int)irq_map[virq].hwirq; - /* We need to get IPIs still. */ - if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS) - continue; - - desc = irq_to_desc(virq); - - /* We only need to migrate enabled IRQS */ - if (desc == NULL || desc->action == NULL) - continue; - - chip = irq_desc_get_chip(desc); - if (chip == NULL || chip->irq_set_affinity == NULL) - continue; - - raw_spin_lock_irqsave(&desc->lock, flags); - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq); - if (status) { - printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", - __func__, hwirq, status); - goto unlock; - } - - /* - * We only support delivery to all cpus or to one cpu. - * The irq has to be migrated only in the single cpu - * case. - */ - if (xics_status[0] != hw_cpu) - goto unlock; - - /* This is expected during cpu offline. */ - if (cpu_online(cpu)) - printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n", - virq, cpu); - - /* Reset affinity to all cpus */ - cpumask_setall(desc->irq_data.affinity); - chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true); -unlock: - raw_spin_unlock_irqrestore(&desc->lock, flags); - } -} -#endif diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h deleted file mode 100644 index d1d5a83039ae..000000000000 --- a/arch/powerpc/platforms/pseries/xics.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * arch/powerpc/platforms/pseries/xics.h - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _POWERPC_KERNEL_XICS_H -#define _POWERPC_KERNEL_XICS_H - -extern void xics_init_IRQ(void); -extern void xics_setup_cpu(void); -extern void xics_teardown_cpu(void); -extern void xics_kexec_teardown_cpu(int secondary); -extern void xics_migrate_irqs_away(void); -extern int smp_xics_probe(void); -extern void smp_xics_message_pass(int target, int msg); - -#endif /* _POWERPC_KERNEL_XICS_H */ diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig new file mode 100644 index 000000000000..c3c48eb62cc1 --- /dev/null +++ b/arch/powerpc/platforms/wsp/Kconfig @@ -0,0 +1,28 @@ +config PPC_WSP + bool + default n + +menu "WSP platform selection" + depends on PPC_BOOK3E_64 + +config PPC_PSR2 + bool "PSR-2 platform" + select PPC_A2 + select GENERIC_TBSYNC + select PPC_SCOM + select EPAPR_BOOT + select PPC_WSP + select PPC_XICS + select PPC_ICP_NATIVE + default y + +endmenu + +config PPC_A2_DD2 + bool "Support for DD2 based A2/WSP systems" + depends on PPC_A2 + +config WORKAROUND_ERRATUM_463 + depends on PPC_A2_DD2 + bool "Workaround erratum 463" + default y diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile new file mode 100644 index 000000000000..095be73d6cd4 --- /dev/null +++ b/arch/powerpc/platforms/wsp/Makefile @@ -0,0 +1,6 @@ +ccflags-y += -mno-minimal-toc + +obj-y += setup.o ics.o +obj-$(CONFIG_PPC_PSR2) += psr2.o opb_pic.o +obj-$(CONFIG_PPC_WSP) += scom_wsp.o +obj-$(CONFIG_SMP) += smp.o scom_smp.o diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c new file mode 100644 index 000000000000..e53bd9e7b125 --- /dev/null +++ b/arch/powerpc/platforms/wsp/ics.c @@ -0,0 +1,712 @@ +/* + * Copyright 2008-2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/xics.h> + +#include "wsp.h" +#include "ics.h" + + +/* WSP ICS */ + +struct wsp_ics { + struct ics ics; + struct device_node *dn; + void __iomem *regs; + spinlock_t lock; + unsigned long *bitmap; + u32 chip_id; + u32 lsi_base; + u32 lsi_count; + u64 hwirq_start; + u64 count; +#ifdef CONFIG_SMP + int *hwirq_cpu_map; +#endif +}; + +#define to_wsp_ics(ics) container_of(ics, struct wsp_ics, ics) + +#define INT_SRC_LAYER_BUID_REG(base) ((base) + 0x00) +#define IODA_TBL_ADDR_REG(base) ((base) + 0x18) +#define IODA_TBL_DATA_REG(base) ((base) + 0x20) +#define XIVE_UPDATE_REG(base) ((base) + 0x28) +#define ICS_INT_CAPS_REG(base) ((base) + 0x30) + +#define TBL_AUTO_INCREMENT ((1UL << 63) | (1UL << 15)) +#define TBL_SELECT_XIST (1UL << 48) +#define TBL_SELECT_XIVT (1UL << 49) + +#define IODA_IRQ(irq) ((irq) & (0x7FFULL)) /* HRM 5.1.3.4 */ + +#define XIST_REQUIRED 0x8 +#define XIST_REJECTED 0x4 +#define XIST_PRESENTED 0x2 +#define XIST_PENDING 0x1 + +#define XIVE_SERVER_SHIFT 42 +#define XIVE_SERVER_MASK 0xFFFFULL +#define XIVE_PRIORITY_MASK 0xFFULL +#define XIVE_PRIORITY_SHIFT 32 +#define XIVE_WRITE_ENABLE (1ULL << 63) + +/* + * The docs refer to a 6 bit field called ChipID, which consists of a + * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero + * so we ignore it, and every where we use "chip id" in this code we + * mean the NodeID. + */ +#define WSP_ICS_CHIP_SHIFT 17 + + +static struct wsp_ics *ics_list; +static int num_ics; + +/* ICS Source controller accessors */ + +static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq) +{ + unsigned long flags; + u64 xive; + + spin_lock_irqsave(&ics->lock, flags); + out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq)); + xive = in_be64(IODA_TBL_DATA_REG(ics->regs)); + spin_unlock_irqrestore(&ics->lock, flags); + + return xive; +} + +static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive) +{ + xive &= ~XIVE_ADDR_MASK; + xive |= (irq & XIVE_ADDR_MASK); + xive |= XIVE_WRITE_ENABLE; + + out_be64(XIVE_UPDATE_REG(ics->regs), xive); +} + +static u64 xive_set_server(u64 xive, unsigned int server) +{ + u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT); + + xive &= mask; + xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT; + + return xive; +} + +static u64 xive_set_priority(u64 xive, unsigned int priority) +{ + u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT); + + xive &= mask; + xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT; + + return xive; +} + + +#ifdef CONFIG_SMP +/* Find logical CPUs within mask on a given chip and store result in ret */ +void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret) +{ + int cpu, chip; + struct device_node *cpu_dn, *dn; + const u32 *prop; + + cpumask_clear(ret); + for_each_cpu(cpu, mask) { + cpu_dn = of_get_cpu_node(cpu, NULL); + if (!cpu_dn) + continue; + + prop = of_get_property(cpu_dn, "at-node", NULL); + if (!prop) { + of_node_put(cpu_dn); + continue; + } + + dn = of_find_node_by_phandle(*prop); + of_node_put(cpu_dn); + + chip = wsp_get_chip_id(dn); + if (chip == chip_id) + cpumask_set_cpu(cpu, ret); + + of_node_put(dn); + } +} + +/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */ +static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, + const cpumask_t *affinity) +{ + cpumask_var_t avail, newmask; + int ret = -ENOMEM, cpu, cpu_rover = 0, target; + int index = hwirq - ics->hwirq_start; + unsigned int nodeid; + + BUG_ON(index < 0 || index >= ics->count); + + if (!ics->hwirq_cpu_map) + return -ENOMEM; + + if (!distribute_irqs) { + ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server; + return 0; + } + + /* Allocate needed CPU masks */ + if (!alloc_cpumask_var(&avail, GFP_KERNEL)) + goto ret; + if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) + goto freeavail; + + /* Find PBus attached to the source of this IRQ */ + nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */ + + /* Find CPUs that could handle this IRQ */ + if (affinity) + cpumask_and(avail, cpu_online_mask, affinity); + else + cpumask_copy(avail, cpu_online_mask); + + /* Narrow selection down to logical CPUs on the same chip */ + cpus_on_chip(nodeid, avail, newmask); + + /* Ensure we haven't narrowed it down to 0 */ + if (unlikely(cpumask_empty(newmask))) { + if (unlikely(cpumask_empty(avail))) { + ret = -1; + goto out; + } + cpumask_copy(newmask, avail); + } + + /* Choose a CPU out of those we narrowed it down to in round robin */ + target = hwirq % cpumask_weight(newmask); + for_each_cpu(cpu, newmask) { + if (cpu_rover++ >= target) { + ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu); + ret = 0; + goto out; + } + } + + /* Shouldn't happen */ + WARN_ON(1); + +out: + free_cpumask_var(newmask); +freeavail: + free_cpumask_var(avail); +ret: + if (ret < 0) { + ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask); + pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n", + hwirq, ics->hwirq_cpu_map[index]); + } + return ret; +} + +static void alloc_irq_map(struct wsp_ics *ics) +{ + int i; + + ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL); + if (!ics->hwirq_cpu_map) { + pr_warning("Allocate hwirq_cpu_map failed, " + "IRQ balancing disabled\n"); + return; + } + + for (i=0; i < ics->count; i++) + ics->hwirq_cpu_map[i] = xics_default_server; +} + +static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) +{ + int index = hwirq - ics->hwirq_start; + + BUG_ON(index < 0 || index >= ics->count); + + if (!ics->hwirq_cpu_map) + return xics_default_server; + + return ics->hwirq_cpu_map[index]; +} +#else /* !CONFIG_SMP */ +static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, + const cpumask_t *affinity) +{ + return 0; +} + +static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) +{ + return xics_default_server; +} + +static void alloc_irq_map(struct wsp_ics *ics) { } +#endif + +static void wsp_chip_unmask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct wsp_ics *ics; + int server; + u64 xive; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + + ics = d->chip_data; + if (WARN_ON(!ics)) + return; + + server = get_irq_server(ics, hw_irq); + + xive = wsp_ics_get_xive(ics, hw_irq); + xive = xive_set_server(xive, server); + xive = xive_set_priority(xive, DEFAULT_PRIORITY); + wsp_ics_set_xive(ics, hw_irq, xive); +} + +static unsigned int wsp_chip_startup(struct irq_data *d) +{ + /* unmask it */ + wsp_chip_unmask_irq(d); + return 0; +} + +static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics) +{ + u64 xive; + + if (hw_irq == XICS_IPI) + return; + + if (WARN_ON(!ics)) + return; + xive = wsp_ics_get_xive(ics, hw_irq); + xive = xive_set_server(xive, xics_default_server); + xive = xive_set_priority(xive, LOWEST_PRIORITY); + wsp_ics_set_xive(ics, hw_irq, xive); +} + +static void wsp_chip_mask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct wsp_ics *ics = d->chip_data; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + + wsp_mask_real_irq(hw_irq, ics); +} + +static int wsp_chip_set_affinity(struct irq_data *d, + const struct cpumask *cpumask, bool force) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct wsp_ics *ics; + int ret; + u64 xive; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return -1; + + ics = d->chip_data; + if (WARN_ON(!ics)) + return -1; + xive = wsp_ics_get_xive(ics, hw_irq); + + /* + * For the moment only implement delivery to all cpus or one cpu. + * Get current irq_server for the given irq + */ + ret = cache_hwirq_map(ics, d->irq, cpumask); + if (ret == -1) { + char cpulist[128]; + cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); + pr_warning("%s: No online cpus in the mask %s for irq %d\n", + __func__, cpulist, d->irq); + return -1; + } else if (ret == -ENOMEM) { + pr_warning("%s: Out of memory\n", __func__); + return -1; + } + + xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); + wsp_ics_set_xive(ics, hw_irq, xive); + + return 0; +} + +static struct irq_chip wsp_irq_chip = { + .name = "WSP ICS", + .irq_startup = wsp_chip_startup, + .irq_mask = wsp_chip_mask_irq, + .irq_unmask = wsp_chip_unmask_irq, + .irq_set_affinity = wsp_chip_set_affinity +}; + +static int wsp_ics_host_match(struct ics *ics, struct device_node *dn) +{ + /* All ICSs in the system implement a global irq number space, + * so match against them all. */ + return of_device_is_compatible(dn, "ibm,ppc-xics"); +} + +static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq) +{ + if (hwirq >= wsp_ics->hwirq_start && + hwirq < wsp_ics->hwirq_start + wsp_ics->count) + return 1; + + return 0; +} + +static int wsp_ics_map(struct ics *ics, unsigned int virq) +{ + struct wsp_ics *wsp_ics = to_wsp_ics(ics); + unsigned int hw_irq = virq_to_hw(virq); + unsigned long flags; + + if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) + return -ENOENT; + + irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq); + + irq_set_chip_data(virq, wsp_ics); + + spin_lock_irqsave(&wsp_ics->lock, flags); + bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0); + spin_unlock_irqrestore(&wsp_ics->lock, flags); + + return 0; +} + +static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq) +{ + struct wsp_ics *wsp_ics = to_wsp_ics(ics); + + if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) + return; + + pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq); + wsp_mask_real_irq(hw_irq, wsp_ics); +} + +static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq) +{ + struct wsp_ics *wsp_ics = to_wsp_ics(ics); + + if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) + return -ENOENT; + + return get_irq_server(wsp_ics, hw_irq); +} + +/* HW Number allocation API */ + +static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn) +{ + struct device_node *iparent; + int i; + + iparent = of_irq_find_parent(dn); + if (!iparent) { + pr_err("wsp_ics: Failed to find interrupt parent!\n"); + return NULL; + } + + for(i = 0; i < num_ics; i++) { + if(ics_list[i].dn == iparent) + break; + } + + if (i >= num_ics) { + pr_err("wsp_ics: Unable to find parent bitmap!\n"); + return NULL; + } + + return &ics_list[i]; +} + +int wsp_ics_alloc_irq(struct device_node *dn, int num) +{ + struct wsp_ics *ics; + int order, offset; + + ics = wsp_ics_find_dn_ics(dn); + if (!ics) + return -ENODEV; + + /* Fast, but overly strict if num isn't a power of two */ + order = get_count_order(num); + + spin_lock_irq(&ics->lock); + offset = bitmap_find_free_region(ics->bitmap, ics->count, order); + spin_unlock_irq(&ics->lock); + + if (offset < 0) + return offset; + + return offset + ics->hwirq_start; +} + +void wsp_ics_free_irq(struct device_node *dn, unsigned int irq) +{ + struct wsp_ics *ics; + + ics = wsp_ics_find_dn_ics(dn); + if (WARN_ON(!ics)) + return; + + spin_lock_irq(&ics->lock); + bitmap_release_region(ics->bitmap, irq, 0); + spin_unlock_irq(&ics->lock); +} + +/* Initialisation */ + +static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics, + struct device_node *dn) +{ + int len, i, j, size; + u32 start, count; + const u32 *p; + + size = BITS_TO_LONGS(ics->count) * sizeof(long); + ics->bitmap = kzalloc(size, GFP_KERNEL); + if (!ics->bitmap) { + pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n"); + return -ENOMEM; + } + + spin_lock_init(&ics->lock); + + p = of_get_property(dn, "available-ranges", &len); + if (!p || !len) { + /* FIXME this should be a WARN() once mambo is updated */ + pr_err("wsp_ics: No available-ranges defined for %s\n", + dn->full_name); + return 0; + } + + if (len % (2 * sizeof(u32)) != 0) { + /* FIXME this should be a WARN() once mambo is updated */ + pr_err("wsp_ics: Invalid available-ranges for %s\n", + dn->full_name); + return 0; + } + + bitmap_fill(ics->bitmap, ics->count); + + for (i = 0; i < len / sizeof(u32); i += 2) { + start = of_read_number(p + i, 1); + count = of_read_number(p + i + 1, 1); + + pr_devel("%s: start: %d count: %d\n", __func__, start, count); + + if ((start + count) > (ics->hwirq_start + ics->count) || + start < ics->hwirq_start) { + pr_err("wsp_ics: Invalid range! -> %d to %d\n", + start, start + count); + break; + } + + for (j = 0; j < count; j++) + bitmap_release_region(ics->bitmap, + (start + j) - ics->hwirq_start, 0); + } + + /* Ensure LSIs are not available for allocation */ + bitmap_allocate_region(ics->bitmap, ics->lsi_base, + get_count_order(ics->lsi_count)); + + return 0; +} + +static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn) +{ + u32 lsi_buid, msi_buid, msi_base, msi_count; + void __iomem *regs; + const u32 *p; + int rc, len, i; + u64 caps, buid; + + p = of_get_property(dn, "interrupt-ranges", &len); + if (!p || len < (2 * sizeof(u32))) { + pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n", + dn->full_name); + return -ENOENT; + } + + if (len > (2 * sizeof(u32))) { + pr_err("wsp_ics: Multiple ics ranges not supported.\n"); + return -EINVAL; + } + + regs = of_iomap(dn, 0); + if (!regs) { + pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name); + return -ENXIO; + } + + ics->hwirq_start = of_read_number(p, 1); + ics->count = of_read_number(p + 1, 1); + ics->regs = regs; + + ics->chip_id = wsp_get_chip_id(dn); + if (WARN_ON(ics->chip_id < 0)) + ics->chip_id = 0; + + /* Get some informations about the critter */ + caps = in_be64(ICS_INT_CAPS_REG(ics->regs)); + buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs)); + ics->lsi_count = caps >> 56; + msi_count = (caps >> 44) & 0x7ff; + + /* Note: LSI BUID is 9 bits, but really only 3 are BUID and the + * rest is mixed in the interrupt number. We store the whole + * thing though + */ + lsi_buid = (buid >> 48) & 0x1ff; + ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5; + msi_buid = (buid >> 37) & 0x7; + msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11; + + pr_info("wsp_ics: Found %s\n", dn->full_name); + pr_info("wsp_ics: irq range : 0x%06llx..0x%06llx\n", + ics->hwirq_start, ics->hwirq_start + ics->count - 1); + pr_info("wsp_ics: %4d LSIs : 0x%06x..0x%06x\n", + ics->lsi_count, ics->lsi_base, + ics->lsi_base + ics->lsi_count - 1); + pr_info("wsp_ics: %4d MSIs : 0x%06x..0x%06x\n", + msi_count, msi_base, + msi_base + msi_count - 1); + + /* Let's check the HW config is sane */ + if (ics->lsi_base < ics->hwirq_start || + (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count)) + pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n"); + if (msi_base < ics->hwirq_start || + (msi_base + msi_count) > (ics->hwirq_start + ics->count)) + pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n"); + + /* We don't check for overlap between LSI and MSI, which will happen + * if we use the same BUID, I'm not sure yet how legit that is. + */ + + rc = wsp_ics_bitmap_setup(ics, dn); + if (rc) { + iounmap(regs); + return rc; + } + + ics->dn = of_node_get(dn); + alloc_irq_map(ics); + + for(i = 0; i < ics->count; i++) + wsp_mask_real_irq(ics->hwirq_start + i, ics); + + ics->ics.map = wsp_ics_map; + ics->ics.mask_unknown = wsp_ics_mask_unknown; + ics->ics.get_server = wsp_ics_get_server; + ics->ics.host_match = wsp_ics_host_match; + + xics_register_ics(&ics->ics); + + return 0; +} + +static void __init wsp_ics_set_default_server(void) +{ + struct device_node *np; + u32 hwid; + + /* Find the server number for the boot cpu. */ + np = of_get_cpu_node(boot_cpuid, NULL); + BUG_ON(!np); + + hwid = get_hard_smp_processor_id(boot_cpuid); + + pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name); + xics_default_server = hwid; + + of_node_put(np); +} + +static int __init wsp_ics_init(void) +{ + struct device_node *dn; + struct wsp_ics *ics; + int rc, found; + + wsp_ics_set_default_server(); + + found = 0; + for_each_compatible_node(dn, NULL, "ibm,ppc-xics") + found++; + + if (found == 0) { + pr_err("wsp_ics: No ICS's found!\n"); + return -ENODEV; + } + + ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL); + if (!ics_list) { + pr_err("wsp_ics: No memory for structs.\n"); + return -ENOMEM; + } + + num_ics = 0; + ics = ics_list; + for_each_compatible_node(dn, NULL, "ibm,wsp-xics") { + rc = wsp_ics_setup(ics, dn); + if (rc == 0) { + ics++; + num_ics++; + } + } + + if (found != num_ics) { + pr_err("wsp_ics: Failed setting up %d ICS's\n", + found - num_ics); + return -1; + } + + return 0; +} + +void __init wsp_init_irq(void) +{ + wsp_ics_init(); + xics_init(); + + /* We need to patch our irq chip's EOI to point to the right ICP */ + wsp_irq_chip.irq_eoi = icp_ops->eoi; +} diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h new file mode 100644 index 000000000000..e34d53102640 --- /dev/null +++ b/arch/powerpc/platforms/wsp/ics.h @@ -0,0 +1,20 @@ +/* + * Copyright 2009 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ICS_H +#define __ICS_H + +#define XIVE_ADDR_MASK 0x7FFULL + +extern void wsp_init_irq(void); + +extern int wsp_ics_alloc_irq(struct device_node *dn, int num); +extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq); + +#endif /* __ICS_H */ diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c new file mode 100644 index 000000000000..be05631a3c1c --- /dev/null +++ b/arch/powerpc/platforms/wsp/opb_pic.c @@ -0,0 +1,332 @@ +/* + * IBM Onboard Peripheral Bus Interrupt Controller + * + * Copyright 2010 Jack Miller, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/time.h> + +#include <asm/reg_a2.h> +#include <asm/irq.h> + +#define OPB_NR_IRQS 32 + +#define OPB_MLSASIER 0x04 /* MLS Accumulated Status IER */ +#define OPB_MLSIR 0x50 /* MLS Interrupt Register */ +#define OPB_MLSIER 0x54 /* MLS Interrupt Enable Register */ +#define OPB_MLSIPR 0x58 /* MLS Interrupt Polarity Register */ +#define OPB_MLSIIR 0x5c /* MLS Interrupt Inputs Register */ + +static int opb_index = 0; + +struct opb_pic { + struct irq_host *host; + void *regs; + int index; + spinlock_t lock; +}; + +static u32 opb_in(struct opb_pic *opb, int offset) +{ + return in_be32(opb->regs + offset); +} + +static void opb_out(struct opb_pic *opb, int offset, u32 val) +{ + out_be32(opb->regs + offset, val); +} + +static void opb_unmask_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 ier, bitset; + + opb = d->chip_data; + bitset = (1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + ier = opb_in(opb, OPB_MLSIER); + opb_out(opb, OPB_MLSIER, ier | bitset); + ier = opb_in(opb, OPB_MLSIER); + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static void opb_mask_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 ier, mask; + + opb = d->chip_data; + mask = ~(1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + ier = opb_in(opb, OPB_MLSIER); + opb_out(opb, OPB_MLSIER, ier & mask); + ier = opb_in(opb, OPB_MLSIER); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static void opb_ack_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 bitset; + + opb = d->chip_data; + bitset = (1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + opb_out(opb, OPB_MLSIR, bitset); + opb_in(opb, OPB_MLSIR); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static void opb_mask_ack_irq(struct irq_data *d) +{ + struct opb_pic *opb; + unsigned long flags; + u32 bitset; + u32 ier, ir; + + opb = d->chip_data; + bitset = (1 << (31 - irqd_to_hwirq(d))); + + spin_lock_irqsave(&opb->lock, flags); + + ier = opb_in(opb, OPB_MLSIER); + opb_out(opb, OPB_MLSIER, ier & ~bitset); + ier = opb_in(opb, OPB_MLSIER); // Flush posted writes + + opb_out(opb, OPB_MLSIR, bitset); + ir = opb_in(opb, OPB_MLSIR); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); +} + +static int opb_set_irq_type(struct irq_data *d, unsigned int flow) +{ + struct opb_pic *opb; + unsigned long flags; + int invert, ipr, mask, bit; + + opb = d->chip_data; + + /* The only information we're interested in in the type is whether it's + * a high or low trigger. For high triggered interrupts, the polarity + * set for it in the MLS Interrupt Polarity Register is 0, for low + * interrupts it's 1 so that the proper input in the MLS Interrupt Input + * Register is interrupted as asserting the interrupt. */ + + switch (flow) { + case IRQ_TYPE_NONE: + opb_mask_irq(d); + return 0; + + case IRQ_TYPE_LEVEL_HIGH: + invert = 0; + break; + + case IRQ_TYPE_LEVEL_LOW: + invert = 1; + break; + + default: + return -EINVAL; + } + + bit = (1 << (31 - irqd_to_hwirq(d))); + mask = ~bit; + + spin_lock_irqsave(&opb->lock, flags); + + ipr = opb_in(opb, OPB_MLSIPR); + ipr = (ipr & mask) | (invert ? bit : 0); + opb_out(opb, OPB_MLSIPR, ipr); + ipr = opb_in(opb, OPB_MLSIPR); // Flush posted writes + + spin_unlock_irqrestore(&opb->lock, flags); + + /* Record the type in the interrupt descriptor */ + irqd_set_trigger_type(d, flow); + + return 0; +} + +static struct irq_chip opb_irq_chip = { + .name = "OPB", + .irq_mask = opb_mask_irq, + .irq_unmask = opb_unmask_irq, + .irq_mask_ack = opb_mask_ack_irq, + .irq_ack = opb_ack_irq, + .irq_set_type = opb_set_irq_type +}; + +static int opb_host_map(struct irq_host *host, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct opb_pic *opb; + + opb = host->host_data; + + /* Most of the important stuff is handled by the generic host code, like + * the lookup, so just attach some info to the virtual irq */ + + irq_set_chip_data(virq, opb); + irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq); + irq_set_irq_type(virq, IRQ_TYPE_NONE); + + return 0; +} + +static int opb_host_xlate(struct irq_host *host, struct device_node *dn, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) +{ + /* Interrupt size must == 2 */ + BUG_ON(intsize != 2); + *out_hwirq = intspec[0]; + *out_type = intspec[1]; + return 0; +} + +static struct irq_host_ops opb_host_ops = { + .map = opb_host_map, + .xlate = opb_host_xlate, +}; + +irqreturn_t opb_irq_handler(int irq, void *private) +{ + struct opb_pic *opb; + u32 ir, src, subvirq; + + opb = (struct opb_pic *) private; + + /* Read the OPB MLS Interrupt Register for + * asserted interrupts */ + ir = opb_in(opb, OPB_MLSIR); + if (!ir) + return IRQ_NONE; + + do { + /* Get 1 - 32 source, *NOT* bit */ + src = 32 - ffs(ir); + + /* Translate from the OPB's conception of interrupt number to + * Linux's virtual IRQ */ + + subvirq = irq_linear_revmap(opb->host, src); + + generic_handle_irq(subvirq); + } while ((ir = opb_in(opb, OPB_MLSIR))); + + return IRQ_HANDLED; +} + +struct opb_pic *opb_pic_init_one(struct device_node *dn) +{ + struct opb_pic *opb; + struct resource res; + + if (of_address_to_resource(dn, 0, &res)) { + printk(KERN_ERR "opb: Couldn't translate resource\n"); + return NULL; + } + + opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL); + if (!opb) { + printk(KERN_ERR "opb: Failed to allocate opb struct!\n"); + return NULL; + } + + /* Get access to the OPB MMIO registers */ + opb->regs = ioremap(res.start + 0x10000, 0x1000); + if (!opb->regs) { + printk(KERN_ERR "opb: Failed to allocate register space!\n"); + goto free_opb; + } + + /* Allocate an irq host so that Linux knows that despite only + * having one interrupt to issue, we're the controller for multiple + * hardware IRQs, so later we can lookup their virtual IRQs. */ + + opb->host = irq_alloc_host(dn, IRQ_HOST_MAP_LINEAR, + OPB_NR_IRQS, &opb_host_ops, -1); + + if (!opb->host) { + printk(KERN_ERR "opb: Failed to allocate IRQ host!\n"); + goto free_regs; + } + + opb->index = opb_index++; + spin_lock_init(&opb->lock); + opb->host->host_data = opb; + + /* Disable all interrupts by default */ + opb_out(opb, OPB_MLSASIER, 0); + opb_out(opb, OPB_MLSIER, 0); + + /* ACK any interrupts left by FW */ + opb_out(opb, OPB_MLSIR, 0xFFFFFFFF); + + return opb; + +free_regs: + iounmap(opb->regs); +free_opb: + kfree(opb); + return NULL; +} + +void __init opb_pic_init(void) +{ + struct device_node *dn; + struct opb_pic *opb; + int virq; + int rc; + + /* Call init_one for each OPB device */ + for_each_compatible_node(dn, NULL, "ibm,opb") { + + /* Fill in an OPB struct */ + opb = opb_pic_init_one(dn); + if (!opb) { + printk(KERN_WARNING "opb: Failed to init node, skipped!\n"); + continue; + } + + /* Map / get opb's hardware virtual irq */ + virq = irq_of_parse_and_map(dn, 0); + if (virq <= 0) { + printk("opb: irq_op_parse_and_map failed!\n"); + continue; + } + + /* Attach opb interrupt handler to new virtual IRQ */ + rc = request_irq(virq, opb_irq_handler, 0, "OPB LS Cascade", opb); + if (rc) { + printk("opb: request_irq failed: %d\n", rc); + continue; + } + + printk("OPB%d init with %d IRQs at %p\n", opb->index, + OPB_NR_IRQS, opb->regs); + } +} diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c new file mode 100644 index 000000000000..40f28916ff6c --- /dev/null +++ b/arch/powerpc/platforms/wsp/psr2.c @@ -0,0 +1,95 @@ +/* + * Copyright 2008-2011, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/smp.h> + +#include <asm/machdep.h> +#include <asm/system.h> +#include <asm/time.h> +#include <asm/udbg.h> + +#include "ics.h" +#include "wsp.h" + + +static void psr2_spin(void) +{ + hard_irq_disable(); + for (;;) ; +} + +static void psr2_restart(char *cmd) +{ + psr2_spin(); +} + +static int psr2_probe_devices(void) +{ + struct device_node *np; + + /* Our RTC is a ds1500. It seems to be programatically compatible + * with the ds1511 for which we have a driver so let's use that + */ + np = of_find_compatible_node(NULL, NULL, "dallas,ds1500"); + if (np != NULL) { + struct resource res; + if (of_address_to_resource(np, 0, &res) == 0) + platform_device_register_simple("ds1511", 0, &res, 1); + } + return 0; +} +machine_arch_initcall(psr2_md, psr2_probe_devices); + +static void __init psr2_setup_arch(void) +{ + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + scom_init_wsp(); + + /* Setup SMP callback */ +#ifdef CONFIG_SMP + a2_setup_smp(); +#endif +} + +static int __init psr2_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "ibm,psr2")) + return 0; + + return 1; +} + +static void __init psr2_init_irq(void) +{ + wsp_init_irq(); + opb_pic_init(); +} + +define_machine(psr2_md) { + .name = "PSR2 A2", + .probe = psr2_probe, + .setup_arch = psr2_setup_arch, + .restart = psr2_restart, + .power_off = psr2_spin, + .halt = psr2_spin, + .calibrate_decr = generic_calibrate_decr, + .init_IRQ = psr2_init_irq, + .progress = udbg_progress, + .power_save = book3e_idle, +}; diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c new file mode 100644 index 000000000000..141e78032097 --- /dev/null +++ b/arch/powerpc/platforms/wsp/scom_smp.c @@ -0,0 +1,427 @@ +/* + * SCOM support for A2 platforms + * + * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson, + * Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/cpumask.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/cputhreads.h> +#include <asm/reg_a2.h> +#include <asm/scom.h> +#include <asm/udbg.h> + +#include "wsp.h" + +#define SCOM_RAMC 0x2a /* Ram Command */ +#define SCOM_RAMC_TGT1_EXT 0x80000000 +#define SCOM_RAMC_SRC1_EXT 0x40000000 +#define SCOM_RAMC_SRC2_EXT 0x20000000 +#define SCOM_RAMC_SRC3_EXT 0x10000000 +#define SCOM_RAMC_ENABLE 0x00080000 +#define SCOM_RAMC_THREADSEL 0x00060000 +#define SCOM_RAMC_EXECUTE 0x00010000 +#define SCOM_RAMC_MSR_OVERRIDE 0x00008000 +#define SCOM_RAMC_MSR_PR 0x00004000 +#define SCOM_RAMC_MSR_GS 0x00002000 +#define SCOM_RAMC_FORCE 0x00001000 +#define SCOM_RAMC_FLUSH 0x00000800 +#define SCOM_RAMC_INTERRUPT 0x00000004 +#define SCOM_RAMC_ERROR 0x00000002 +#define SCOM_RAMC_DONE 0x00000001 +#define SCOM_RAMI 0x29 /* Ram Instruction */ +#define SCOM_RAMIC 0x28 /* Ram Instruction and Command */ +#define SCOM_RAMIC_INSN 0xffffffff00000000 +#define SCOM_RAMD 0x2d /* Ram Data */ +#define SCOM_RAMDH 0x2e /* Ram Data High */ +#define SCOM_RAMDL 0x2f /* Ram Data Low */ +#define SCOM_PCCR0 0x33 /* PC Configuration Register 0 */ +#define SCOM_PCCR0_ENABLE_DEBUG 0x80000000 +#define SCOM_PCCR0_ENABLE_RAM 0x40000000 +#define SCOM_THRCTL 0x30 /* Thread Control and Status */ +#define SCOM_THRCTL_T0_STOP 0x80000000 +#define SCOM_THRCTL_T1_STOP 0x40000000 +#define SCOM_THRCTL_T2_STOP 0x20000000 +#define SCOM_THRCTL_T3_STOP 0x10000000 +#define SCOM_THRCTL_T0_STEP 0x08000000 +#define SCOM_THRCTL_T1_STEP 0x04000000 +#define SCOM_THRCTL_T2_STEP 0x02000000 +#define SCOM_THRCTL_T3_STEP 0x01000000 +#define SCOM_THRCTL_T0_RUN 0x00800000 +#define SCOM_THRCTL_T1_RUN 0x00400000 +#define SCOM_THRCTL_T2_RUN 0x00200000 +#define SCOM_THRCTL_T3_RUN 0x00100000 +#define SCOM_THRCTL_T0_PM 0x00080000 +#define SCOM_THRCTL_T1_PM 0x00040000 +#define SCOM_THRCTL_T2_PM 0x00020000 +#define SCOM_THRCTL_T3_PM 0x00010000 +#define SCOM_THRCTL_T0_UDE 0x00008000 +#define SCOM_THRCTL_T1_UDE 0x00004000 +#define SCOM_THRCTL_T2_UDE 0x00002000 +#define SCOM_THRCTL_T3_UDE 0x00001000 +#define SCOM_THRCTL_ASYNC_DIS 0x00000800 +#define SCOM_THRCTL_TB_DIS 0x00000400 +#define SCOM_THRCTL_DEC_DIS 0x00000200 +#define SCOM_THRCTL_AND 0x31 /* Thread Control and Status */ +#define SCOM_THRCTL_OR 0x32 /* Thread Control and Status */ + + +static DEFINE_PER_CPU(scom_map_t, scom_ptrs); + +static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread) +{ + scom_map_t scom = per_cpu(scom_ptrs, cpu); + int tcpu; + + if (scom_map_ok(scom)) { + *first_thread = 0; + return scom; + } + + *first_thread = 1; + + scom = scom_map_device(np, 0); + + for (tcpu = cpu_first_thread_sibling(cpu); + tcpu <= cpu_last_thread_sibling(cpu); tcpu++) + per_cpu(scom_ptrs, tcpu) = scom; + + /* Hack: for the boot core, this will actually get called on + * the second thread up, not the first so our test above will + * set first_thread incorrectly. */ + if (cpu_first_thread_sibling(cpu) == 0) + *first_thread = 0; + + return scom; +} + +static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask) +{ + u64 cmd, mask, val; + int n = 0; + + cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28) + | ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE; + mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR; + + scom_write(scom, SCOM_RAMIC, cmd); + + while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) { + pr_devel("Waiting on RAMC = 0x%llx\n", val); + if (++n == 3) { + pr_err("RAMC timeout on instruction 0x%08x, thread %d\n", + insn, thread); + return -1; + } + } + + if (val & SCOM_RAMC_INTERRUPT) { + pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n", + insn, thread); + return -SCOM_RAMC_INTERRUPT; + } + + if (val & SCOM_RAMC_ERROR) { + pr_err("RAMC error on instruction 0x%08x, thread %d\n", + insn, thread); + return -SCOM_RAMC_ERROR; + } + + return 0; +} + +static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt, + u64 *out_gpr) +{ + int rc; + + /* or rN, rN, rN */ + u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11); + rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0); + if (rc) + return rc; + + *out_gpr = scom_read(scom, SCOM_RAMD); + + return 0; +} + +static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr) +{ + int rc, sprhi, sprlo; + u32 insn; + + sprhi = spr >> 5; + sprlo = spr & 0x1f; + insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */ + + if (spr == 0x0ff0) + insn = 0x7c2000a6; /* mfmsr r1 */ + + rc = a2_scom_ram(scom, thread, insn, 0xf); + if (rc) + return rc; + return a2_scom_getgpr(scom, thread, 1, 1, out_spr); +} + +static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr, + int alt, u64 val) +{ + u32 lis = 0x3c000000 | (gpr << 21); + u32 li = 0x38000000 | (gpr << 21); + u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16); + u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16); + u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16); + u32 highest = val >> 48; + u32 higher = (val >> 32) & 0xffff; + u32 high = (val >> 16) & 0xffff; + u32 low = val & 0xffff; + int lext = alt ? 0x8 : 0x0; + int oext = alt ? 0xf : 0x0; + int rc = 0; + + if (highest) + rc |= a2_scom_ram(scom, thread, lis | highest, lext); + + if (higher) { + if (highest) + rc |= a2_scom_ram(scom, thread, oris | higher, oext); + else + rc |= a2_scom_ram(scom, thread, li | higher, lext); + } + + if (highest || higher) + rc |= a2_scom_ram(scom, thread, rldicr32, oext); + + if (high) { + if (highest || higher) + rc |= a2_scom_ram(scom, thread, oris | high, oext); + else + rc |= a2_scom_ram(scom, thread, lis | high, lext); + } + + if (highest || higher || high) + rc |= a2_scom_ram(scom, thread, ori | low, oext); + else + rc |= a2_scom_ram(scom, thread, li | low, lext); + + return rc; +} + +static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val) +{ + int sprhi = spr >> 5; + int sprlo = spr & 0x1f; + /* mtspr spr, r1 */ + u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11); + + if (spr == 0x0ff0) + insn = 0x7c200124; /* mtmsr r1 */ + + if (a2_scom_setgpr(scom, thread, 1, 1, val)) + return -1; + + return a2_scom_ram(scom, thread, insn, 0xf); +} + +static int a2_scom_initial_tlb(scom_map_t scom, int thread) +{ + extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[]; + extern u32 a2_tlbinit_after_iprot_flush[]; + extern u32 a2_tlbinit_after_linear_map[]; + u32 assoc, entries, i; + u64 epn, tlbcfg; + u32 *p; + int rc; + + /* Invalidate all entries (including iprot) */ + + rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg); + if (rc) + goto scom_fail; + entries = tlbcfg & TLBnCFG_N_ENTRY; + assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24; + epn = 0; + + /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ + a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531); + /* Set MMUCR3 to write all thids bit to the TLB */ + a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f); + + /* Set MAS1 for 1G page size, and MAS2 to our initial EPN */ + a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB)); + a2_scom_setspr(scom, thread, SPRN_MAS2, epn); + for (i = 0; i < entries; i++) { + + a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc)); + + /* tlbwe */ + rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0); + if (rc) + goto scom_fail; + + /* Next entry is new address? */ + if((i + 1) % assoc == 0) { + epn += (1 << 30); + a2_scom_setspr(scom, thread, SPRN_MAS2, epn); + } + } + + /* Setup args for linear mapping */ + rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0)); + if (rc) + goto scom_fail; + + /* Linear mapping */ + for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) { + rc = a2_scom_ram(scom, thread, *p, 0); + if (rc) + goto scom_fail; + } + + /* + * For the boot thread, between the linear mapping and the debug + * mappings there is a loop to flush iprot mappings. Ramming doesn't do + * branches, but the secondary threads don't need to be nearly as smart + * (i.e. we don't need to worry about invalidating the mapping we're + * standing on). + */ + + /* Debug mappings. Expects r11 = MAS0 from linear map (set above) */ + for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) { + rc = a2_scom_ram(scom, thread, *p, 0); + if (rc) + goto scom_fail; + } + +scom_fail: + if (rc) + pr_err("Setting up initial TLB failed, err %d\n", rc); + + if (rc == -SCOM_RAMC_INTERRUPT) { + /* Interrupt, dump some status */ + int rc[10]; + u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2; + rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar); + rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0); + rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1); + rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr); + rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0); + rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1); + rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2); + rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3); + rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8); + rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2); + pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]); + pr_err(" retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]); + pr_err(" retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]); + pr_err(" retreived ESR =0x%llx (err %d)\n", esr, rc[3]); + pr_err(" retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]); + pr_err(" retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]); + pr_err(" retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]); + pr_err(" retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]); + pr_err(" retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]); + pr_err(" retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]); + } + + return rc; +} + +int __devinit a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, + struct device_node *np) +{ + u64 init_iar, init_msr, init_ccr2; + unsigned long start_here; + int rc, core_setup; + scom_map_t scom; + u64 pccr0; + + scom = get_scom(lcpu, np, &core_setup); + if (!scom) { + printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu); + return -1; + } + + pr_devel("Bringing up CPU%d using SCOM...\n", lcpu); + + pccr0 = scom_read(scom, SCOM_PCCR0); + scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG | + SCOM_PCCR0_ENABLE_RAM); + + /* Stop the thead with THRCTL. If we are setting up the TLB we stop all + * threads. We also disable asynchronous interrupts while RAMing. + */ + if (core_setup) + scom_write(scom, SCOM_THRCTL_OR, + SCOM_THRCTL_T0_STOP | + SCOM_THRCTL_T1_STOP | + SCOM_THRCTL_T2_STOP | + SCOM_THRCTL_T3_STOP | + SCOM_THRCTL_ASYNC_DIS); + else + scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx); + + /* Flush its pipeline just in case */ + scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) | + SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE); + + a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar); + a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr); + a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2); + + /* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */ + rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM); + if (rc) { + pr_err("Failed to set MSR ! err %d\n", rc); + return rc; + } + + /* RAM in an sync/isync for the sake of it */ + a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0); + a2_scom_ram(scom, thr_idx, 0x4c00012c, 0); + + if (core_setup) { + pr_devel("CPU%d is first thread in core, initializing TLB...\n", + lcpu); + rc = a2_scom_initial_tlb(scom, thr_idx); + if (rc) + goto fail; + } + + start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init + : generic_secondary_thread_init); + pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here); + + rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here); + rc |= a2_scom_setgpr(scom, thr_idx, 3, 0, + get_hard_smp_processor_id(lcpu)); + /* + * Tell book3e_secondary_core_init not to set up the TLB, we've + * already done that. + */ + rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1); + + rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx); + + scom_write(scom, SCOM_RAMC, 0); + scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx)); + scom_write(scom, SCOM_PCCR0, pccr0); +fail: + pr_devel(" SCOM initialization %s\n", rc ? "failed" : "succeeded"); + if (rc) { + pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n", + init_iar, init_msr, init_ccr2); + } + + return rc; +} diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c new file mode 100644 index 000000000000..4052e2259f30 --- /dev/null +++ b/arch/powerpc/platforms/wsp/scom_wsp.c @@ -0,0 +1,77 @@ +/* + * SCOM backend for WSP + * + * Copyright 2010 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/cpumask.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/cputhreads.h> +#include <asm/reg_a2.h> +#include <asm/scom.h> +#include <asm/udbg.h> + +#include "wsp.h" + + +static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count) +{ + struct resource r; + u64 xscom_addr; + + if (!of_get_property(dev, "scom-controller", NULL)) { + pr_err("%s: device %s is not a SCOM controller\n", + __func__, dev->full_name); + return SCOM_MAP_INVALID; + } + + if (of_address_to_resource(dev, 0, &r)) { + pr_debug("Failed to find SCOM controller address\n"); + return 0; + } + + /* Transform the SCOM address into an XSCOM offset */ + xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3); + + return (scom_map_t)ioremap(r.start + xscom_addr, count << 3); +} + +static void wsp_scom_unmap(scom_map_t map) +{ + iounmap((void *)map); +} + +static u64 wsp_scom_read(scom_map_t map, u32 reg) +{ + u64 __iomem *addr = (u64 __iomem *)map; + + return in_be64(addr + reg); +} + +static void wsp_scom_write(scom_map_t map, u32 reg, u64 value) +{ + u64 __iomem *addr = (u64 __iomem *)map; + + return out_be64(addr + reg, value); +} + +static const struct scom_controller wsp_scom_controller = { + .map = wsp_scom_map, + .unmap = wsp_scom_unmap, + .read = wsp_scom_read, + .write = wsp_scom_write +}; + +void scom_init_wsp(void) +{ + scom_init(&wsp_scom_controller); +} diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c new file mode 100644 index 000000000000..11ac2f05e01c --- /dev/null +++ b/arch/powerpc/platforms/wsp/setup.c @@ -0,0 +1,36 @@ +/* + * Copyright 2010 Michael Ellerman, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/of_platform.h> + +#include "wsp.h" + +/* + * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property. + * Won't work for nodes that are not a descendant of a wsp node. + */ +int wsp_get_chip_id(struct device_node *dn) +{ + const u32 *p; + int rc; + + /* Start looking at the specified node, not its parent */ + dn = of_node_get(dn); + while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL))) + dn = of_get_next_parent(dn); + + if (!dn) + return -1; + + rc = *p; + of_node_put(dn); + + return rc; +} diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c new file mode 100644 index 000000000000..9d20fa9d3710 --- /dev/null +++ b/arch/powerpc/platforms/wsp/smp.c @@ -0,0 +1,88 @@ +/* + * SMP Support for A2 platforms + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/smp.h> + +#include <asm/dbell.h> +#include <asm/machdep.h> +#include <asm/xics.h> + +#include "ics.h" +#include "wsp.h" + +static void __devinit smp_a2_setup_cpu(int cpu) +{ + doorbell_setup_this_cpu(); + + if (cpu != boot_cpuid) + xics_setup_cpu(); +} + +int __devinit smp_a2_kick_cpu(int nr) +{ + const char *enable_method; + struct device_node *np; + int thr_idx; + + if (nr < 0 || nr >= NR_CPUS) + return -ENOENT; + + np = of_get_cpu_node(nr, &thr_idx); + if (!np) + return -ENODEV; + + enable_method = of_get_property(np, "enable-method", NULL); + pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method); + + if (!enable_method) { + printk(KERN_ERR "CPU%d has no enable-method\n", nr); + return -ENOENT; + } else if (strcmp(enable_method, "ibm,a2-scom") == 0) { + if (a2_scom_startup_cpu(nr, thr_idx, np)) + return -1; + } else { + printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n", + nr, enable_method); + return -EINVAL; + } + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; + + return 0; +} + +static int __init smp_a2_probe(void) +{ + return cpus_weight(cpu_possible_map); +} + +static struct smp_ops_t a2_smp_ops = { + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = doorbell_cause_ipi, + .probe = smp_a2_probe, + .kick_cpu = smp_a2_kick_cpu, + .setup_cpu = smp_a2_setup_cpu, +}; + +void __init a2_setup_smp(void) +{ + smp_ops = &a2_smp_ops; +} diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h new file mode 100644 index 000000000000..7c3e087fd2f2 --- /dev/null +++ b/arch/powerpc/platforms/wsp/wsp.h @@ -0,0 +1,17 @@ +#ifndef __WSP_H +#define __WSP_H + +#include <asm/wsp.h> + +extern void wsp_setup_pci(void); +extern void scom_init_wsp(void); + +extern void a2_setup_smp(void); +extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, + struct device_node *np); +int smp_a2_cpu_bootable(unsigned int nr); +int __devinit smp_a2_kick_cpu(int nr); + +void opb_pic_init(void); + +#endif /* __WSP_H */ diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index 396582835cb5..d775fd148d13 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -12,3 +12,13 @@ config PPC_MSI_BITMAP depends on PCI_MSI default y if MPIC default y if FSL_PCI + +source "arch/powerpc/sysdev/xics/Kconfig" + +config PPC_SCOM + bool + +config SCOM_DEBUGFS + bool "Expose SCOM controllers via debugfs" + depends on PPC_SCOM + default n diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 1e0c933ef772..6076e0074a87 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -57,3 +57,9 @@ obj-$(CONFIG_PPC_MPC52xx) += mpc5xxx_clocks.o ifeq ($(CONFIG_SUSPEND),y) obj-$(CONFIG_6xx) += 6xx-suspend.o endif + +obj-$(CONFIG_PPC_SCOM) += scom.o + +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +obj-$(CONFIG_PPC_XICS) += xics/ diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 1636dd896707..bd0d54060b94 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -216,7 +216,7 @@ static int axon_ram_probe(struct platform_device *device) AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20); bank->ph_addr = resource.start; - bank->io_addr = (unsigned long) ioremap_flags( + bank->io_addr = (unsigned long) ioremap_prot( bank->ph_addr, bank->size, _PAGE_NO_CACHE); if (bank->io_addr == 0) { dev_err(&device->dev, "ioremap() failed\n"); diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index e0bc944eb23f..350787c83e22 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -58,21 +58,21 @@ static struct irq_host *cpm_pic_host; static void cpm_mask_irq(struct irq_data *d) { - unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec)); } static void cpm_unmask_irq(struct irq_data *d) { - unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec)); } static void cpm_end_irq(struct irq_data *d) { - unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq; + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); out_be32(&cpic_reg->cpic_cisr, (1 << cpm_vec)); } @@ -157,7 +157,7 @@ unsigned int cpm_pic_init(void) goto end; /* Initialize the CPM interrupt controller. */ - hwirq = (unsigned int)irq_map[sirq].hwirq; + hwirq = (unsigned int)virq_to_hw(sirq); out_be32(&cpic_reg->cpic_cicr, (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) | ((hwirq/2) << 13) | CICR_HP_MASK); diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index 5495c1be472b..bcab50e2a9eb 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -81,7 +81,7 @@ static const u_char irq_to_siubit[] = { static void cpm2_mask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -93,7 +93,7 @@ static void cpm2_mask_irq(struct irq_data *d) static void cpm2_unmask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -105,7 +105,7 @@ static void cpm2_unmask_irq(struct irq_data *d) static void cpm2_ack(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -116,7 +116,7 @@ static void cpm2_ack(struct irq_data *d) static void cpm2_end_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = virq_to_hw(d->irq); + unsigned int irq_nr = irqd_to_hwirq(d); bit = irq_to_siubit[irq_nr]; word = irq_to_siureg[irq_nr]; @@ -133,7 +133,7 @@ static void cpm2_end_irq(struct irq_data *d) static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type) { - unsigned int src = virq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned int vold, vnew, edibit; /* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c index 54fb1922fe30..116415899176 100644 --- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c +++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c @@ -106,10 +106,10 @@ int __init instantiate_cache_sram(struct platform_device *dev, goto out_free; } - cache_sram->base_virt = ioremap_flags(cache_sram->base_phys, + cache_sram->base_virt = ioremap_prot(cache_sram->base_phys, cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL); if (!cache_sram->base_virt) { - dev_err(&dev->dev, "%s: ioremap_flags failed\n", + dev_err(&dev->dev, "%s: ioremap_prot failed\n", dev->dev.of_node->full_name); ret = -ENOMEM; goto out_release; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index d5679dc1e20f..92e78333c47c 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -110,7 +110,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; - msi_data = irq_get_handler_data(entry->irq); + msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); msi_bitmap_free_hwirqs(&msi_data->bitmap, virq_to_hw(entry->irq), 1); @@ -168,7 +168,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = -ENOSPC; goto out_free; } - irq_set_handler_data(virq, msi_data); + /* chip_data is msi_data via host->hostdata in host->map() */ irq_set_msi_desc(virq, entry); fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data); @@ -193,7 +193,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 have_shift = 0; struct fsl_msi_cascade_data *cascade_data; - cascade_data = (struct fsl_msi_cascade_data *)irq_get_handler_data(irq); + cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; raw_spin_lock(&desc->lock); @@ -253,7 +253,7 @@ unlock: static int fsl_of_msi_remove(struct platform_device *ofdev) { - struct fsl_msi *msi = ofdev->dev.platform_data; + struct fsl_msi *msi = platform_get_drvdata(ofdev); int virq, i; struct fsl_msi_cascade_data *cascade_data; @@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi, return 0; } +static const struct of_device_id fsl_of_msi_ids[]; static int __devinit fsl_of_msi_probe(struct platform_device *dev) { + const struct of_device_id *match; struct fsl_msi *msi; struct resource res; int err, i, j, irq_index, count; @@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) u32 offset; static const u32 all_avail[] = { 0, NR_MSI_IRQS }; - if (!dev->dev.of_match) + match = of_match_device(fsl_of_msi_ids, &dev->dev); + if (!match) return -EINVAL; - features = dev->dev.of_match->data; + features = match->data; printk(KERN_DEBUG "Setting up Freescale MSI support\n"); @@ -327,7 +330,7 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) dev_err(&dev->dev, "No memory for MSI structure\n"); return -ENOMEM; } - dev->dev.platform_data = msi; + platform_set_drvdata(dev, msi); msi->irqhost = irq_alloc_host(dev->dev.of_node, IRQ_HOST_MAP_LINEAR, NR_MSI_IRQS, &fsl_msi_host_ops, 0); diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 142770cb84b6..d18bb27e4df9 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -185,18 +185,6 @@ static int i8259_host_map(struct irq_host *h, unsigned int virq, return 0; } -static void i8259_host_unmap(struct irq_host *h, unsigned int virq) -{ - /* Make sure irq is masked in hardware */ - i8259_mask_irq(irq_get_irq_data(virq)); - - /* remove chip and handler */ - irq_set_chip_and_handler(virq, NULL, NULL); - - /* Make sure it's completed */ - synchronize_irq(virq); -} - static int i8259_host_xlate(struct irq_host *h, struct device_node *ct, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_flags) @@ -220,7 +208,6 @@ static int i8259_host_xlate(struct irq_host *h, struct device_node *ct, static struct irq_host_ops i8259_host_ops = { .match = i8259_host_match, .map = i8259_host_map, - .unmap = i8259_host_unmap, .xlate = i8259_host_xlate, }; diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index fa438be962b7..7367d17364cb 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -18,7 +18,7 @@ #include <linux/stddef.h> #include <linux/sched.h> #include <linux/signal.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/device.h> #include <linux/bootmem.h> #include <linux/spinlock.h> @@ -521,12 +521,10 @@ static inline struct ipic * ipic_from_irq(unsigned int virq) return primary_ipic; } -#define ipic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static void ipic_unmask_irq(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -542,7 +540,7 @@ static void ipic_unmask_irq(struct irq_data *d) static void ipic_mask_irq(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -562,7 +560,7 @@ static void ipic_mask_irq(struct irq_data *d) static void ipic_ack_irq(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -581,7 +579,7 @@ static void ipic_ack_irq(struct irq_data *d) static void ipic_mask_irq_and_ack(struct irq_data *d) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -604,7 +602,7 @@ static void ipic_mask_irq_and_ack(struct irq_data *d) static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type) { struct ipic *ipic = ipic_from_irq(d->irq); - unsigned int src = ipic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned int vold, vnew, edibit; if (flow_type == IRQ_TYPE_NONE) @@ -793,7 +791,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags) int ipic_set_priority(unsigned int virq, unsigned int priority) { struct ipic *ipic = ipic_from_irq(virq); - unsigned int src = ipic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); u32 temp; if (priority > 7) @@ -821,7 +819,7 @@ int ipic_set_priority(unsigned int virq, unsigned int priority) void ipic_set_highest_priority(unsigned int virq) { struct ipic *ipic = ipic_from_irq(virq); - unsigned int src = ipic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); u32 temp; temp = ipic_read(ipic->regs, IPIC_SICFR); @@ -902,7 +900,7 @@ static struct { u32 sercr; } ipic_saved_state; -static int ipic_suspend(struct sys_device *sdev, pm_message_t state) +static int ipic_suspend(void) { struct ipic *ipic = primary_ipic; @@ -933,7 +931,7 @@ static int ipic_suspend(struct sys_device *sdev, pm_message_t state) return 0; } -static int ipic_resume(struct sys_device *sdev) +static void ipic_resume(void) { struct ipic *ipic = primary_ipic; @@ -949,44 +947,26 @@ static int ipic_resume(struct sys_device *sdev) ipic_write(ipic->regs, IPIC_SECNR, ipic_saved_state.secnr); ipic_write(ipic->regs, IPIC_SERMR, ipic_saved_state.sermr); ipic_write(ipic->regs, IPIC_SERCR, ipic_saved_state.sercr); - - return 0; } #else #define ipic_suspend NULL #define ipic_resume NULL #endif -static struct sysdev_class ipic_sysclass = { - .name = "ipic", +static struct syscore_ops ipic_syscore_ops = { .suspend = ipic_suspend, .resume = ipic_resume, }; -static struct sys_device device_ipic = { - .id = 0, - .cls = &ipic_sysclass, -}; - -static int __init init_ipic_sysfs(void) +static int __init init_ipic_syscore(void) { - int rc; - if (!primary_ipic || !primary_ipic->regs) return -ENODEV; - printk(KERN_DEBUG "Registering ipic with sysfs...\n"); - rc = sysdev_class_register(&ipic_sysclass); - if (rc) { - printk(KERN_ERR "Failed registering ipic sys class\n"); - return -ENODEV; - } - rc = sysdev_register(&device_ipic); - if (rc) { - printk(KERN_ERR "Failed registering ipic sys device\n"); - return -ENODEV; - } + printk(KERN_DEBUG "Registering ipic system core operations\n"); + register_syscore_ops(&ipic_syscore_ops); + return 0; } -subsys_initcall(init_ipic_sysfs); +subsys_initcall(init_ipic_syscore); diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c index 207324209065..ddc877a3a23a 100644 --- a/arch/powerpc/sysdev/mmio_nvram.c +++ b/arch/powerpc/sysdev/mmio_nvram.c @@ -115,6 +115,8 @@ int __init mmio_nvram_init(void) int ret; nvram_node = of_find_node_by_type(NULL, "nvram"); + if (!nvram_node) + nvram_node = of_find_compatible_node(NULL, NULL, "nvram"); if (!nvram_node) { printk(KERN_WARNING "nvram: no node found in device-tree\n"); return -ENODEV; diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index a88800ff4d01..20924f2246f0 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -28,7 +28,7 @@ int cpm_get_irq(struct pt_regs *regs); static void mpc8xx_unmask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; word = irq_nr >> 5; @@ -40,7 +40,7 @@ static void mpc8xx_unmask_irq(struct irq_data *d) static void mpc8xx_mask_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; word = irq_nr >> 5; @@ -52,7 +52,7 @@ static void mpc8xx_mask_irq(struct irq_data *d) static void mpc8xx_ack(struct irq_data *d) { int bit; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; out_be32(&siu_reg->sc_sipend, 1 << (31-bit)); @@ -61,7 +61,7 @@ static void mpc8xx_ack(struct irq_data *d) static void mpc8xx_end_irq(struct irq_data *d) { int bit, word; - unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq; + unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d); bit = irq_nr & 0x1f; word = irq_nr >> 5; @@ -73,7 +73,7 @@ static void mpc8xx_end_irq(struct irq_data *d) static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type) { if (flow_type & IRQ_TYPE_EDGE_FALLING) { - irq_hw_number_t hw = (unsigned int)irq_map[d->irq].hwirq; + irq_hw_number_t hw = (unsigned int)irqd_to_hwirq(d); unsigned int siel = in_be32(&siu_reg->sc_siel); /* only external IRQ senses are programmable */ diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c index 0892a2841c2b..fb4963abdf55 100644 --- a/arch/powerpc/sysdev/mpc8xxx_gpio.c +++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c @@ -163,7 +163,7 @@ static void mpc8xxx_irq_unmask(struct irq_data *d) spin_lock_irqsave(&mpc8xxx_gc->lock, flags); - setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); } @@ -176,7 +176,7 @@ static void mpc8xxx_irq_mask(struct irq_data *d) spin_lock_irqsave(&mpc8xxx_gc->lock, flags); - clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); } @@ -186,7 +186,7 @@ static void mpc8xxx_irq_ack(struct irq_data *d) struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d); struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc; - out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(irqd_to_hwirq(d))); } static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) @@ -199,14 +199,14 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) case IRQ_TYPE_EDGE_FALLING: spin_lock_irqsave(&mpc8xxx_gc->lock, flags); setbits32(mm->regs + GPIO_ICR, - mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); break; case IRQ_TYPE_EDGE_BOTH: spin_lock_irqsave(&mpc8xxx_gc->lock, flags); clrbits32(mm->regs + GPIO_ICR, - mpc8xxx_gpio2mask(virq_to_hw(d->irq))); + mpc8xxx_gpio2mask(irqd_to_hwirq(d))); spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags); break; @@ -221,7 +221,7 @@ static int mpc512x_irq_set_type(struct irq_data *d, unsigned int flow_type) { struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d); struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc; - unsigned long gpio = virq_to_hw(d->irq); + unsigned long gpio = irqd_to_hwirq(d); void __iomem *reg; unsigned int shift; unsigned long flags; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index f91c065bed5a..3a8de5bb628a 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -6,6 +6,7 @@ * with various broken implementations of this HW. * * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * Copyright 2010-2011 Freescale Semiconductor, Inc. * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive @@ -27,6 +28,7 @@ #include <linux/spinlock.h> #include <linux/pci.h> #include <linux/slab.h> +#include <linux/syscore_ops.h> #include <asm/ptrace.h> #include <asm/signal.h> @@ -218,6 +220,28 @@ static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 valu _mpic_write(mpic->reg_type, &mpic->gregs, offset, value); } +static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm) +{ + unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) + + ((tm & 3) * MPIC_INFO(TIMER_STRIDE)); + + if (tm >= 4) + offset += 0x1000 / 4; + + return _mpic_read(mpic->reg_type, &mpic->tmregs, offset); +} + +static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value) +{ + unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) + + ((tm & 3) * MPIC_INFO(TIMER_STRIDE)); + + if (tm >= 4) + offset += 0x1000 / 4; + + _mpic_write(mpic->reg_type, &mpic->tmregs, offset, value); +} + static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg) { unsigned int cpu = mpic_processor_id(mpic); @@ -268,6 +292,8 @@ static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no, #define mpic_write(b,r,v) _mpic_write(mpic->reg_type,&(b),(r),(v)) #define mpic_ipi_read(i) _mpic_ipi_read(mpic,(i)) #define mpic_ipi_write(i,v) _mpic_ipi_write(mpic,(i),(v)) +#define mpic_tm_read(i) _mpic_tm_read(mpic,(i)) +#define mpic_tm_write(i,v) _mpic_tm_write(mpic,(i),(v)) #define mpic_cpu_read(i) _mpic_cpu_read(mpic,(i)) #define mpic_cpu_write(i,v) _mpic_cpu_write(mpic,(i),(v)) #define mpic_irq_read(s,r) _mpic_irq_read(mpic,(s),(r)) @@ -607,8 +633,6 @@ static int irq_choose_cpu(const struct cpumask *mask) } #endif -#define mpic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - /* Find an mpic associated with a given linux interrupt */ static struct mpic *mpic_find(unsigned int irq) { @@ -621,11 +645,18 @@ static struct mpic *mpic_find(unsigned int irq) /* Determine if the linux irq is an IPI */ static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq) { - unsigned int src = mpic_irq_to_hw(irq); + unsigned int src = virq_to_hw(irq); return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]); } +/* Determine if the linux irq is a timer */ +static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int irq) +{ + unsigned int src = virq_to_hw(irq); + + return (src >= mpic->timer_vecs[0] && src <= mpic->timer_vecs[7]); +} /* Convert a cpu mask from logical to physical cpu numbers. */ static inline u32 mpic_physmask(u32 cpumask) @@ -633,7 +664,7 @@ static inline u32 mpic_physmask(u32 cpumask) int i; u32 mask = 0; - for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1) + for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1) mask |= (cpumask & 1) << get_hard_smp_processor_id(i); return mask; } @@ -674,7 +705,7 @@ void mpic_unmask_irq(struct irq_data *d) { unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src); @@ -695,7 +726,7 @@ void mpic_mask_irq(struct irq_data *d) { unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src); @@ -733,7 +764,7 @@ void mpic_end_irq(struct irq_data *d) static void mpic_unmask_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); mpic_unmask_irq(d); @@ -744,7 +775,7 @@ static void mpic_unmask_ht_irq(struct irq_data *d) static unsigned int mpic_startup_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); mpic_unmask_irq(d); mpic_startup_ht_interrupt(mpic, src, irqd_is_level_type(d)); @@ -755,7 +786,7 @@ static unsigned int mpic_startup_ht_irq(struct irq_data *d) static void mpic_shutdown_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); mpic_shutdown_ht_interrupt(mpic, src); mpic_mask_irq(d); @@ -764,7 +795,7 @@ static void mpic_shutdown_ht_irq(struct irq_data *d) static void mpic_end_ht_irq(struct irq_data *d) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); #ifdef DEBUG_IRQ DBG("%s: end_irq: %d\n", mpic->name, d->irq); @@ -785,7 +816,7 @@ static void mpic_end_ht_irq(struct irq_data *d) static void mpic_unmask_ipi(struct irq_data *d) { struct mpic *mpic = mpic_from_ipi(d); - unsigned int src = mpic_irq_to_hw(d->irq) - mpic->ipi_vecs[0]; + unsigned int src = virq_to_hw(d->irq) - mpic->ipi_vecs[0]; DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, src); mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); @@ -812,27 +843,42 @@ static void mpic_end_ipi(struct irq_data *d) #endif /* CONFIG_SMP */ +static void mpic_unmask_tm(struct irq_data *d) +{ + struct mpic *mpic = mpic_from_irq_data(d); + unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0]; + + DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, irq, src); + mpic_tm_write(src, mpic_tm_read(src) & ~MPIC_VECPRI_MASK); + mpic_tm_read(src); +} + +static void mpic_mask_tm(struct irq_data *d) +{ + struct mpic *mpic = mpic_from_irq_data(d); + unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0]; + + mpic_tm_write(src, mpic_tm_read(src) | MPIC_VECPRI_MASK); + mpic_tm_read(src); +} + int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); if (mpic->flags & MPIC_SINGLE_DEST_CPU) { int cpuid = irq_choose_cpu(cpumask); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid); } else { - cpumask_var_t tmp; - - alloc_cpumask_var(&tmp, GFP_KERNEL); + u32 mask = cpumask_bits(cpumask)[0]; - cpumask_and(tmp, cpumask, cpu_online_mask); + mask &= cpumask_bits(cpu_online_mask)[0]; mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), - mpic_physmask(cpumask_bits(tmp)[0])); - - free_cpumask_var(tmp); + mpic_physmask(mask)); } return 0; @@ -862,7 +908,7 @@ static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type) { struct mpic *mpic = mpic_from_irq_data(d); - unsigned int src = mpic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned int vecpri, vold, vnew; DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n", @@ -898,7 +944,7 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type) void mpic_set_vector(unsigned int virq, unsigned int vector) { struct mpic *mpic = mpic_from_irq(virq); - unsigned int src = mpic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); unsigned int vecpri; DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n", @@ -916,7 +962,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector) void mpic_set_destination(unsigned int virq, unsigned int cpuid) { struct mpic *mpic = mpic_from_irq(virq); - unsigned int src = mpic_irq_to_hw(virq); + unsigned int src = virq_to_hw(virq); DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n", mpic, virq, src, cpuid); @@ -942,6 +988,12 @@ static struct irq_chip mpic_ipi_chip = { }; #endif /* CONFIG_SMP */ +static struct irq_chip mpic_tm_chip = { + .irq_mask = mpic_mask_tm, + .irq_unmask = mpic_unmask_tm, + .irq_eoi = mpic_end_irq, +}; + #ifdef CONFIG_MPIC_U3_HT_IRQS static struct irq_chip mpic_irq_ht_chip = { .irq_startup = mpic_startup_ht_irq, @@ -985,6 +1037,16 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq, } #endif /* CONFIG_SMP */ + if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) { + WARN_ON(!(mpic->flags & MPIC_PRIMARY)); + + DBG("mpic: mapping as timer\n"); + irq_set_chip_data(virq, mpic); + irq_set_chip_and_handler(virq, &mpic->hc_tm, + handle_fasteoi_irq); + return 0; + } + if (hw >= mpic->irq_count) return -EINVAL; @@ -1025,6 +1087,7 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct, irq_hw_number_t *out_hwirq, unsigned int *out_flags) { + struct mpic *mpic = h->host_data; static unsigned char map_mpic_senses[4] = { IRQ_TYPE_EDGE_RISING, IRQ_TYPE_LEVEL_LOW, @@ -1033,7 +1096,38 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct, }; *out_hwirq = intspec[0]; - if (intsize > 1) { + if (intsize >= 4 && (mpic->flags & MPIC_FSL)) { + /* + * Freescale MPIC with extended intspec: + * First two cells are as usual. Third specifies + * an "interrupt type". Fourth is type-specific data. + * + * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt + */ + switch (intspec[2]) { + case 0: + case 1: /* no EISR/EIMR support for now, treat as shared IRQ */ + break; + case 2: + if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs)) + return -EINVAL; + + *out_hwirq = mpic->ipi_vecs[intspec[0]]; + break; + case 3: + if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs)) + return -EINVAL; + + *out_hwirq = mpic->timer_vecs[intspec[0]]; + break; + default: + pr_debug("%s: unknown irq type %u\n", + __func__, intspec[2]); + return -EINVAL; + } + + *out_flags = map_mpic_senses[intspec[1] & 3]; + } else if (intsize > 1) { u32 mask = 0x3; /* Apple invented a new race of encoding on machines with @@ -1109,6 +1203,9 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic->hc_ipi.name = name; #endif /* CONFIG_SMP */ + mpic->hc_tm = mpic_tm_chip; + mpic->hc_tm.name = name; + mpic->flags = flags; mpic->isu_size = isu_size; mpic->irq_count = irq_count; @@ -1119,10 +1216,14 @@ struct mpic * __init mpic_alloc(struct device_node *node, else intvec_top = 255; - mpic->timer_vecs[0] = intvec_top - 8; - mpic->timer_vecs[1] = intvec_top - 7; - mpic->timer_vecs[2] = intvec_top - 6; - mpic->timer_vecs[3] = intvec_top - 5; + mpic->timer_vecs[0] = intvec_top - 12; + mpic->timer_vecs[1] = intvec_top - 11; + mpic->timer_vecs[2] = intvec_top - 10; + mpic->timer_vecs[3] = intvec_top - 9; + mpic->timer_vecs[4] = intvec_top - 8; + mpic->timer_vecs[5] = intvec_top - 7; + mpic->timer_vecs[6] = intvec_top - 6; + mpic->timer_vecs[7] = intvec_top - 5; mpic->ipi_vecs[0] = intvec_top - 4; mpic->ipi_vecs[1] = intvec_top - 3; mpic->ipi_vecs[2] = intvec_top - 2; @@ -1132,6 +1233,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, /* Check for "big-endian" in device-tree */ if (node && of_get_property(node, "big-endian", NULL) != NULL) mpic->flags |= MPIC_BIG_ENDIAN; + if (node && of_device_is_compatible(node, "fsl,mpic")) + mpic->flags |= MPIC_FSL; /* Look for protected sources */ if (node) { @@ -1323,15 +1426,17 @@ void __init mpic_init(struct mpic *mpic) /* Set current processor priority to max */ mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf); - /* Initialize timers: just disable them all */ + /* Initialize timers to our reserved vectors and mask them for now */ for (i = 0; i < 4; i++) { mpic_write(mpic->tmregs, i * MPIC_INFO(TIMER_STRIDE) + - MPIC_INFO(TIMER_DESTINATION), 0); + MPIC_INFO(TIMER_DESTINATION), + 1 << hard_smp_processor_id()); mpic_write(mpic->tmregs, i * MPIC_INFO(TIMER_STRIDE) + MPIC_INFO(TIMER_VECTOR_PRI), MPIC_VECPRI_MASK | + (9 << MPIC_VECPRI_PRIORITY_SHIFT) | (mpic->timer_vecs[0] + i)); } @@ -1427,7 +1532,7 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable) void mpic_irq_set_priority(unsigned int irq, unsigned int pri) { struct mpic *mpic = mpic_find(irq); - unsigned int src = mpic_irq_to_hw(irq); + unsigned int src = virq_to_hw(irq); unsigned long flags; u32 reg; @@ -1440,6 +1545,11 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) ~MPIC_VECPRI_PRIORITY_MASK; mpic_ipi_write(src - mpic->ipi_vecs[0], reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } else if (mpic_is_tm(mpic, irq)) { + reg = mpic_tm_read(src - mpic->timer_vecs[0]) & + ~MPIC_VECPRI_PRIORITY_MASK; + mpic_tm_write(src - mpic->timer_vecs[0], + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } else { reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & ~MPIC_VECPRI_PRIORITY_MASK; @@ -1619,46 +1729,28 @@ void mpic_request_ipis(void) } } -static void mpic_send_ipi(unsigned int ipi_no, const struct cpumask *cpu_mask) +void smp_mpic_message_pass(int cpu, int msg) { struct mpic *mpic = mpic_primary; + u32 physmask; BUG_ON(mpic == NULL); -#ifdef DEBUG_IPI - DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no); -#endif - - mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) + - ipi_no * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), - mpic_physmask(cpumask_bits(cpu_mask)[0])); -} - -void smp_mpic_message_pass(int target, int msg) -{ - cpumask_var_t tmp; - /* make sure we're sending something that translates to an IPI */ if ((unsigned int)msg > 3) { printk("SMP %d: smp_message_pass: unknown msg %d\n", smp_processor_id(), msg); return; } - switch (target) { - case MSG_ALL: - mpic_send_ipi(msg, cpu_online_mask); - break; - case MSG_ALL_BUT_SELF: - alloc_cpumask_var(&tmp, GFP_NOWAIT); - cpumask_andnot(tmp, cpu_online_mask, - cpumask_of(smp_processor_id())); - mpic_send_ipi(msg, tmp); - free_cpumask_var(tmp); - break; - default: - mpic_send_ipi(msg, cpumask_of(target)); - break; - } + +#ifdef DEBUG_IPI + DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg); +#endif + + physmask = 1 << get_hard_smp_processor_id(cpu); + + mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) + + msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask); } int __init smp_mpic_probe(void) @@ -1702,9 +1794,8 @@ void mpic_reset_core(int cpu) #endif /* CONFIG_SMP */ #ifdef CONFIG_PM -static int mpic_suspend(struct sys_device *dev, pm_message_t state) +static void mpic_suspend_one(struct mpic *mpic) { - struct mpic *mpic = container_of(dev, struct mpic, sysdev); int i; for (i = 0; i < mpic->num_sources; i++) { @@ -1713,13 +1804,22 @@ static int mpic_suspend(struct sys_device *dev, pm_message_t state) mpic->save_data[i].dest = mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION)); } +} + +static int mpic_suspend(void) +{ + struct mpic *mpic = mpics; + + while (mpic) { + mpic_suspend_one(mpic); + mpic = mpic->next; + } return 0; } -static int mpic_resume(struct sys_device *dev) +static void mpic_resume_one(struct mpic *mpic) { - struct mpic *mpic = container_of(dev, struct mpic, sysdev); int i; for (i = 0; i < mpic->num_sources; i++) { @@ -1746,33 +1846,28 @@ static int mpic_resume(struct sys_device *dev) } #endif } /* end for loop */ +} - return 0; +static void mpic_resume(void) +{ + struct mpic *mpic = mpics; + + while (mpic) { + mpic_resume_one(mpic); + mpic = mpic->next; + } } -#endif -static struct sysdev_class mpic_sysclass = { -#ifdef CONFIG_PM +static struct syscore_ops mpic_syscore_ops = { .resume = mpic_resume, .suspend = mpic_suspend, -#endif - .name = "mpic", }; static int mpic_init_sys(void) { - struct mpic *mpic = mpics; - int error, id = 0; - - error = sysdev_class_register(&mpic_sysclass); - - while (mpic && !error) { - mpic->sysdev.cls = &mpic_sysclass; - mpic->sysdev.id = id++; - error = sysdev_register(&mpic->sysdev); - mpic = mpic->next; - } - return error; + register_syscore_ops(&mpic_syscore_ops); + return 0; } device_initcall(mpic_init_sys); +#endif diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c index e9c633c7c083..14d130268e7a 100644 --- a/arch/powerpc/sysdev/mv64x60_pic.c +++ b/arch/powerpc/sysdev/mv64x60_pic.c @@ -78,7 +78,7 @@ static struct irq_host *mv64x60_irq_host; static void mv64x60_mask_low(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -91,7 +91,7 @@ static void mv64x60_mask_low(struct irq_data *d) static void mv64x60_unmask_low(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -115,7 +115,7 @@ static struct irq_chip mv64x60_chip_low = { static void mv64x60_mask_high(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -128,7 +128,7 @@ static void mv64x60_mask_high(struct irq_data *d) static void mv64x60_unmask_high(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -152,7 +152,7 @@ static struct irq_chip mv64x60_chip_high = { static void mv64x60_mask_gpp(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -165,7 +165,7 @@ static void mv64x60_mask_gpp(struct irq_data *d) static void mv64x60_mask_ack_gpp(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); @@ -180,7 +180,7 @@ static void mv64x60_mask_ack_gpp(struct irq_data *d) static void mv64x60_unmask_gpp(struct irq_data *d) { - int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK; + int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK; unsigned long flags; spin_lock_irqsave(&mv64x60_lock, flags); diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 832d6924ad1c..b2acda07220d 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -197,12 +197,10 @@ static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d) return irq_data_get_irq_chip_data(d); } -#define virq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq) - static void qe_ic_unmask_irq(struct irq_data *d) { struct qe_ic *qe_ic = qe_ic_from_irq_data(d); - unsigned int src = virq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; @@ -218,7 +216,7 @@ static void qe_ic_unmask_irq(struct irq_data *d) static void qe_ic_mask_irq(struct irq_data *d) { struct qe_ic *qe_ic = qe_ic_from_irq_data(d); - unsigned int src = virq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 temp; diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c new file mode 100644 index 000000000000..b2593ce30c9b --- /dev/null +++ b/arch/powerpc/sysdev/scom.c @@ -0,0 +1,192 @@ +/* + * Copyright 2010 Benjamin Herrenschmidt, IBM Corp + * <benh@kernel.crashing.org> + * and David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/slab.h> +#include <asm/prom.h> +#include <asm/scom.h> + +const struct scom_controller *scom_controller; +EXPORT_SYMBOL_GPL(scom_controller); + +struct device_node *scom_find_parent(struct device_node *node) +{ + struct device_node *par, *tmp; + const u32 *p; + + for (par = of_node_get(node); par;) { + if (of_get_property(par, "scom-controller", NULL)) + break; + p = of_get_property(par, "scom-parent", NULL); + tmp = par; + if (p == NULL) + par = of_get_parent(par); + else + par = of_find_node_by_phandle(*p); + of_node_put(tmp); + } + return par; +} +EXPORT_SYMBOL_GPL(scom_find_parent); + +scom_map_t scom_map_device(struct device_node *dev, int index) +{ + struct device_node *parent; + unsigned int cells, size; + const u32 *prop; + u64 reg, cnt; + scom_map_t ret; + + parent = scom_find_parent(dev); + + if (parent == NULL) + return 0; + + prop = of_get_property(parent, "#scom-cells", NULL); + cells = prop ? *prop : 1; + + prop = of_get_property(dev, "scom-reg", &size); + if (!prop) + return 0; + size >>= 2; + + if (index >= (size / (2*cells))) + return 0; + + reg = of_read_number(&prop[index * cells * 2], cells); + cnt = of_read_number(&prop[index * cells * 2 + cells], cells); + + ret = scom_map(parent, reg, cnt); + of_node_put(parent); + + return ret; +} +EXPORT_SYMBOL_GPL(scom_map_device); + +#ifdef CONFIG_SCOM_DEBUGFS +struct scom_debug_entry { + struct device_node *dn; + unsigned long addr; + scom_map_t map; + spinlock_t lock; + char name[8]; + struct debugfs_blob_wrapper blob; +}; + +static int scom_addr_set(void *data, u64 val) +{ + struct scom_debug_entry *ent = data; + + ent->addr = 0; + scom_unmap(ent->map); + + ent->map = scom_map(ent->dn, val, 1); + if (scom_map_ok(ent->map)) + ent->addr = val; + else + return -EFAULT; + + return 0; +} + +static int scom_addr_get(void *data, u64 *val) +{ + struct scom_debug_entry *ent = data; + *val = ent->addr; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(scom_addr_fops, scom_addr_get, scom_addr_set, + "0x%llx\n"); + +static int scom_val_set(void *data, u64 val) +{ + struct scom_debug_entry *ent = data; + + if (!scom_map_ok(ent->map)) + return -EFAULT; + + scom_write(ent->map, 0, val); + + return 0; +} + +static int scom_val_get(void *data, u64 *val) +{ + struct scom_debug_entry *ent = data; + + if (!scom_map_ok(ent->map)) + return -EFAULT; + + *val = scom_read(ent->map, 0); + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(scom_val_fops, scom_val_get, scom_val_set, + "0x%llx\n"); + +static int scom_debug_init_one(struct dentry *root, struct device_node *dn, + int i) +{ + struct scom_debug_entry *ent; + struct dentry *dir; + + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->dn = of_node_get(dn); + ent->map = SCOM_MAP_INVALID; + spin_lock_init(&ent->lock); + snprintf(ent->name, 8, "scom%d", i); + ent->blob.data = dn->full_name; + ent->blob.size = strlen(dn->full_name); + + dir = debugfs_create_dir(ent->name, root); + if (!dir) { + of_node_put(dn); + kfree(ent); + return -1; + } + + debugfs_create_file("addr", 0600, dir, ent, &scom_addr_fops); + debugfs_create_file("value", 0600, dir, ent, &scom_val_fops); + debugfs_create_blob("path", 0400, dir, &ent->blob); + + return 0; +} + +static int scom_debug_init(void) +{ + struct device_node *dn; + struct dentry *root; + int i, rc; + + root = debugfs_create_dir("scom", powerpc_debugfs_root); + if (!root) + return -1; + + i = rc = 0; + for_each_node_with_property(dn, "scom-controller") + rc |= scom_debug_init_one(root, dn, i++); + + return rc; +} +device_initcall(scom_debug_init); +#endif /* CONFIG_SCOM_DEBUGFS */ diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 5d9138516628..984cd2029158 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -41,8 +41,6 @@ #define UIC_VR 0x7 #define UIC_VCR 0x8 -#define uic_irq_to_hw(virq) (irq_map[virq].hwirq) - struct uic *primary_uic; struct uic { @@ -58,7 +56,7 @@ struct uic { static void uic_unmask_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 er, sr; @@ -76,7 +74,7 @@ static void uic_unmask_irq(struct irq_data *d) static void uic_mask_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 er; @@ -90,7 +88,7 @@ static void uic_mask_irq(struct irq_data *d) static void uic_ack_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; spin_lock_irqsave(&uic->lock, flags); @@ -101,7 +99,7 @@ static void uic_ack_irq(struct irq_data *d) static void uic_mask_ack_irq(struct irq_data *d) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; u32 er, sr; @@ -126,7 +124,7 @@ static void uic_mask_ack_irq(struct irq_data *d) static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type) { struct uic *uic = irq_data_get_irq_chip_data(d); - unsigned int src = uic_irq_to_hw(d->irq); + unsigned int src = irqd_to_hwirq(d); unsigned long flags; int trigger, polarity; u32 tr, pr, mask; diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig new file mode 100644 index 000000000000..0031eda320c3 --- /dev/null +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -0,0 +1,13 @@ +config PPC_XICS + def_bool n + select PPC_SMP_MUXED_IPI + +config PPC_ICP_NATIVE + def_bool n + +config PPC_ICP_HV + def_bool n + +config PPC_ICS_RTAS + def_bool n + diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile new file mode 100644 index 000000000000..b75a6059337f --- /dev/null +++ b/arch/powerpc/sysdev/xics/Makefile @@ -0,0 +1,6 @@ +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +obj-y += xics-common.o +obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o +obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o +obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c new file mode 100644 index 000000000000..9518d367a64f --- /dev/null +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -0,0 +1,164 @@ +/* + * Copyright 2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> + +#include <asm/smp.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> +#include <asm/io.h> +#include <asm/hvcall.h> + +static inline unsigned int icp_hv_get_xirr(unsigned char cppr) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_XIRR, retbuf, cppr); + if (rc != H_SUCCESS) + panic(" bad return code xirr - rc = %lx\n", rc); + return (unsigned int)retbuf[0]; +} + +static inline void icp_hv_set_xirr(unsigned int value) +{ + long rc = plpar_hcall_norets(H_EOI, value); + if (rc != H_SUCCESS) + panic("bad return code EOI - rc = %ld, value=%x\n", rc, value); +} + +static inline void icp_hv_set_cppr(u8 value) +{ + long rc = plpar_hcall_norets(H_CPPR, value); + if (rc != H_SUCCESS) + panic("bad return code cppr - rc = %lx\n", rc); +} + +static inline void icp_hv_set_qirr(int n_cpu , u8 value) +{ + long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu), + value); + if (rc != H_SUCCESS) + panic("bad return code qirr - rc = %lx\n", rc); +} + +static void icp_hv_eoi(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + + iosync(); + icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq); +} + +static void icp_hv_teardown_cpu(void) +{ + int cpu = smp_processor_id(); + + /* Clear any pending IPI */ + icp_hv_set_qirr(cpu, 0xff); +} + +static void icp_hv_flush_ipi(void) +{ + /* We take the ipi irq but and never return so we + * need to EOI the IPI, but want to leave our priority 0 + * + * should we check all the other interrupts too? + * should we be flagging idle loop instead? + * or creating some task to be scheduled? + */ + + icp_hv_set_xirr((0x00 << 24) | XICS_IPI); +} + +static unsigned int icp_hv_get_irq(void) +{ + unsigned int xirr = icp_hv_get_xirr(xics_cppr_top()); + unsigned int vec = xirr & 0x00ffffff; + unsigned int irq; + + if (vec == XICS_IRQ_SPURIOUS) + return NO_IRQ; + + irq = irq_radix_revmap_lookup(xics_host, vec); + if (likely(irq != NO_IRQ)) { + xics_push_cppr(vec); + return irq; + } + + /* We don't have a linux mapping, so have rtas mask it. */ + xics_mask_unknown_vec(vec); + + /* We might learn about it later, so EOI it */ + icp_hv_set_xirr(xirr); + + return NO_IRQ; +} + +static void icp_hv_set_cpu_priority(unsigned char cppr) +{ + xics_set_base_cppr(cppr); + icp_hv_set_cppr(cppr); + iosync(); +} + +#ifdef CONFIG_SMP + +static void icp_hv_cause_ipi(int cpu, unsigned long data) +{ + icp_hv_set_qirr(cpu, IPI_PRIORITY); +} + +static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + + icp_hv_set_qirr(cpu, 0xff); + + return smp_ipi_demux(); +} + +#endif /* CONFIG_SMP */ + +static const struct icp_ops icp_hv_ops = { + .get_irq = icp_hv_get_irq, + .eoi = icp_hv_eoi, + .set_priority = icp_hv_set_cpu_priority, + .teardown_cpu = icp_hv_teardown_cpu, + .flush_ipi = icp_hv_flush_ipi, +#ifdef CONFIG_SMP + .ipi_action = icp_hv_ipi_action, + .cause_ipi = icp_hv_cause_ipi, +#endif +}; + +int icp_hv_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp"); + if (!np) + np = of_find_node_by_type(NULL, + "PowerPC-External-Interrupt-Presentation"); + if (!np) + return -ENODEV; + + icp_ops = &icp_hv_ops; + + return 0; +} + diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c new file mode 100644 index 000000000000..1f15ad436140 --- /dev/null +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -0,0 +1,293 @@ +/* + * Copyright 2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/spinlock.h> + +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> + +struct icp_ipl { + union { + u32 word; + u8 bytes[4]; + } xirr_poll; + union { + u32 word; + u8 bytes[4]; + } xirr; + u32 dummy; + union { + u32 word; + u8 bytes[4]; + } qirr; + u32 link_a; + u32 link_b; + u32 link_c; +}; + +static struct icp_ipl __iomem *icp_native_regs[NR_CPUS]; + +static inline unsigned int icp_native_get_xirr(void) +{ + int cpu = smp_processor_id(); + + return in_be32(&icp_native_regs[cpu]->xirr.word); +} + +static inline void icp_native_set_xirr(unsigned int value) +{ + int cpu = smp_processor_id(); + + out_be32(&icp_native_regs[cpu]->xirr.word, value); +} + +static inline void icp_native_set_cppr(u8 value) +{ + int cpu = smp_processor_id(); + + out_8(&icp_native_regs[cpu]->xirr.bytes[0], value); +} + +static inline void icp_native_set_qirr(int n_cpu, u8 value) +{ + out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value); +} + +static void icp_native_set_cpu_priority(unsigned char cppr) +{ + xics_set_base_cppr(cppr); + icp_native_set_cppr(cppr); + iosync(); +} + +static void icp_native_eoi(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + + iosync(); + icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq); +} + +static void icp_native_teardown_cpu(void) +{ + int cpu = smp_processor_id(); + + /* Clear any pending IPI */ + icp_native_set_qirr(cpu, 0xff); +} + +static void icp_native_flush_ipi(void) +{ + /* We take the ipi irq but and never return so we + * need to EOI the IPI, but want to leave our priority 0 + * + * should we check all the other interrupts too? + * should we be flagging idle loop instead? + * or creating some task to be scheduled? + */ + + icp_native_set_xirr((0x00 << 24) | XICS_IPI); +} + +static unsigned int icp_native_get_irq(void) +{ + unsigned int xirr = icp_native_get_xirr(); + unsigned int vec = xirr & 0x00ffffff; + unsigned int irq; + + if (vec == XICS_IRQ_SPURIOUS) + return NO_IRQ; + + irq = irq_radix_revmap_lookup(xics_host, vec); + if (likely(irq != NO_IRQ)) { + xics_push_cppr(vec); + return irq; + } + + /* We don't have a linux mapping, so have rtas mask it. */ + xics_mask_unknown_vec(vec); + + /* We might learn about it later, so EOI it */ + icp_native_set_xirr(xirr); + + return NO_IRQ; +} + +#ifdef CONFIG_SMP + +static void icp_native_cause_ipi(int cpu, unsigned long data) +{ + icp_native_set_qirr(cpu, IPI_PRIORITY); +} + +static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + + icp_native_set_qirr(cpu, 0xff); + + return smp_ipi_demux(); +} + +#endif /* CONFIG_SMP */ + +static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr, + unsigned long size) +{ + char *rname; + int i, cpu = -1; + + /* This may look gross but it's good enough for now, we don't quite + * have a hard -> linux processor id matching. + */ + for_each_possible_cpu(i) { + if (!cpu_present(i)) + continue; + if (hw_id == get_hard_smp_processor_id(i)) { + cpu = i; + break; + } + } + + /* Fail, skip that CPU. Don't print, it's normal, some XICS come up + * with way more entries in there than you have CPUs + */ + if (cpu == -1) + return 0; + + rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation", + cpu, hw_id); + + if (!request_mem_region(addr, size, rname)) { + pr_warning("icp_native: Could not reserve ICP MMIO" + " for CPU %d, interrupt server #0x%x\n", + cpu, hw_id); + return -EBUSY; + } + + icp_native_regs[cpu] = ioremap(addr, size); + if (!icp_native_regs[cpu]) { + pr_warning("icp_native: Failed ioremap for CPU %d, " + "interrupt server #0x%x, addr %#lx\n", + cpu, hw_id, addr); + release_mem_region(addr, size); + return -ENOMEM; + } + return 0; +} + +static int __init icp_native_init_one_node(struct device_node *np, + unsigned int *indx) +{ + unsigned int ilen; + const u32 *ireg; + int i; + int reg_tuple_size; + int num_servers = 0; + + /* This code does the theorically broken assumption that the interrupt + * server numbers are the same as the hard CPU numbers. + * This happens to be the case so far but we are playing with fire... + * should be fixed one of these days. -BenH. + */ + ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen); + + /* Do that ever happen ? we'll know soon enough... but even good'old + * f80 does have that property .. + */ + WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32))); + + if (ireg) { + *indx = of_read_number(ireg, 1); + if (ilen >= 2*sizeof(u32)) + num_servers = of_read_number(ireg + 1, 1); + } + + ireg = of_get_property(np, "reg", &ilen); + if (!ireg) { + pr_err("icp_native: Can't find interrupt reg property"); + return -1; + } + + reg_tuple_size = (of_n_addr_cells(np) + of_n_size_cells(np)) * 4; + if (((ilen % reg_tuple_size) != 0) + || (num_servers && (num_servers != (ilen / reg_tuple_size)))) { + pr_err("icp_native: ICP reg len (%d) != num servers (%d)", + ilen / reg_tuple_size, num_servers); + return -1; + } + + for (i = 0; i < (ilen / reg_tuple_size); i++) { + struct resource r; + int err; + + err = of_address_to_resource(np, i, &r); + if (err) { + pr_err("icp_native: Could not translate ICP MMIO" + " for interrupt server 0x%x (%d)\n", *indx, err); + return -1; + } + + if (icp_native_map_one_cpu(*indx, r.start, r.end - r.start)) + return -1; + + (*indx)++; + } + return 0; +} + +static const struct icp_ops icp_native_ops = { + .get_irq = icp_native_get_irq, + .eoi = icp_native_eoi, + .set_priority = icp_native_set_cpu_priority, + .teardown_cpu = icp_native_teardown_cpu, + .flush_ipi = icp_native_flush_ipi, +#ifdef CONFIG_SMP + .ipi_action = icp_native_ipi_action, + .cause_ipi = icp_native_cause_ipi, +#endif +}; + +int icp_native_init(void) +{ + struct device_node *np; + u32 indx = 0; + int found = 0; + + for_each_compatible_node(np, NULL, "ibm,ppc-xicp") + if (icp_native_init_one_node(np, &indx) == 0) + found = 1; + if (!found) { + for_each_node_by_type(np, + "PowerPC-External-Interrupt-Presentation") { + if (icp_native_init_one_node(np, &indx) == 0) + found = 1; + } + } + + if (found == 0) + return -ENODEV; + + icp_ops = &icp_native_ops; + + return 0; +} diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c new file mode 100644 index 000000000000..c782f85cf7e4 --- /dev/null +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -0,0 +1,240 @@ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/msi.h> + +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> +#include <asm/rtas.h> + +/* RTAS service tokens */ +static int ibm_get_xive; +static int ibm_set_xive; +static int ibm_int_on; +static int ibm_int_off; + +static int ics_rtas_map(struct ics *ics, unsigned int virq); +static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec); +static long ics_rtas_get_server(struct ics *ics, unsigned long vec); +static int ics_rtas_host_match(struct ics *ics, struct device_node *node); + +/* Only one global & state struct ics */ +static struct ics ics_rtas = { + .map = ics_rtas_map, + .mask_unknown = ics_rtas_mask_unknown, + .get_server = ics_rtas_get_server, + .host_match = ics_rtas_host_match, +}; + +static void ics_rtas_unmask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + int call_status; + int server; + + pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq); + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + + server = xics_get_irq_server(d->irq, d->affinity, 0); + + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, + DEFAULT_PRIORITY); + if (call_status != 0) { + printk(KERN_ERR + "%s: ibm_set_xive irq %u server %x returned %d\n", + __func__, hw_irq, server, call_status); + return; + } + + /* Now unmask the interrupt (often a no-op) */ + call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq); + if (call_status != 0) { + printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", + __func__, hw_irq, call_status); + return; + } +} + +static unsigned int ics_rtas_startup(struct irq_data *d) +{ +#ifdef CONFIG_PCI_MSI + /* + * The generic MSI code returns with the interrupt disabled on the + * card, using the MSI mask bits. Firmware doesn't appear to unmask + * at that level, so we do it here by hand. + */ + if (d->msi_desc) + unmask_msi_irq(d); +#endif + /* unmask it */ + ics_rtas_unmask_irq(d); + return 0; +} + +static void ics_rtas_mask_real_irq(unsigned int hw_irq) +{ + int call_status; + + if (hw_irq == XICS_IPI) + return; + + call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq); + if (call_status != 0) { + printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", + __func__, hw_irq, call_status); + return; + } + + /* Have to set XIVE to 0xff to be able to remove a slot */ + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, + xics_default_server, 0xff); + if (call_status != 0) { + printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", + __func__, hw_irq, call_status); + return; + } +} + +static void ics_rtas_mask_irq(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + + pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq); + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return; + ics_rtas_mask_real_irq(hw_irq); +} + +static int ics_rtas_set_affinity(struct irq_data *d, + const struct cpumask *cpumask, + bool force) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + int status; + int xics_status[2]; + int irq_server; + + if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) + return -1; + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq); + + if (status) { + printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", + __func__, hw_irq, status); + return -1; + } + + irq_server = xics_get_irq_server(d->irq, cpumask, 1); + if (irq_server == -1) { + char cpulist[128]; + cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); + printk(KERN_WARNING + "%s: No online cpus in the mask %s for irq %d\n", + __func__, cpulist, d->irq); + return -1; + } + + status = rtas_call(ibm_set_xive, 3, 1, NULL, + hw_irq, irq_server, xics_status[1]); + + if (status) { + printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", + __func__, hw_irq, status); + return -1; + } + + return IRQ_SET_MASK_OK; +} + +static struct irq_chip ics_rtas_irq_chip = { + .name = "XICS", + .irq_startup = ics_rtas_startup, + .irq_mask = ics_rtas_mask_irq, + .irq_unmask = ics_rtas_unmask_irq, + .irq_eoi = NULL, /* Patched at init time */ + .irq_set_affinity = ics_rtas_set_affinity +}; + +static int ics_rtas_map(struct ics *ics, unsigned int virq) +{ + unsigned int hw_irq = (unsigned int)virq_to_hw(virq); + int status[2]; + int rc; + + if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)) + return -EINVAL; + + /* Check if RTAS knows about this interrupt */ + rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq); + if (rc) + return -ENXIO; + + irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq); + irq_set_chip_data(virq, &ics_rtas); + + return 0; +} + +static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec) +{ + ics_rtas_mask_real_irq(vec); +} + +static long ics_rtas_get_server(struct ics *ics, unsigned long vec) +{ + int rc, status[2]; + + rc = rtas_call(ibm_get_xive, 1, 3, status, vec); + if (rc) + return -1; + return status[0]; +} + +static int ics_rtas_host_match(struct ics *ics, struct device_node *node) +{ + /* IBM machines have interrupt parents of various funky types for things + * like vdevices, events, etc... The trick we use here is to match + * everything here except the legacy 8259 which is compatible "chrp,iic" + */ + return !of_device_is_compatible(node, "chrp,iic"); +} + +int ics_rtas_init(void) +{ + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_on = rtas_token("ibm,int-on"); + ibm_int_off = rtas_token("ibm,int-off"); + + /* We enable the RTAS "ICS" if RTAS is present with the + * appropriate tokens + */ + if (ibm_get_xive == RTAS_UNKNOWN_SERVICE || + ibm_set_xive == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + /* We need to patch our irq chip's EOI to point to the + * right ICP + */ + ics_rtas_irq_chip.irq_eoi = icp_ops->eoi; + + /* Register ourselves */ + xics_register_ics(&ics_rtas); + + return 0; +} + diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c new file mode 100644 index 000000000000..445c5a01b766 --- /dev/null +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -0,0 +1,443 @@ +/* + * Copyright 2011 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <linux/types.h> +#include <linux/threads.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/debugfs.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/rtas.h> +#include <asm/xics.h> +#include <asm/firmware.h> + +/* Globals common to all ICP/ICS implementations */ +const struct icp_ops *icp_ops; + +unsigned int xics_default_server = 0xff; +unsigned int xics_default_distrib_server = 0; +unsigned int xics_interrupt_server_size = 8; + +DEFINE_PER_CPU(struct xics_cppr, xics_cppr); + +struct irq_host *xics_host; + +static LIST_HEAD(ics_list); + +void xics_update_irq_servers(void) +{ + int i, j; + struct device_node *np; + u32 ilen; + const u32 *ireg; + u32 hcpuid; + + /* Find the server numbers for the boot cpu. */ + np = of_get_cpu_node(boot_cpuid, NULL); + BUG_ON(!np); + + hcpuid = get_hard_smp_processor_id(boot_cpuid); + xics_default_server = xics_default_distrib_server = hcpuid; + + pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server); + + ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen); + if (!ireg) { + of_node_put(np); + return; + } + + i = ilen / sizeof(int); + + /* Global interrupt distribution server is specified in the last + * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last + * entry fom this property for current boot cpu id and use it as + * default distribution server + */ + for (j = 0; j < i; j += 2) { + if (ireg[j] == hcpuid) { + xics_default_distrib_server = ireg[j+1]; + break; + } + } + pr_devel("xics: xics_default_distrib_server = 0x%x\n", + xics_default_distrib_server); + of_node_put(np); +} + +/* GIQ stuff, currently only supported on RTAS setups, will have + * to be sorted properly for bare metal + */ +void xics_set_cpu_giq(unsigned int gserver, unsigned int join) +{ +#ifdef CONFIG_PPC_RTAS + int index; + int status; + + if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL)) + return; + + index = (1UL << xics_interrupt_server_size) - 1 - gserver; + + status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join); + + WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n", + GLOBAL_INTERRUPT_QUEUE, index, join, status); +#endif +} + +void xics_setup_cpu(void) +{ + icp_ops->set_priority(LOWEST_PRIORITY); + + xics_set_cpu_giq(xics_default_distrib_server, 1); +} + +void xics_mask_unknown_vec(unsigned int vec) +{ + struct ics *ics; + + pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec); + + list_for_each_entry(ics, &ics_list, link) + ics->mask_unknown(ics, vec); +} + + +#ifdef CONFIG_SMP + +static void xics_request_ipi(void) +{ + unsigned int ipi; + + ipi = irq_create_mapping(xics_host, XICS_IPI); + BUG_ON(ipi == NO_IRQ); + + /* + * IPIs are marked IRQF_DISABLED as they must run with irqs + * disabled, and PERCPU. The handler was set in map. + */ + BUG_ON(request_irq(ipi, icp_ops->ipi_action, + IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL)); +} + +int __init xics_smp_probe(void) +{ + /* Setup cause_ipi callback based on which ICP is used */ + smp_ops->cause_ipi = icp_ops->cause_ipi; + + /* Register all the IPIs */ + xics_request_ipi(); + + return cpumask_weight(cpu_possible_mask); +} + +#endif /* CONFIG_SMP */ + +void xics_teardown_cpu(void) +{ + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + + /* + * we have to reset the cppr index to 0 because we're + * not going to return from the IPI + */ + os_cppr->index = 0; + icp_ops->set_priority(0); + icp_ops->teardown_cpu(); +} + +void xics_kexec_teardown_cpu(int secondary) +{ + xics_teardown_cpu(); + + icp_ops->flush_ipi(); + + /* + * Some machines need to have at least one cpu in the GIQ, + * so leave the master cpu in the group. + */ + if (secondary) + xics_set_cpu_giq(xics_default_distrib_server, 0); +} + + +#ifdef CONFIG_HOTPLUG_CPU + +/* Interrupts are disabled. */ +void xics_migrate_irqs_away(void) +{ + int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id(); + unsigned int irq, virq; + + /* If we used to be the default server, move to the new "boot_cpuid" */ + if (hw_cpu == xics_default_server) + xics_update_irq_servers(); + + /* Reject any interrupt that was queued to us... */ + icp_ops->set_priority(0); + + /* Remove ourselves from the global interrupt queue */ + xics_set_cpu_giq(xics_default_distrib_server, 0); + + /* Allow IPIs again... */ + icp_ops->set_priority(DEFAULT_PRIORITY); + + for_each_irq(virq) { + struct irq_desc *desc; + struct irq_chip *chip; + long server; + unsigned long flags; + struct ics *ics; + + /* We can't set affinity on ISA interrupts */ + if (virq < NUM_ISA_INTERRUPTS) + continue; + if (!virq_is_host(virq, xics_host)) + continue; + irq = (unsigned int)virq_to_hw(virq); + /* We need to get IPIs still. */ + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) + continue; + desc = irq_to_desc(virq); + /* We only need to migrate enabled IRQS */ + if (!desc || !desc->action) + continue; + chip = irq_desc_get_chip(desc); + if (!chip || !chip->irq_set_affinity) + continue; + + raw_spin_lock_irqsave(&desc->lock, flags); + + /* Locate interrupt server */ + server = -1; + ics = irq_get_chip_data(virq); + if (ics) + server = ics->get_server(ics, irq); + if (server < 0) { + printk(KERN_ERR "%s: Can't find server for irq %d\n", + __func__, irq); + goto unlock; + } + + /* We only support delivery to all cpus or to one cpu. + * The irq has to be migrated only in the single cpu + * case. + */ + if (server != hw_cpu) + goto unlock; + + /* This is expected during cpu offline. */ + if (cpu_online(cpu)) + pr_warning("IRQ %u affinity broken off cpu %u\n", + virq, cpu); + + /* Reset affinity to all cpus */ + raw_spin_unlock_irqrestore(&desc->lock, flags); + irq_set_affinity(virq, cpu_all_mask); + continue; +unlock: + raw_spin_unlock_irqrestore(&desc->lock, flags); + } +} +#endif /* CONFIG_HOTPLUG_CPU */ + +#ifdef CONFIG_SMP +/* + * For the moment we only implement delivery to all cpus or one cpu. + * + * If the requested affinity is cpu_all_mask, we set global affinity. + * If not we set it to the first cpu in the mask, even if multiple cpus + * are set. This is so things like irqbalance (which set core and package + * wide affinities) do the right thing. + * + * We need to fix this to implement support for the links + */ +int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, + unsigned int strict_check) +{ + + if (!distribute_irqs) + return xics_default_server; + + if (!cpumask_subset(cpu_possible_mask, cpumask)) { + int server = cpumask_first_and(cpu_online_mask, cpumask); + + if (server < nr_cpu_ids) + return get_hard_smp_processor_id(server); + + if (strict_check) + return -1; + } + + /* + * Workaround issue with some versions of JS20 firmware that + * deliver interrupts to cpus which haven't been started. This + * happens when using the maxcpus= boot option. + */ + if (cpumask_equal(cpu_online_mask, cpu_present_mask)) + return xics_default_distrib_server; + + return xics_default_server; +} +#endif /* CONFIG_SMP */ + +static int xics_host_match(struct irq_host *h, struct device_node *node) +{ + struct ics *ics; + + list_for_each_entry(ics, &ics_list, link) + if (ics->host_match(ics, node)) + return 1; + + return 0; +} + +/* Dummies */ +static void xics_ipi_unmask(struct irq_data *d) { } +static void xics_ipi_mask(struct irq_data *d) { } + +static struct irq_chip xics_ipi_chip = { + .name = "XICS", + .irq_eoi = NULL, /* Patched at init time */ + .irq_mask = xics_ipi_mask, + .irq_unmask = xics_ipi_unmask, +}; + +static int xics_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct ics *ics; + + pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); + + /* Insert the interrupt mapping into the radix tree for fast lookup */ + irq_radix_revmap_insert(xics_host, virq, hw); + + /* They aren't all level sensitive but we just don't really know */ + irq_set_status_flags(virq, IRQ_LEVEL); + + /* Don't call into ICS for IPIs */ + if (hw == XICS_IPI) { + irq_set_chip_and_handler(virq, &xics_ipi_chip, + handle_percpu_irq); + return 0; + } + + /* Let the ICS setup the chip data */ + list_for_each_entry(ics, &ics_list, link) + if (ics->map(ics, virq) == 0) + return 0; + + return -EINVAL; +} + +static int xics_host_xlate(struct irq_host *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + /* Current xics implementation translates everything + * to level. It is not technically right for MSIs but this + * is irrelevant at this point. We might get smarter in the future + */ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +static struct irq_host_ops xics_host_ops = { + .match = xics_host_match, + .map = xics_host_map, + .xlate = xics_host_xlate, +}; + +static void __init xics_init_host(void) +{ + xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops, + XICS_IRQ_SPURIOUS); + BUG_ON(xics_host == NULL); + irq_set_default_host(xics_host); +} + +void __init xics_register_ics(struct ics *ics) +{ + list_add(&ics->link, &ics_list); +} + +static void __init xics_get_server_size(void) +{ + struct device_node *np; + const u32 *isize; + + /* We fetch the interrupt server size from the first ICS node + * we find if any + */ + np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics"); + if (!np) + return; + isize = of_get_property(np, "ibm,interrupt-server#-size", NULL); + if (!isize) + return; + xics_interrupt_server_size = *isize; + of_node_put(np); +} + +void __init xics_init(void) +{ + int rc = -1; + + /* Fist locate ICP */ +#ifdef CONFIG_PPC_ICP_HV + if (firmware_has_feature(FW_FEATURE_LPAR)) + rc = icp_hv_init(); +#endif +#ifdef CONFIG_PPC_ICP_NATIVE + if (rc < 0) + rc = icp_native_init(); +#endif + if (rc < 0) { + pr_warning("XICS: Cannot find a Presentation Controller !\n"); + return; + } + + /* Copy get_irq callback over to ppc_md */ + ppc_md.get_irq = icp_ops->get_irq; + + /* Patch up IPI chip EOI */ + xics_ipi_chip.irq_eoi = icp_ops->eoi; + + /* Now locate ICS */ +#ifdef CONFIG_PPC_ICS_RTAS + rc = ics_rtas_init(); +#endif + if (rc < 0) + pr_warning("XICS: Cannot find a Source Controller !\n"); + + /* Initialize common bits */ + xics_get_server_size(); + xics_update_irq_servers(); + xics_init_host(); + xics_setup_cpu(); +} diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 0a13fc19e287..6183799754af 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -71,7 +71,7 @@ static unsigned char xilinx_intc_map_senses[] = { */ static void xilinx_intc_mask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void * regs = irq_data_get_irq_chip_data(d); pr_debug("mask: %d\n", irq); out_be32(regs + XINTC_CIE, 1 << irq); @@ -87,7 +87,7 @@ static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type) */ static void xilinx_intc_level_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void * regs = irq_data_get_irq_chip_data(d); pr_debug("unmask: %d\n", irq); out_be32(regs + XINTC_SIE, 1 << irq); @@ -112,7 +112,7 @@ static struct irq_chip xilinx_intc_level_irqchip = { */ static void xilinx_intc_edge_unmask(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void *regs = irq_data_get_irq_chip_data(d); pr_debug("unmask: %d\n", irq); out_be32(regs + XINTC_SIE, 1 << irq); @@ -120,7 +120,7 @@ static void xilinx_intc_edge_unmask(struct irq_data *d) static void xilinx_intc_edge_ack(struct irq_data *d) { - int irq = virq_to_hw(d->irq); + int irq = irqd_to_hwirq(d); void * regs = irq_data_get_irq_chip_data(d); pr_debug("ack: %d\n", irq); out_be32(regs + XINTC_IAR, 1 << irq); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 33794c1d92c3..42541bbcc7fa 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -334,7 +334,7 @@ static void release_output_lock(void) int cpus_are_in_xmon(void) { - return !cpus_empty(cpus_in_xmon); + return !cpumask_empty(&cpus_in_xmon); } #endif @@ -373,7 +373,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) #ifdef CONFIG_SMP cpu = smp_processor_id(); - if (cpu_isset(cpu, cpus_in_xmon)) { + if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { get_output_lock(); excprint(regs); printf("cpu 0x%x: Exception %lx %s in xmon, " @@ -396,10 +396,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } xmon_fault_jmp[cpu] = recurse_jmp; - cpu_set(cpu, cpus_in_xmon); + cpumask_set_cpu(cpu, &cpus_in_xmon); bp = NULL; - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) bp = at_breakpoint(regs->nip); if (bp || unrecoverable_excp(regs)) fromipi = 0; @@ -437,10 +437,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi) xmon_owner = cpu; mb(); if (ncpus > 1) { - smp_send_debugger_break(MSG_ALL_BUT_SELF); + smp_send_debugger_break(); /* wait for other cpus to come in */ for (timeout = 100000000; timeout != 0; --timeout) { - if (cpus_weight(cpus_in_xmon) >= ncpus) + if (cpumask_weight(&cpus_in_xmon) >= ncpus) break; barrier(); } @@ -484,7 +484,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } } leave: - cpu_clear(cpu, cpus_in_xmon); + cpumask_clear_cpu(cpu, &cpus_in_xmon); xmon_fault_jmp[cpu] = NULL; #else /* UP is simple... */ @@ -529,7 +529,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } } #else - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) { + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = at_breakpoint(regs->nip); if (bp != NULL) { int stepped = emulate_step(regs, bp->instr[0]); @@ -578,7 +578,7 @@ static int xmon_bpt(struct pt_regs *regs) struct bpt *bp; unsigned long offset; - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; /* Are we at the trap at bp->instr[1] for some bp? */ @@ -609,7 +609,7 @@ static int xmon_sstep(struct pt_regs *regs) static int xmon_dabr_match(struct pt_regs *regs) { - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; if (dabr.enabled == 0) return 0; @@ -619,7 +619,7 @@ static int xmon_dabr_match(struct pt_regs *regs) static int xmon_iabr_match(struct pt_regs *regs) { - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF)) + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; if (iabr == NULL) return 0; @@ -630,7 +630,7 @@ static int xmon_iabr_match(struct pt_regs *regs) static int xmon_ipi(struct pt_regs *regs) { #ifdef CONFIG_SMP - if (in_xmon && !cpu_isset(smp_processor_id(), cpus_in_xmon)) + if (in_xmon && !cpumask_test_cpu(smp_processor_id(), &cpus_in_xmon)) xmon_core(regs, 1); #endif return 0; @@ -644,7 +644,7 @@ static int xmon_fault_handler(struct pt_regs *regs) if (in_xmon && catch_memory_errors) handle_fault(regs); /* doesn't return */ - if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) { + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { regs->nip = bp->address + offset; @@ -929,7 +929,7 @@ static int do_step(struct pt_regs *regs) int stepped; /* check we are in 64-bit kernel mode, translation enabled */ - if ((regs->msr & (MSR_SF|MSR_PR|MSR_IR)) == (MSR_SF|MSR_IR)) { + if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) { if (mread(regs->nip, &instr, 4) == 4) { stepped = emulate_step(regs, instr); if (stepped < 0) { @@ -976,7 +976,7 @@ static int cpu_cmd(void) printf("cpus stopped:"); count = 0; for (cpu = 0; cpu < NR_CPUS; ++cpu) { - if (cpu_isset(cpu, cpus_in_xmon)) { + if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { if (count == 0) printf(" %x", cpu); ++count; @@ -992,7 +992,7 @@ static int cpu_cmd(void) return 0; } /* try to switch to cpu specified */ - if (!cpu_isset(cpu, cpus_in_xmon)) { + if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) { printf("cpu 0x%x isn't in xmon\n", cpu); return 0; } @@ -1497,6 +1497,10 @@ static void prregs(struct pt_regs *fp) #endif printf("pc = "); xmon_print_symbol(fp->nip, " ", "\n"); + if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) { + printf("cfar= "); + xmon_print_symbol(fp->orig_gpr3, " ", "\n"); + } printf("lr = "); xmon_print_symbol(fp->link, " ", "\n"); printf("msr = "REG" cr = %.8lx\n", fp->msr, fp->ccr); @@ -2663,7 +2667,7 @@ static void dump_stab(void) void dump_segments(void) { - if (cpu_has_feature(CPU_FTR_SLB)) + if (mmu_has_feature(MMU_FTR_SLB)) dump_slb(); else dump_stab(); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2508a6f31588..4a7f14079e03 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -88,6 +88,7 @@ config S390 select HAVE_KERNEL_XZ select HAVE_GET_USER_PAGES_FAST select HAVE_ARCH_MUTEX_CPU_RELAX + select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 1cf81d77c5a5..7f0b7cda6259 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o +obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 58f46734465f..a9ce135893f8 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -31,7 +31,8 @@ #define AES_KEYLEN_192 2 #define AES_KEYLEN_256 4 -static char keylen_flag = 0; +static u8 *ctrblk; +static char keylen_flag; struct s390_aes_ctx { u8 iv[AES_BLOCK_SIZE]; @@ -45,6 +46,24 @@ struct s390_aes_ctx { } fallback; }; +struct pcc_param { + u8 key[32]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +}; + +struct s390_xts_ctx { + u8 key[32]; + u8 xts_param[16]; + struct pcc_param pcc; + long enc; + long dec; + int key_len; + struct crypto_blkcipher *fallback; +}; + /* * Check if the key_len is supported by the HW. * Returns 0 if it is, a positive number if it is not and software fallback is @@ -504,15 +523,337 @@ static struct crypto_alg cbc_aes_alg = { } }; +static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int xts_fallback_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_fallback_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case 32: + xts_ctx->enc = KM_XTS_128_ENCRYPT; + xts_ctx->dec = KM_XTS_128_DECRYPT; + memcpy(xts_ctx->key + 16, in_key, 16); + memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16); + break; + case 48: + xts_ctx->enc = 0; + xts_ctx->dec = 0; + xts_fallback_setkey(tfm, in_key, key_len); + break; + case 64: + xts_ctx->enc = KM_XTS_256_ENCRYPT; + xts_ctx->dec = KM_XTS_256_DECRYPT; + memcpy(xts_ctx->key, in_key, 32); + memcpy(xts_ctx->pcc.key, in_key + 32, 32); + break; + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + xts_ctx->key_len = key_len; + return 0; +} + +static int xts_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_xts_ctx *xts_ctx, + struct blkcipher_walk *walk) +{ + unsigned int offset = (xts_ctx->key_len >> 1) & 0x10; + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + unsigned int n; + u8 *in, *out; + void *param; + + if (!nbytes) + goto out; + + memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block)); + memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit)); + memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts)); + memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak)); + param = xts_ctx->pcc.key + offset; + ret = crypt_s390_pcc(func, param); + BUG_ON(ret < 0); + + memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16); + param = xts_ctx->key + offset; + do { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + + ret = crypt_s390_km(func, param, out, in, n); + BUG_ON(ret < 0 || ret != n); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); +out: + return ret; +} + +static int xts_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_encrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk); +} + +static int xts_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_decrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk); +} + +static int xts_fallback_init(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + xts_ctx->fallback = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(xts_ctx->fallback)) { + pr_err("Allocating XTS fallback algorithm %s failed\n", + name); + return PTR_ERR(xts_ctx->fallback); + } + return 0; +} + +static void xts_fallback_exit(struct crypto_tfm *tfm) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(xts_ctx->fallback); + xts_ctx->fallback = NULL; +} + +static struct crypto_alg xts_aes_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_xts_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(xts_aes_alg.cra_list), + .cra_init = xts_fallback_init, + .cra_exit = xts_fallback_exit, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aes_set_key, + .encrypt = xts_aes_encrypt, + .decrypt = xts_aes_decrypt, + } + } +}; + +static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case 16: + sctx->enc = KMCTR_AES_128_ENCRYPT; + sctx->dec = KMCTR_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KMCTR_AES_192_ENCRYPT; + sctx->dec = KMCTR_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KMCTR_AES_256_ENCRYPT; + sctx->dec = KMCTR_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); + unsigned int i, n, nbytes; + u8 buf[AES_BLOCK_SIZE]; + u8 *out, *in; + + if (!walk->nbytes) + return ret; + + memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= AES_BLOCK_SIZE) { + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : + nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrblk + i, AES_BLOCK_SIZE); + } + ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk); + BUG_ON(ret < 0 || ret != n); + if (n > AES_BLOCK_SIZE) + memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrblk, AES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + /* + * final block may be < AES_BLOCK_SIZE, copy only nbytes + */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, sctx->key, buf, in, + AES_BLOCK_SIZE, ctrblk); + BUG_ON(ret < 0 || ret != AES_BLOCK_SIZE); + memcpy(out, buf, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + } + memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE); + return ret; +} + +static int ctr_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->enc, sctx, &walk); +} + +static int ctr_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->dec, sctx, &walk); +} + +static struct crypto_alg ctr_aes_alg = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_aes_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_aes_set_key, + .encrypt = ctr_aes_encrypt, + .decrypt = ctr_aes_decrypt, + } + } +}; + static int __init aes_s390_init(void) { int ret; - if (crypt_s390_func_available(KM_AES_128_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_128; - if (crypt_s390_func_available(KM_AES_192_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_192; - if (crypt_s390_func_available(KM_AES_256_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_256; if (!keylen_flag) @@ -535,9 +876,40 @@ static int __init aes_s390_init(void) if (ret) goto cbc_aes_err; + if (crypt_s390_func_available(KM_XTS_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KM_XTS_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&xts_aes_alg); + if (ret) + goto xts_aes_err; + } + + if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_aes_err; + } + ret = crypto_register_alg(&ctr_aes_alg); + if (ret) { + free_page((unsigned long) ctrblk); + goto ctr_aes_err; + } + } + out: return ret; +ctr_aes_err: + crypto_unregister_alg(&xts_aes_alg); +xts_aes_err: + crypto_unregister_alg(&cbc_aes_alg); cbc_aes_err: crypto_unregister_alg(&ecb_aes_alg); ecb_aes_err: @@ -548,6 +920,9 @@ aes_err: static void __exit aes_s390_fini(void) { + crypto_unregister_alg(&ctr_aes_alg); + free_page((unsigned long) ctrblk); + crypto_unregister_alg(&xts_aes_alg); crypto_unregister_alg(&cbc_aes_alg); crypto_unregister_alg(&ecb_aes_alg); crypto_unregister_alg(&aes_alg); diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 7ee9a1b4ad9f..49676771bd66 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -24,13 +24,18 @@ #define CRYPT_S390_PRIORITY 300 #define CRYPT_S390_COMPOSITE_PRIORITY 400 +#define CRYPT_S390_MSA 0x1 +#define CRYPT_S390_MSA3 0x2 +#define CRYPT_S390_MSA4 0x4 + /* s390 cryptographic operations */ enum crypt_s390_operations { CRYPT_S390_KM = 0x0100, CRYPT_S390_KMC = 0x0200, CRYPT_S390_KIMD = 0x0300, CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500 + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600 }; /* @@ -51,6 +56,10 @@ enum crypt_s390_km_func { KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80, KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14, KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80, + KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32, + KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80, + KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34, + KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80, }; /* @@ -75,6 +84,26 @@ enum crypt_s390_kmc_func { }; /* + * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER) + * instruction + */ +enum crypt_s390_kmctr_func { + KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0, + KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1, + KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80, + KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2, + KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80, + KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3, + KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80, + KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12, + KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80, + KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13, + KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80, + KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14, + KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80, +}; + +/* * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) * instruction */ @@ -83,6 +112,7 @@ enum crypt_s390_kimd_func { KIMD_SHA_1 = CRYPT_S390_KIMD | 1, KIMD_SHA_256 = CRYPT_S390_KIMD | 2, KIMD_SHA_512 = CRYPT_S390_KIMD | 3, + KIMD_GHASH = CRYPT_S390_KIMD | 65, }; /* @@ -284,6 +314,45 @@ static inline int crypt_s390_kmac(long func, void *param, } /** + * crypt_s390_kmctr: + * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * @counter: address of counter value + * + * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs + */ +static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, + const u8 *src, long src_len, u8 *counter) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; + register u8 *__ctr asm("6") = counter; + int ret = -1; + + asm volatile( + "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), + "+a" (__ctr) + : "d" (__func), "a" (__param) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** * crypt_s390_func_available: * @func: the function code of the specific function; 0 if op in general * @@ -291,13 +360,17 @@ static inline int crypt_s390_kmac(long func, void *param, * * Returns 1 if func available; 0 if func or op in general not available */ -static inline int crypt_s390_func_available(int func) +static inline int crypt_s390_func_available(int func, + unsigned int facility_mask) { unsigned char status[16]; int ret; - /* check if CPACF facility (bit 17) is available */ - if (!test_facility(17)) + if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) + return 0; + if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76)) + return 0; + if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) return 0; switch (func & CRYPT_S390_OP_MASK) { @@ -316,6 +389,10 @@ static inline int crypt_s390_func_available(int func) case CRYPT_S390_KMAC: ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); break; + case CRYPT_S390_KMCTR: + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, + NULL); + break; default: return 0; } @@ -326,4 +403,31 @@ static inline int crypt_s390_func_available(int func) return (status[func >> 3] & (0x80 >> (func & 7))) != 0; } +/** + * crypt_s390_pcc: + * @func: the function code passed to KM; see crypt_s390_km_func + * @param: address of parameter block; see POP for details on each func + * + * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU. + * + * Returns -1 for failure, 0 for success. + */ +static inline int crypt_s390_pcc(long func, void *param) +{ + register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */ + register void *__param asm("1") = param; + int ret = -1; + + asm volatile( + "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret) + : "d" (__func), "a" (__param) : "cc", "memory"); + return ret; +} + + #endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c deleted file mode 100644 index 5706af266442..000000000000 --- a/arch/s390/crypto/des_check_key.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * Originally released as descore by Dana L. How <how@isl.stanford.edu>. - * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel. - * Derived from Cryptoapi and Nettle implementations, adapted for in-place - * scatterlist interface. Changed LGPL to GPL per section 3 of the LGPL. - * - * s390 Version: - * Copyright IBM Corp. 2003 - * Author(s): Thomas Spatzier - * Jan Glauber (jan.glauber@de.ibm.com) - * - * Derived from "crypto/des.c" - * Copyright (c) 1992 Dana L. How. - * Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de> - * Copyright (c) Gisle Sflensminde <gisle@ii.uib.no> - * Copyright (C) 2001 Niels Mvller. - * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/crypto.h> -#include "crypto_des.h" - -#define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o)) - -static const u8 parity[] = { - 8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8, -}; - -/* - * RFC2451: Weak key checks SHOULD be performed. - */ -int -crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags) -{ - u32 n, w; - - n = parity[key[0]]; n <<= 4; - n |= parity[key[1]]; n <<= 4; - n |= parity[key[2]]; n <<= 4; - n |= parity[key[3]]; n <<= 4; - n |= parity[key[4]]; n <<= 4; - n |= parity[key[5]]; n <<= 4; - n |= parity[key[6]]; n <<= 4; - n |= parity[key[7]]; - w = 0x88888888L; - - if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY) - && !((n - (w >> 3)) & w)) { /* 1 in 10^10 keys passes this test */ - if (n < 0x41415151) { - if (n < 0x31312121) { - if (n < 0x14141515) { - /* 01 01 01 01 01 01 01 01 */ - if (n == 0x11111111) goto weak; - /* 01 1F 01 1F 01 0E 01 0E */ - if (n == 0x13131212) goto weak; - } else { - /* 01 E0 01 E0 01 F1 01 F1 */ - if (n == 0x14141515) goto weak; - /* 01 FE 01 FE 01 FE 01 FE */ - if (n == 0x16161616) goto weak; - } - } else { - if (n < 0x34342525) { - /* 1F 01 1F 01 0E 01 0E 01 */ - if (n == 0x31312121) goto weak; - /* 1F 1F 1F 1F 0E 0E 0E 0E (?) */ - if (n == 0x33332222) goto weak; - } else { - /* 1F E0 1F E0 0E F1 0E F1 */ - if (n == 0x34342525) goto weak; - /* 1F FE 1F FE 0E FE 0E FE */ - if (n == 0x36362626) goto weak; - } - } - } else { - if (n < 0x61616161) { - if (n < 0x44445555) { - /* E0 01 E0 01 F1 01 F1 01 */ - if (n == 0x41415151) goto weak; - /* E0 1F E0 1F F1 0E F1 0E */ - if (n == 0x43435252) goto weak; - } else { - /* E0 E0 E0 E0 F1 F1 F1 F1 (?) */ - if (n == 0x44445555) goto weak; - /* E0 FE E0 FE F1 FE F1 FE */ - if (n == 0x46465656) goto weak; - } - } else { - if (n < 0x64646565) { - /* FE 01 FE 01 FE 01 FE 01 */ - if (n == 0x61616161) goto weak; - /* FE 1F FE 1F FE 0E FE 0E */ - if (n == 0x63636262) goto weak; - } else { - /* FE E0 FE E0 FE F1 FE F1 */ - if (n == 0x64646565) goto weak; - /* FE FE FE FE FE FE FE FE */ - if (n == 0x66666666) goto weak; - } - } - } - } - return 0; -weak: - *flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; -} - -EXPORT_SYMBOL(crypto_des_check_key); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Key Check function for DES & DES3 Cipher Algorithms"); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index cc5420118393..a52bfd124d86 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -3,7 +3,7 @@ * * s390 implementation of the DES Cipher Algorithm. * - * Copyright IBM Corp. 2003,2007 + * Copyright IBM Corp. 2003,2011 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) * @@ -22,22 +22,19 @@ #include "crypt_s390.h" -#define DES3_192_KEY_SIZE (3 * DES_KEY_SIZE) +#define DES3_KEY_SIZE (3 * DES_KEY_SIZE) -struct crypt_s390_des_ctx { - u8 iv[DES_BLOCK_SIZE]; - u8 key[DES_KEY_SIZE]; -}; +static u8 *ctrblk; -struct crypt_s390_des3_192_ctx { +struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; - u8 key[DES3_192_KEY_SIZE]; + u8 key[DES3_KEY_SIZE]; }; static int des_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) + unsigned int key_len) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; u32 tmp[DES_EXPKEY_WORDS]; @@ -47,22 +44,22 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, return -EINVAL; } - memcpy(dctx->key, key, keylen); + memcpy(ctx->key, key, key_len); return 0; } static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } static struct crypto_alg des_alg = { @@ -71,7 +68,7 @@ static struct crypto_alg des_alg = { .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(des_alg.cra_list), .cra_u = { @@ -86,7 +83,7 @@ static struct crypto_alg des_alg = { }; static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, - void *param, struct blkcipher_walk *walk) + u8 *key, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes; @@ -97,7 +94,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_km(func, param, out, in, n); + ret = crypt_s390_km(func, key, out, in, n); BUG_ON((ret < 0) || (ret != n)); nbytes &= DES_BLOCK_SIZE - 1; @@ -108,7 +105,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, } static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, - void *param, struct blkcipher_walk *walk) + u8 *iv, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; @@ -116,20 +113,20 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, if (!nbytes) goto out; - memcpy(param, walk->iv, DES_BLOCK_SIZE); + memcpy(iv, walk->iv, DES_BLOCK_SIZE); do { /* only use complete blocks */ unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, param, out, in, n); + ret = crypt_s390_kmc(func, iv, out, in, n); BUG_ON((ret < 0) || (ret != n)); nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, param, DES_BLOCK_SIZE); + memcpy(walk->iv, iv, DES_BLOCK_SIZE); out: return ret; @@ -139,22 +136,22 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk); } static int ecb_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk); } static struct crypto_alg ecb_des_alg = { @@ -163,7 +160,7 @@ static struct crypto_alg ecb_des_alg = { .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(ecb_des_alg.cra_list), @@ -182,22 +179,22 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk); } static struct crypto_alg cbc_des_alg = { @@ -206,7 +203,7 @@ static struct crypto_alg cbc_des_alg = { .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(cbc_des_alg.cra_list), @@ -235,10 +232,10 @@ static struct crypto_alg cbc_des_alg = { * property. * */ -static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && @@ -248,141 +245,276 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } - memcpy(dctx->key, key, keylen); + memcpy(ctx->key, key, key_len); return 0; } -static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src, - DES_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src, - DES_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static struct crypto_alg des3_192_alg = { +static struct crypto_alg des3_alg = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-s390", .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(des3_192_alg.cra_list), + .cra_list = LIST_HEAD_INIT(des3_alg.cra_list), .cra_u = { .cipher = { - .cia_min_keysize = DES3_192_KEY_SIZE, - .cia_max_keysize = DES3_192_KEY_SIZE, - .cia_setkey = des3_192_setkey, - .cia_encrypt = des3_192_encrypt, - .cia_decrypt = des3_192_decrypt, + .cia_min_keysize = DES3_KEY_SIZE, + .cia_max_keysize = DES3_KEY_SIZE, + .cia_setkey = des3_setkey, + .cia_encrypt = des3_encrypt, + .cia_decrypt = des3_decrypt, } } }; -static int ecb_des3_192_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk); } -static int ecb_des3_192_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk); } -static struct crypto_alg ecb_des3_192_alg = { +static struct crypto_alg ecb_des3_alg = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "ecb-des3_ede-s390", .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT( - ecb_des3_192_alg.cra_list), + ecb_des3_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = DES3_192_KEY_SIZE, - .max_keysize = DES3_192_KEY_SIZE, - .setkey = des3_192_setkey, - .encrypt = ecb_des3_192_encrypt, - .decrypt = ecb_des3_192_decrypt, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .setkey = des3_setkey, + .encrypt = ecb_des3_encrypt, + .decrypt = ecb_des3_decrypt, } } }; -static int cbc_des3_192_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk); } -static int cbc_des3_192_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk); } -static struct crypto_alg cbc_des3_192_alg = { +static struct crypto_alg cbc_des3_alg = { .cra_name = "cbc(des3_ede)", .cra_driver_name = "cbc-des3_ede-s390", .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT( - cbc_des3_192_alg.cra_list), + cbc_des3_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = DES3_192_KEY_SIZE, - .max_keysize = DES3_192_KEY_SIZE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, - .setkey = des3_192_setkey, - .encrypt = cbc_des3_192_encrypt, - .decrypt = cbc_des3_192_decrypt, + .setkey = des3_setkey, + .encrypt = cbc_des3_encrypt, + .decrypt = cbc_des3_decrypt, } } }; -static int des_s390_init(void) +static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, + struct s390_des_ctx *ctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); + unsigned int i, n, nbytes; + u8 buf[DES_BLOCK_SIZE]; + u8 *out, *in; + + memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= DES_BLOCK_SIZE) { + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : + nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrblk + i, DES_BLOCK_SIZE); + } + ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk); + BUG_ON((ret < 0) || (ret != n)); + if (n > DES_BLOCK_SIZE) + memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrblk, DES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, ctx->key, buf, in, + DES_BLOCK_SIZE, ctrblk); + BUG_ON(ret < 0 || ret != DES_BLOCK_SIZE); + memcpy(out, buf, nbytes); + crypto_inc(ctrblk, DES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + } + memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE); + return ret; +} + +static int ctr_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk); +} + +static int ctr_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des_alg = { + .cra_name = "ctr(des)", + .cra_driver_name = "ctr-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_des_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = ctr_des_encrypt, + .decrypt = ctr_des_decrypt, + } + } +}; + +static int ctr_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk); +} + +static int ctr_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des3_alg = { + .cra_name = "ctr(des3_ede)", + .cra_driver_name = "ctr-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_des3_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey, + .encrypt = ctr_des3_encrypt, + .decrypt = ctr_des3_decrypt, + } + } +}; + +static int __init des_s390_init(void) { int ret; - if (!crypt_s390_func_available(KM_DEA_ENCRYPT) || - !crypt_s390_func_available(KM_TDEA_192_ENCRYPT)) + if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) || + !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA)) return -EOPNOTSUPP; ret = crypto_register_alg(&des_alg); @@ -394,23 +526,46 @@ static int des_s390_init(void) ret = crypto_register_alg(&cbc_des_alg); if (ret) goto cbc_des_err; - ret = crypto_register_alg(&des3_192_alg); + ret = crypto_register_alg(&des3_alg); if (ret) - goto des3_192_err; - ret = crypto_register_alg(&ecb_des3_192_alg); + goto des3_err; + ret = crypto_register_alg(&ecb_des3_alg); if (ret) - goto ecb_des3_192_err; - ret = crypto_register_alg(&cbc_des3_192_alg); + goto ecb_des3_err; + ret = crypto_register_alg(&cbc_des3_alg); if (ret) - goto cbc_des3_192_err; + goto cbc_des3_err; + + if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&ctr_des_alg); + if (ret) + goto ctr_des_err; + ret = crypto_register_alg(&ctr_des3_alg); + if (ret) + goto ctr_des3_err; + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_mem_err; + } + } out: return ret; -cbc_des3_192_err: - crypto_unregister_alg(&ecb_des3_192_alg); -ecb_des3_192_err: - crypto_unregister_alg(&des3_192_alg); -des3_192_err: +ctr_mem_err: + crypto_unregister_alg(&ctr_des3_alg); +ctr_des3_err: + crypto_unregister_alg(&ctr_des_alg); +ctr_des_err: + crypto_unregister_alg(&cbc_des3_alg); +cbc_des3_err: + crypto_unregister_alg(&ecb_des3_alg); +ecb_des3_err: + crypto_unregister_alg(&des3_alg); +des3_err: crypto_unregister_alg(&cbc_des_alg); cbc_des_err: crypto_unregister_alg(&ecb_des_alg); @@ -422,9 +577,14 @@ des_err: static void __exit des_s390_exit(void) { - crypto_unregister_alg(&cbc_des3_192_alg); - crypto_unregister_alg(&ecb_des3_192_alg); - crypto_unregister_alg(&des3_192_alg); + if (ctrblk) { + crypto_unregister_alg(&ctr_des_alg); + crypto_unregister_alg(&ctr_des3_alg); + free_page((unsigned long) ctrblk); + } + crypto_unregister_alg(&cbc_des3_alg); + crypto_unregister_alg(&ecb_des3_alg); + crypto_unregister_alg(&des3_alg); crypto_unregister_alg(&cbc_des_alg); crypto_unregister_alg(&ecb_des_alg); crypto_unregister_alg(&des_alg); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c new file mode 100644 index 000000000000..b1bd170f24b1 --- /dev/null +++ b/arch/s390/crypto/ghash_s390.c @@ -0,0 +1,162 @@ +/* + * Cryptographic API. + * + * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode). + * + * Copyright IBM Corp. 2011 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <crypto/internal/hash.h> +#include <linux/module.h> + +#include "crypt_s390.h" + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_ctx { + u8 icv[16]; + u8 key[16]; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, GHASH_BLOCK_SIZE); + memset(ctx->icv, 0, GHASH_BLOCK_SIZE); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + unsigned int n; + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + n = min(srclen, dctx->bytes); + dctx->bytes -= n; + srclen -= n; + + memcpy(pos, src, n); + src += n; + + if (!dctx->bytes) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + GHASH_BLOCK_SIZE); + BUG_ON(ret != GHASH_BLOCK_SIZE); + } + } + + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + if (n) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + BUG_ON(ret != n); + src += n; + srclen -= n; + } + + if (srclen) { + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + memcpy(buf, src, srclen); + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + memset(pos, 0, dctx->bytes); + + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + BUG_ON(ret != GHASH_BLOCK_SIZE); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + + ghash_flush(ctx, dctx); + memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; + +static int __init ghash_mod_init(void) +{ + if (!crypt_s390_func_available(KIMD_GHASH, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) + return -EOPNOTSUPP; + + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_mod_init); +module_exit(ghash_mod_exit); + +MODULE_ALIAS("ghash"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 8b16c479585b..0808fbf0f7d3 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -166,7 +166,7 @@ static int __init prng_init(void) int ret; /* check if the CPU has a PRNG */ - if (!crypt_s390_func_available(KMC_PRNG)) + if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) return -EOPNOTSUPP; if (prng_chunk_size < 8) diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index f6de7826c979..e9868c6e0a08 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -90,7 +90,7 @@ static struct shash_alg alg = { static int __init sha1_s390_init(void) { - if (!crypt_s390_func_available(KIMD_SHA_1)) + if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA)) return -EOPNOTSUPP; return crypto_register_shash(&alg); } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 61a7db372121..5ed8d64fc2ed 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -86,7 +86,7 @@ static struct shash_alg alg = { static int sha256_s390_init(void) { - if (!crypt_s390_func_available(KIMD_SHA_256)) + if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA)) return -EOPNOTSUPP; return crypto_register_shash(&alg); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 4bf73d0dc525..32a81383b69c 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -132,7 +132,7 @@ static int __init init(void) { int ret; - if (!crypt_s390_func_available(KIMD_SHA_512)) + if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA)) return -EOPNOTSUPP; if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h index 43a5c78046db..3e20383d0921 100644 --- a/arch/s390/include/asm/cacheflush.h +++ b/arch/s390/include/asm/cacheflush.h @@ -11,5 +11,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable); int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); #endif /* _S390_CACHEFLUSH_H */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 3c29be4836ed..b7931faaef6d 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -11,15 +11,13 @@ struct dyn_arch_ftrace { }; #ifdef CONFIG_64BIT #define MCOUNT_INSN_SIZE 12 -#define MCOUNT_OFFSET 8 #else #define MCOUNT_INSN_SIZE 20 -#define MCOUNT_OFFSET 4 #endif static inline unsigned long ftrace_call_adjust(unsigned long addr) { - return addr - MCOUNT_OFFSET; + return addr; } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h new file mode 100644 index 000000000000..95a6cf2b5b67 --- /dev/null +++ b/arch/s390/include/asm/jump_label.h @@ -0,0 +1,37 @@ +#ifndef _ASM_S390_JUMP_LABEL_H +#define _ASM_S390_JUMP_LABEL_H + +#include <linux/types.h> + +#define JUMP_LABEL_NOP_SIZE 6 + +#ifdef CONFIG_64BIT +#define ASM_PTR ".quad" +#define ASM_ALIGN ".balign 8" +#else +#define ASM_PTR ".long" +#define ASM_ALIGN ".balign 4" +#endif + +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("0: brcl 0,0\n" + ".pushsection __jump_table, \"aw\"\n" + ASM_ALIGN "\n" + ASM_PTR " 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (key) : : label); + return false; +label: + return true; +} + +typedef unsigned long jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 64230bc392fa..5ff15dacb571 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ - vdso.o vtime.o sysinfo.o nmi.o sclp.o + vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c new file mode 100644 index 000000000000..44cc06bedf77 --- /dev/null +++ b/arch/s390/kernel/jump_label.c @@ -0,0 +1,59 @@ +/* + * Jump label s390 support + * + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <linux/jump_label.h> +#include <asm/ipl.h> + +#ifdef HAVE_JUMP_LABEL + +struct insn { + u16 opcode; + s32 offset; +} __packed; + +struct insn_args { + unsigned long *target; + struct insn *insn; + ssize_t size; +}; + +static int __arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + int rc; + + rc = probe_kernel_write(args->target, args->insn, args->size); + WARN_ON_ONCE(rc < 0); + return 0; +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + struct insn insn; + + if (type == JUMP_LABEL_ENABLE) { + /* brcl 15,offset */ + insn.opcode = 0xc0f4; + insn.offset = (entry->target - entry->code) >> 1; + } else { + /* brcl 0,0 */ + insn.opcode = 0xc004; + insn.offset = 0; + } + + args.target = (void *) entry->code; + args.insn = &insn; + args.size = JUMP_LABEL_NOP_SIZE; + + stop_machine(__arch_jump_label_transform, &args, NULL); +} + +#endif diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63a97db83f96..63c7d9ff220d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; /* * handle bit signal external calls - * - * For the ec_schedule signal we have to do nothing. All the work - * is done automatically when we return from the interrupt. */ bits = xchg(&S390_lowcore.ext_call_fast, 0); + if (test_bit(ec_schedule, &bits)) + scheduler_ipi(); + if (test_bit(ec_call_function, &bits)) generic_smp_call_function_interrupt(); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 0607e4b14b27..f05edcc3beff 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -54,3 +54,8 @@ int set_memory_nx(unsigned long addr, int numpages) return 0; } EXPORT_SYMBOL_GPL(set_memory_nx); + +int set_memory_x(unsigned long addr, int numpages) +{ + return 0; +} diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 4b89da248d17..bc439de48cd1 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -24,7 +24,6 @@ config SUPERH select RTC_LIB select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW - select ARCH_NO_SYSDEV_OPS help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index e71a531f1e31..77ec0e7b8ddf 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -48,7 +48,6 @@ CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=y CONFIG_PM=y CONFIG_PM_DEBUG=y -CONFIG_PM_VERBOSE=y CONFIG_PM_RUNTIME=y CONFIG_CPU_IDLE=y CONFIG_NET=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index dc4a2eb6a616..c41650572d79 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -83,7 +83,6 @@ CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_MISC=y CONFIG_PM=y CONFIG_PM_DEBUG=y -CONFIG_PM_VERBOSE=y CONFIG_PM_RUNTIME=y CONFIG_CPU_IDLE=y CONFIG_NET=y diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 14726eef1ce0..f0907995b4c9 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -20,6 +20,7 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/io.h> +#include <linux/prefetch.h> #include <asm/page.h> #include <asm/cacheflush.h> #include <cpu/sq.h> diff --git a/arch/sh/kernel/cpu/shmobile/pm_runtime.c b/arch/sh/kernel/cpu/shmobile/pm_runtime.c index 6dcb8166a64d..22db127afa7b 100644 --- a/arch/sh/kernel/cpu/shmobile/pm_runtime.c +++ b/arch/sh/kernel/cpu/shmobile/pm_runtime.c @@ -139,7 +139,7 @@ void platform_pm_runtime_suspend_idle(void) queue_work(pm_wq, &hwblk_work); } -int platform_pm_runtime_suspend(struct device *dev) +static int default_platform_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct pdev_archdata *ad = &pdev->archdata; @@ -147,7 +147,7 @@ int platform_pm_runtime_suspend(struct device *dev) int hwblk = ad->hwblk_id; int ret = 0; - dev_dbg(dev, "platform_pm_runtime_suspend() [%d]\n", hwblk); + dev_dbg(dev, "%s() [%d]\n", __func__, hwblk); /* ignore off-chip platform devices */ if (!hwblk) @@ -183,20 +183,20 @@ int platform_pm_runtime_suspend(struct device *dev) mutex_unlock(&ad->mutex); out: - dev_dbg(dev, "platform_pm_runtime_suspend() [%d] returns %d\n", - hwblk, ret); + dev_dbg(dev, "%s() [%d] returns %d\n", + __func__, hwblk, ret); return ret; } -int platform_pm_runtime_resume(struct device *dev) +static int default_platform_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct pdev_archdata *ad = &pdev->archdata; int hwblk = ad->hwblk_id; int ret = 0; - dev_dbg(dev, "platform_pm_runtime_resume() [%d]\n", hwblk); + dev_dbg(dev, "%s() [%d]\n", __func__, hwblk); /* ignore off-chip platform devices */ if (!hwblk) @@ -228,19 +228,19 @@ int platform_pm_runtime_resume(struct device *dev) */ mutex_unlock(&ad->mutex); out: - dev_dbg(dev, "platform_pm_runtime_resume() [%d] returns %d\n", - hwblk, ret); + dev_dbg(dev, "%s() [%d] returns %d\n", + __func__, hwblk, ret); return ret; } -int platform_pm_runtime_idle(struct device *dev) +static int default_platform_runtime_idle(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); int hwblk = pdev->archdata.hwblk_id; int ret = 0; - dev_dbg(dev, "platform_pm_runtime_idle() [%d]\n", hwblk); + dev_dbg(dev, "%s() [%d]\n", __func__, hwblk); /* ignore off-chip platform devices */ if (!hwblk) @@ -252,10 +252,19 @@ int platform_pm_runtime_idle(struct device *dev) /* suspend synchronously to disable clocks immediately */ ret = pm_runtime_suspend(dev); out: - dev_dbg(dev, "platform_pm_runtime_idle() [%d] done!\n", hwblk); + dev_dbg(dev, "%s() [%d] done!\n", __func__, hwblk); return ret; } +static struct dev_power_domain default_power_domain = { + .ops = { + .runtime_suspend = default_platform_runtime_suspend, + .runtime_resume = default_platform_runtime_resume, + .runtime_idle = default_platform_runtime_idle, + USE_PLATFORM_PM_SLEEP_OPS + }, +}; + static int platform_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { @@ -276,6 +285,7 @@ static int platform_bus_notify(struct notifier_block *nb, hwblk_disable(hwblk_info, hwblk); /* make sure driver re-inits itself once */ __set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags); + dev->pwr_domain = &default_power_domain; break; /* TODO: add BUS_NOTIFY_BIND_DRIVER and increase idle count */ case BUS_NOTIFY_BOUND_DRIVER: @@ -289,6 +299,7 @@ static int platform_bus_notify(struct notifier_block *nb, __set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags); break; case BUS_NOTIFY_DEL_DEVICE: + dev->pwr_domain = NULL; break; } return 0; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 509b36b45115..6207561ea34a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/cpu.h> #include <linux/interrupt.h> +#include <linux/sched.h> #include <asm/atomic.h> #include <asm/processor.h> #include <asm/system.h> @@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg) generic_smp_call_function_interrupt(); break; case SMP_MSG_RESCHEDULE: + scheduler_ipi(); break; case SMP_MSG_FUNCTION_SINGLE: generic_smp_call_function_single_interrupt(); diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 3484c2f65aba..b51a17104b5f 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -87,7 +87,6 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(1); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); - sysfs_printk_last_file(); print_modules(); show_regs(regs); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e560d102215a..63a027c9ada5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,6 +25,10 @@ config SPARC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL + select HAVE_GENERIC_HARDIRQS + select GENERIC_HARDIRQS_NO_DEPRECATED + select GENERIC_IRQ_SHOW + select USE_GENERIC_SMP_HELPERS if SMP config SPARC32 def_bool !64BIT @@ -43,15 +47,12 @@ config SPARC64 select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS - select USE_GENERIC_SMP_HELPERS if SMP select RTC_DRV_CMOS select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE select HAVE_PERF_EVENTS select PERF_USE_VMALLOC - select HAVE_GENERIC_HARDIRQS - select GENERIC_IRQ_SHOW select IRQ_PREFLOW_FASTEOI config ARCH_DEFCONFIG diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h index 31d48a0e32c7..a4c5a938b936 100644 --- a/arch/sparc/include/asm/cpudata_32.h +++ b/arch/sparc/include/asm/cpudata_32.h @@ -16,6 +16,10 @@ typedef struct { unsigned long clock_tick; unsigned int multiplier; unsigned int counter; +#ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; +#endif int prom_node; int mid; int next; @@ -23,5 +27,6 @@ typedef struct { DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) +#define local_cpu_data() __get_cpu_var(__cpu_data) #endif /* _SPARC_CPUDATA_H */ diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index 86666f70322e..482c79e2a416 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void) pdma_areasize = pdma_size; } -/* Our low-level entry point in arch/sparc/kernel/entry.S */ -extern int sparc_floppy_request_irq(int irq, unsigned long flags, - irq_handler_t irq_handler); +extern int sparc_floppy_request_irq(unsigned int irq, + irq_handler_t irq_handler); static int sun_fd_request_irq(void) { static int once = 0; - int error; - if(!once) { + if (!once) { once = 1; - error = sparc_floppy_request_irq(FLOPPY_IRQ, - IRQF_DISABLED, - floppy_interrupt); - return ((error == 0) ? 0 : -1); - } else return 0; + return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt); + } else { + return 0; + } } static struct linux_prom_registers fd_regs[2]; static int sun_floppy_init(void) { + struct platform_device *op; + struct device_node *dp; char state[128]; phandle tnode, fd_node; int num_regs; @@ -310,7 +309,6 @@ static int sun_floppy_init(void) use_virtual_dma = 1; - FLOPPY_IRQ = 11; /* Forget it if we aren't on a machine that could possibly * ever have a floppy drive. */ @@ -349,6 +347,26 @@ static int sun_floppy_init(void) sun_fdc = (struct sun_flpy_controller *) of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy"); + /* Look up irq in platform_device. + * We try "SUNW,fdtwo" and "fd" + */ + for_each_node_by_name(dp, "SUNW,fdtwo") { + op = of_find_device_by_node(dp); + if (op) + break; + } + if (!op) { + for_each_node_by_name(dp, "fd") { + op = of_find_device_by_node(dp); + if (op) + break; + } + } + if (!op) + goto no_sun_fdc; + + FLOPPY_IRQ = op->archdata.irqs[0]; + /* Last minute sanity check... */ if(sun_fdc->status_82072 == 0xff) { sun_fdc = NULL; diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h index a34b2994937a..f6902cf3cbe9 100644 --- a/arch/sparc/include/asm/io.h +++ b/arch/sparc/include/asm/io.h @@ -5,4 +5,17 @@ #else #include <asm/io_32.h> #endif + +/* + * Defines used for both SPARC32 and SPARC64 + */ + +/* Big endian versions of memory read/write routines */ +#define readb_be(__addr) __raw_readb(__addr) +#define readw_be(__addr) __raw_readw(__addr) +#define readl_be(__addr) __raw_readl(__addr) +#define writeb_be(__b, __addr) __raw_writeb(__b, __addr) +#define writel_be(__w, __addr) __raw_writel(__w, __addr) +#define writew_be(__l, __addr) __raw_writew(__l, __addr) + #endif diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h index eced3e3ebd30..2ae3acaeb1b3 100644 --- a/arch/sparc/include/asm/irq_32.h +++ b/arch/sparc/include/asm/irq_32.h @@ -6,7 +6,11 @@ #ifndef _SPARC_IRQ_H #define _SPARC_IRQ_H -#define NR_IRQS 16 +/* Allocated number of logical irq numbers. + * sun4d boxes (ss2000e) should be OK with ~32. + * Be on the safe side and make room for 64 + */ +#define NR_IRQS 64 #include <linux/interrupt.h> diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 427d4684e0d2..fc73a82366f8 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,17 +7,20 @@ #define JUMP_LABEL_NOP_SIZE 4 -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\n\t" \ - "nop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t"\ - ".align 4\n\t" \ - ".word 1b, %l[" #label "], %c0\n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label);\ - } while (0) +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h index c04f96fb753c..6bdaf1e43d2a 100644 --- a/arch/sparc/include/asm/leon.h +++ b/arch/sparc/include/asm/leon.h @@ -52,29 +52,6 @@ #define LEON_DIAGF_VALID 0x2000 #define LEON_DIAGF_VALID_SHIFT 13 -/* - * Interrupt Sources - * - * The interrupt source numbers directly map to the trap type and to - * the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask, - * and the Interrupt Pending Registers. - */ -#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR 1 -#define LEON_INTERRUPT_UART_1_RX_TX 2 -#define LEON_INTERRUPT_UART_0_RX_TX 3 -#define LEON_INTERRUPT_EXTERNAL_0 4 -#define LEON_INTERRUPT_EXTERNAL_1 5 -#define LEON_INTERRUPT_EXTERNAL_2 6 -#define LEON_INTERRUPT_EXTERNAL_3 7 -#define LEON_INTERRUPT_TIMER1 8 -#define LEON_INTERRUPT_TIMER2 9 -#define LEON_INTERRUPT_EMPTY1 10 -#define LEON_INTERRUPT_EMPTY2 11 -#define LEON_INTERRUPT_OPEN_ETH 12 -#define LEON_INTERRUPT_EMPTY4 13 -#define LEON_INTERRUPT_EMPTY5 14 -#define LEON_INTERRUPT_EMPTY6 15 - /* irq masks */ #define LEON_HARD_INT(x) (1 << (x)) /* irq 0-15 */ #define LEON_IRQMASK_R 0x0000fffe /* bit 15- 1 of lregs.irqmask */ @@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void) /* macro access for leon_readnobuffer_reg() */ #define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x)) -extern void sparc_leon_eirq_register(int eirq); extern void leon_init(void); extern void leon_switch_mm(void); extern void leon_init_IRQ(void); @@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void) #endif /*!__ASSEMBLY__*/ #ifdef CONFIG_SMP -# define LEON3_IRQ_RESCHEDULE 13 -# define LEON3_IRQ_TICKER (leon_percpu_timer_dev[0].irq) +# define LEON3_IRQ_IPI_DEFAULT 13 +# define LEON3_IRQ_TICKER (leon3_ticker_irq) # define LEON3_IRQ_CROSS_CALL 15 #endif @@ -339,9 +315,9 @@ struct leon2_cacheregs { #include <linux/interrupt.h> struct device_node; -extern int sparc_leon_eirq_get(int eirq, int cpu); -extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id); -extern void sparc_leon_eirq_register(int eirq); +extern unsigned int leon_build_device_irq(unsigned int real_irq, + irq_flow_handler_t flow_handler, + const char *name, int do_ack); extern void leon_clear_clock_irq(void); extern void leon_load_profile_irq(int cpu, unsigned int limit); extern void leon_init_timers(irq_handler_t counter_fn); @@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs); extern int leon_flush_needed(void); extern void leon_switch_mm(void); extern int srmmu_swprobe_trace; +extern int leon3_ticker_irq; #ifdef CONFIG_SMP extern int leon_smp_nrcpus(void); @@ -366,17 +343,19 @@ extern void leon_smp_done(void); extern void leon_boot_cpus(void); extern int leon_boot_one_cpu(int i); void leon_init_smp(void); -extern void cpu_probe(void); extern void cpu_idle(void); extern void init_IRQ(void); extern void cpu_panic(void); extern int __leon_processor_id(void); void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu); +extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused); -extern unsigned int real_irq_entry[], smpleon_ticker[]; +extern unsigned int real_irq_entry[]; +extern unsigned int smpleon_ipi[]; extern unsigned int patchme_maybe_smp_msg[]; extern unsigned int t_nmi[], linux_trap_ipi15_leon[]; extern unsigned int linux_trap_ipi15_sun4m[]; +extern int leon_ipi_irq; #endif /* CONFIG_SMP */ diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h index f20ef562b265..7eb5d78f5211 100644 --- a/arch/sparc/include/asm/pcic.h +++ b/arch/sparc/include/asm/pcic.h @@ -29,11 +29,17 @@ struct linux_pcic { int pcic_imdim; }; -extern int pcic_probe(void); -/* Erm... MJ redefined pcibios_present() so that it does not work early. */ +#ifdef CONFIG_PCI extern int pcic_present(void); +extern int pcic_probe(void); +extern void pci_time_init(void); extern void sun4m_pci_init_IRQ(void); - +#else +static inline int pcic_present(void) { return 0; } +static inline int pcic_probe(void) { return 0; } +static inline void pci_time_init(void) {} +static inline void sun4m_pci_init_IRQ(void) {} +#endif #endif /* Size of PCI I/O space which we relocate. */ diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 303bd4dc8292..5b31a8e89823 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -8,6 +8,8 @@ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/const.h> + #ifndef __ASSEMBLY__ #include <asm-generic/4level-fixup.h> @@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, #endif /* !(__ASSEMBLY__) */ -#define VMALLOC_START 0xfe600000 +#define VMALLOC_START _AC(0xfe600000,UL) /* XXX Alter this when I get around to fixing sun4c - Anton */ -#define VMALLOC_END 0xffc00000 +#define VMALLOC_END _AC(0xffc00000,UL) /* We provide our own get_unmapped_area to cope with VA holes for userland */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index f8dddb7045bb..b77128c80524 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048]; extern void paging_init(void); extern unsigned long find_ecache_flush_span(unsigned long size); +struct seq_file; +extern void mmu_info(struct seq_file *); + /* These do nothing with the way I have things setup. */ #define mmu_lockarea(vaddr, len) (vaddr) #define mmu_unlockarea(vaddr, len) do { } while(0) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 2643c62f4ac0..64718ba26434 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -11,4 +11,16 @@ # define COMMAND_LINE_SIZE 256 #endif +#ifdef __KERNEL__ + +#ifdef CONFIG_SPARC32 +/* The CPU that was used for booting + * Only sun4d + leon may have boot_cpu_id != 0 + */ +extern unsigned char boot_cpu_id; +extern unsigned char boot_cpu_id4; +#endif + +#endif /* __KERNEL__ */ + #endif /* _SPARC_SETUP_H */ diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index d82d7f4c0a79..093f10843ff2 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -50,42 +50,38 @@ void smp_callin(void); void smp_boot_cpus(void); void smp_store_cpu_info(int); +void smp_resched_interrupt(void); +void smp_call_function_single_interrupt(void); +void smp_call_function_interrupt(void); + struct seq_file; void smp_bogo(struct seq_file *); void smp_info(struct seq_file *); BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long) BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void) +BTFIXUPDEF_CALL(void, smp_ipi_resched, int); +BTFIXUPDEF_CALL(void, smp_ipi_single, int); +BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int); BTFIXUPDEF_BLACKBOX(hard_smp_processor_id) BTFIXUPDEF_BLACKBOX(load_current) #define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4) -static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); } +static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); } static inline void xc1(smpfunc_t func, unsigned long arg1) -{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); } static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); } static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); } +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); } static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) -{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); } - -static inline int smp_call_function(void (*func)(void *info), void *info, int wait) -{ - xc1((smpfunc_t)func, (unsigned long)info); - return 0; -} +{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); } -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int wait) -{ - smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid), - (unsigned long) info, 0, 0, 0); - return 0; -} +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); static inline int cpu_logical_map(int cpu) { @@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void) __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t" "nop; nop" : "=&r" (cpuid)); + - leon + __asm__ __volatile__( "rd %asr17, %0\n\t" + "srl %0, 0x1c, %0\n\t" + "nop\n\t" : + "=&r" (cpuid)); See btfixup.h and btfixupprep.c to understand how a blackbox works. */ __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t" diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index f49e11cd4ded..20bca8950710 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -49,6 +49,10 @@ extern void cpu_play_dead(void); extern void smp_fetch_global_regs(void); +struct seq_file; +void smp_bogo(struct seq_file *); +void smp_info(struct seq_file *); + #ifdef CONFIG_HOTPLUG_CPU extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 7f9b9dba38a6..5f5b8bf3f50d 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,6 +9,7 @@ #ifndef __ASSEMBLY__ #include <asm/psr.h> +#include <asm/processor.h> /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h index 890036b3689a..47a7e862474e 100644 --- a/arch/sparc/include/asm/system_32.h +++ b/arch/sparc/include/asm/system_32.h @@ -15,11 +15,6 @@ #include <linux/irqflags.h> -static inline unsigned int probe_irq_mask(unsigned long val) -{ - return 0; -} - /* * Sparc (general) CPU types */ diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h index e3b65d8cf41b..3c96d3bb9f15 100644 --- a/arch/sparc/include/asm/system_64.h +++ b/arch/sparc/include/asm/system_64.h @@ -29,10 +29,6 @@ enum sparc_cpu { /* This cannot ever be a sun4c :) That's just history. */ #define ARCH_SUN4C 0 -extern const char *sparc_cpu_type; -extern const char *sparc_fpu_type; -extern const char *sparc_pmu_type; - extern char reboot_command[]; /* These are here in an effort to more fully work around Spitfire Errata diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 1c79f32734a0..8b9c556d630b 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -65,6 +65,10 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #define smt_capable() (sparc64_multi_core) #endif /* CONFIG_SMP */ -#define cpu_coregroup_mask(cpu) (&cpu_core_map[cpu]) +extern cpumask_t cpu_core_map[NR_CPUS]; +static inline const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_core_map[cpu]; +} #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 9d897b6db983..c5387ed0add8 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -404,8 +404,9 @@ #define __NR_open_by_handle_at 333 #define __NR_clock_adjtime 334 #define __NR_syncfs 335 +#define __NR_sendmmsg 336 -#define NR_syscalls 336 +#define NR_syscalls 337 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h index 5b0a06dc3bcb..a9be04b0d049 100644 --- a/arch/sparc/include/asm/winmacro.h +++ b/arch/sparc/include/asm/winmacro.h @@ -103,6 +103,7 @@ st %scratch, [%cur_reg + TI_W_SAVED]; #ifdef CONFIG_SMP +/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */ #define LOAD_CURRENT4M(dest_reg, idreg) \ rd %tbr, %idreg; \ sethi %hi(current_set), %dest_reg; \ @@ -118,6 +119,14 @@ or %dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \ ld [%idreg + %dest_reg], %dest_reg; +#define LOAD_CURRENT_LEON(dest_reg, idreg) \ + rd %asr17, %idreg; \ + sethi %hi(current_set), %dest_reg; \ + srl %idreg, 0x1c, %idreg; \ + or %dest_reg, %lo(current_set), %dest_reg; \ + sll %idreg, 0x2, %idreg; \ + ld [%idreg + %dest_reg], %dest_reg; + /* Blackbox - take care with this... - check smp4m and smp4d before changing this. */ #define LOAD_CURRENT(dest_reg, idreg) \ sethi %hi(___b_load_current), %idreg; \ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 99aa4db6e9c2..9cff2709a96d 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64) += pcr.o obj-$(CONFIG_SPARC64) += nmi.o obj-$(CONFIG_SPARC64_SMP) += cpumap.o -# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation -obj-$(CONFIG_SPARC32) += devres.o -devres-y := ../../../kernel/irq/devres.o - obj-y += dma.o obj-$(CONFIG_SPARC32_PCI) += pcic.o diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 7925c54f4133..138dbbc8dc84 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -4,6 +4,7 @@ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/seq_file.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -11,7 +12,9 @@ #include <linux/threads.h> #include <asm/spitfire.h> +#include <asm/pgtable.h> #include <asm/oplib.h> +#include <asm/setup.h> #include <asm/page.h> #include <asm/head.h> #include <asm/psr.h> @@ -23,6 +26,9 @@ DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; EXPORT_PER_CPU_SYMBOL(__cpu_data); +int ncpus_probed; +unsigned int fsr_storage; + struct cpu_info { int psr_vers; const char *name; @@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { * machine type value into consideration too. I will fix this. */ -const char *sparc_cpu_type; -const char *sparc_fpu_type; +static const char *sparc_cpu_type; +static const char *sparc_fpu_type; const char *sparc_pmu_type; -unsigned int fsr_storage; -static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) +static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) { const struct manufacturer_info *manuf; int i; @@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) } #ifdef CONFIG_SPARC32 -void __cpuinit cpu_probe(void) +static int show_cpuinfo(struct seq_file *m, void *__unused) +{ + seq_printf(m, + "cpu\t\t: %s\n" + "fpu\t\t: %s\n" + "promlib\t\t: Version %d Revision %d\n" + "prom\t\t: %d.%d\n" + "type\t\t: %s\n" + "ncpus probed\t: %d\n" + "ncpus active\t: %d\n" +#ifndef CONFIG_SMP + "CPU0Bogo\t: %lu.%02lu\n" + "CPU0ClkTck\t: %ld\n" +#endif + , + sparc_cpu_type, + sparc_fpu_type , + romvec->pv_romvers, + prom_rev, + romvec->pv_printrev >> 16, + romvec->pv_printrev & 0xffff, + &cputypval[0], + ncpus_probed, + num_online_cpus() +#ifndef CONFIG_SMP + , cpu_data(0).udelay_val/(500000/HZ), + (cpu_data(0).udelay_val/(5000/HZ)) % 100, + cpu_data(0).clock_tick +#endif + ); + +#ifdef CONFIG_SMP + smp_bogo(m); +#endif + mmu_info(m); +#ifdef CONFIG_SMP + smp_info(m); +#endif + return 0; +} +#endif /* CONFIG_SPARC32 */ + +#ifdef CONFIG_SPARC64 +unsigned int dcache_parity_tl1_occurred; +unsigned int icache_parity_tl1_occurred; + + +static int show_cpuinfo(struct seq_file *m, void *__unused) +{ + seq_printf(m, + "cpu\t\t: %s\n" + "fpu\t\t: %s\n" + "pmu\t\t: %s\n" + "prom\t\t: %s\n" + "type\t\t: %s\n" + "ncpus probed\t: %d\n" + "ncpus active\t: %d\n" + "D$ parity tl1\t: %u\n" + "I$ parity tl1\t: %u\n" +#ifndef CONFIG_SMP + "Cpu0ClkTck\t: %016lx\n" +#endif + , + sparc_cpu_type, + sparc_fpu_type, + sparc_pmu_type, + prom_version, + ((tlb_type == hypervisor) ? + "sun4v" : + "sun4u"), + ncpus_probed, + num_online_cpus(), + dcache_parity_tl1_occurred, + icache_parity_tl1_occurred +#ifndef CONFIG_SMP + , cpu_data(0).clock_tick +#endif + ); +#ifdef CONFIG_SMP + smp_bogo(m); +#endif + mmu_info(m); +#ifdef CONFIG_SMP + smp_info(m); +#endif + return 0; +} +#endif /* CONFIG_SPARC64 */ + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + /* The pointer we are returning is arbitrary, + * it just has to be non-NULL and not IS_ERR + * in the success case. + */ + return *pos == 0 ? &c_start : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +#ifdef CONFIG_SPARC32 +static int __init cpu_type_probe(void) { int psr_impl, psr_vers, fpu_vers; int psr; @@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void) put_psr(psr); set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers); + + return 0; } -#else +#endif /* CONFIG_SPARC32 */ + +#ifdef CONFIG_SPARC64 static void __init sun4v_cpu_probe(void) { switch (sun4v_chip_type) { @@ -374,6 +499,6 @@ static int __init cpu_type_probe(void) } return 0; } +#endif /* CONFIG_SPARC64 */ early_initcall(cpu_type_probe); -#endif diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 8de64c8126bc..d91fd782743a 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void) new_tree->total_nodes = n; memcpy(&new_tree->level, tmp_level, sizeof(tmp_level)); - prev_cpu = cpu = first_cpu(cpu_online_map); + prev_cpu = cpu = cpumask_first(cpu_online_mask); /* Initialize all levels in the tree with the first CPU */ for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) { @@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index) } /* Impossible, since num_online_cpus() <= num_possible_cpus() */ - return first_cpu(cpu_online_map); + return cpumask_first(cpu_online_mask); } static int _map_to_cpu(unsigned int index) diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c index d2eddd6647cd..113c052c3043 100644 --- a/arch/sparc/kernel/devices.c +++ b/arch/sparc/kernel/devices.c @@ -20,7 +20,6 @@ #include <asm/system.h> #include <asm/cpudata.h> -extern void cpu_probe(void); extern void clock_stop_probe(void); /* tadpole.c */ extern void sun4c_probe_memerr_reg(void); @@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node) void __init device_scan(void) { - prom_printf("Booting Linux...\n"); + printk(KERN_NOTICE "Booting Linux...\n"); #ifndef CONFIG_SMP { @@ -133,7 +132,6 @@ void __init device_scan(void) } #endif /* !CONFIG_SMP */ - cpu_probe(); { extern void auxio_probe(void); extern void auxio_power_probe(void); diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 3add4de8a1a9..dd1342c0a3be 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, tag->num_records = ncpus; i = 0; - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { ent[i].cpu = cpu; ent[i].result = DR_CPU_RES_OK; ent[i].stat = default_stat; @@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp, int resp_len, ncpus, cpu; unsigned long flags; - ncpus = cpus_weight(*mask); + ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); resp = kzalloc(resp_len, GFP_KERNEL); if (!resp) @@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp, mdesc_populate_present_mask(mask); mdesc_fill_in_cpu_data(mask); - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { int err; printk(KERN_INFO "ds-%llu: Starting cpu %d...\n", @@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp, int resp_len, ncpus, cpu; unsigned long flags; - ncpus = cpus_weight(*mask); + ncpus = cpumask_weight(mask); resp_len = dr_cpu_size_response(ncpus); resp = kzalloc(resp_len, GFP_KERNEL); if (!resp) @@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp, resp_len, ncpus, mask, DR_CPU_STAT_UNCONFIGURED); - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { int err; printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n", @@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp, purge_dups(cpu_list, tag->num_records); - cpus_clear(mask); + cpumask_clear(&mask); for (i = 0; i < tag->num_records; i++) { if (cpu_list[i] == CPU_SENTINEL) continue; if (cpu_list[i] < nr_cpu_ids) - cpu_set(cpu_list[i], mask); + cpumask_set_cpu(cpu_list[i], &mask); } if (tag->type == DR_CPU_CONFIGURE) diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 6da784a5612b..8341963f4c84 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -269,19 +269,22 @@ smp4m_ticker: /* Here is where we check for possible SMP IPI passed to us * on some level other than 15 which is the NMI and only used * for cross calls. That has a separate entry point below. + * + * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*. */ maybe_smp4m_msg: GET_PROCESSOR4M_ID(o3) sethi %hi(sun4m_irq_percpu), %l5 sll %o3, 2, %o3 or %l5, %lo(sun4m_irq_percpu), %o5 - sethi %hi(0x40000000), %o2 + sethi %hi(0x70000000), %o2 ! Check all soft-IRQs ld [%o5 + %o3], %o1 ld [%o1 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending andcc %o3, %o2, %g0 be,a smp4m_ticker cmp %l7, 14 - st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x40000000 + /* Soft-IRQ IPI */ + st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x70000000 WRITE_PAUSE ld [%o1 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending WRITE_PAUSE @@ -290,9 +293,27 @@ maybe_smp4m_msg: WRITE_PAUSE wr %l4, PSR_ET, %psr WRITE_PAUSE - call smp_reschedule_irq + sll %o2, 28, %o2 ! shift for simpler checks below +maybe_smp4m_msg_check_single: + andcc %o2, 0x1, %g0 + beq,a maybe_smp4m_msg_check_mask + andcc %o2, 0x2, %g0 + call smp_call_function_single_interrupt nop - + andcc %o2, 0x2, %g0 +maybe_smp4m_msg_check_mask: + beq,a maybe_smp4m_msg_check_resched + andcc %o2, 0x4, %g0 + call smp_call_function_interrupt + nop + andcc %o2, 0x4, %g0 +maybe_smp4m_msg_check_resched: + /* rescheduling is done in RESTORE_ALL regardless, but incr stats */ + beq,a maybe_smp4m_msg_out + nop + call smp_resched_interrupt + nop +maybe_smp4m_msg_out: RESTORE_ALL .align 4 @@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d: 1: b,a 1b #ifdef CONFIG_SPARC_LEON - - .globl smpleon_ticker - /* SMP per-cpu ticker interrupts are handled specially. */ -smpleon_ticker: + .globl smpleon_ipi + .extern leon_ipi_interrupt + /* SMP per-cpu IPI interrupts are handled specially. */ +smpleon_ipi: SAVE_ALL or %l0, PSR_PIL, %g2 wr %g2, 0x0, %psr WRITE_PAUSE wr %g2, PSR_ET, %psr WRITE_PAUSE - call leon_percpu_timer_interrupt - add %sp, STACKFRAME_SZ, %o0 + call leonsmp_ipi_interrupt + add %sp, STACKFRAME_SZ, %o1 ! pt_regs wr %l0, PSR_ET, %psr WRITE_PAUSE RESTORE_ALL diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 59423491cef8..587785759838 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -810,31 +810,25 @@ found_version: got_prop: #ifdef CONFIG_SPARC_LEON /* no cpu-type check is needed, it is a SPARC-LEON */ -#ifdef CONFIG_SMP - ba leon_smp_init - nop - .global leon_smp_init -leon_smp_init: - sethi %hi(boot_cpu_id), %g1 ! master always 0 - stb %g0, [%g1 + %lo(boot_cpu_id)] - sethi %hi(boot_cpu_id4), %g1 ! master always 0 - stb %g0, [%g1 + %lo(boot_cpu_id4)] + sethi %hi(boot_cpu_id), %g2 ! boot-cpu index - rd %asr17,%g1 - srl %g1,28,%g1 +#ifdef CONFIG_SMP + ldub [%g2 + %lo(boot_cpu_id)], %g1 + cmp %g1, 0xff ! unset means first CPU + bne leon_smp_cpu_startup ! continue only with master + nop +#endif + /* Get CPU-ID from most significant 4-bit of ASR17 */ + rd %asr17, %g1 + srl %g1, 28, %g1 - cmp %g0,%g1 - beq sun4c_continue_boot !continue with master - nop + /* Update boot_cpu_id only on boot cpu */ + stub %g1, [%g2 + %lo(boot_cpu_id)] - ba leon_smp_cpu_startup - nop -#else ba sun4c_continue_boot nop #endif -#endif set cputypval, %o2 ldub [%o2 + 0x4], %l1 @@ -893,9 +887,6 @@ sun4d_init: sta %g4, [%g0] ASI_M_VIKING_TMP1 sethi %hi(boot_cpu_id), %g5 stb %g4, [%g5 + %lo(boot_cpu_id)] - sll %g4, 2, %g4 - sethi %hi(boot_cpu_id4), %g5 - stb %g4, [%g5 + %lo(boot_cpu_id4)] #endif /* Fall through to sun4m_init */ @@ -1024,14 +1015,28 @@ sun4c_continue_boot: bl 1b add %o0, 0x1, %o0 + /* If boot_cpu_id has not been setup by machine specific + * init-code above we default it to zero. + */ + sethi %hi(boot_cpu_id), %g2 + ldub [%g2 + %lo(boot_cpu_id)], %g3 + cmp %g3, 0xff + bne 1f + nop + mov %g0, %g3 + stub %g3, [%g2 + %lo(boot_cpu_id)] + +1: /* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */ + sll %g3, 2, %g3 + sethi %hi(boot_cpu_id4), %g2 + stub %g3, [%g2 + %lo(boot_cpu_id4)] + /* Initialize the uwinmask value for init task just in case. * But first make current_set[boot_cpu_id] point to something useful. */ set init_thread_union, %g6 set current_set, %g2 #ifdef CONFIG_SMP - sethi %hi(boot_cpu_id4), %g3 - ldub [%g3 + %lo(boot_cpu_id4)], %g3 st %g6, [%g2] add %g2, %g3, %g2 #endif diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index c6ce9a6a4790..1c9c80a1a86a 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -50,10 +50,15 @@ #include <asm/io-unit.h> #include <asm/leon.h> +/* This function must make sure that caches and memory are coherent after DMA + * On LEON systems without cache snooping it flushes the entire D-CACHE. + */ #ifndef CONFIG_SPARC_LEON -#define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */ +static inline void dma_make_coherent(unsigned long pa, unsigned long len) +{ +} #else -static inline void mmu_inval_dma_area(void *va, unsigned long len) +static inline void dma_make_coherent(unsigned long pa, unsigned long len) { if (!sparc_leon3_snooping_enabled()) leon_flush_dcache_all(); @@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total); goto err_nova; } - mmu_inval_dma_area((void *)va, len_total); // XXX The mmu_map_dma_area does this for us below, see comments. // sparc_mapiorange(0, virt_to_phys(va), res->start, len_total); @@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p, release_resource(res); kfree(res); - /* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */ pgv = virt_to_page(p); mmu_unmap_dma_area(dev, ba, n); @@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len, printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total); goto err_nova; } - mmu_inval_dma_area(va, len_total); sparc_mapiorange(0, virt_to_phys(va), res->start, len_total); *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */ @@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p, dma_addr_t ba) { struct resource *res; - void *pgp; if ((res = _sparc_find_resource(&_sparc_dvma, (unsigned long)p)) == NULL) { @@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p, return; } - pgp = phys_to_virt(ba); /* bus_to_virt actually */ - mmu_inval_dma_area(pgp, n); + dma_make_coherent(ba, n); sparc_unmapiorange((unsigned long)p, n); release_resource(res); kfree(res); - - free_pages((unsigned long)pgp, get_order(n)); + free_pages((unsigned long)phys_to_virt(ba), get_order(n)); } /* @@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { if (dir != PCI_DMA_TODEVICE) - mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } /* Map a set of buffers described by scatterlist in streaming @@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl, /* IIep is write-through, not flushing. */ for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - sg->dma_address = virt_to_phys(sg_virt(sg)); + sg->dma_address = sg_phys(sg); sg->dma_length = sg->length; } return nents; @@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl, if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } @@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir) { if (dir != PCI_DMA_TODEVICE) { - mmu_inval_dma_area(phys_to_virt(ba), - PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } } @@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba, size_t size, enum dma_data_direction dir) { if (dir != PCI_DMA_TODEVICE) { - mmu_inval_dma_area(phys_to_virt(ba), - PAGE_ALIGN(size)); + dma_make_coherent(ba, PAGE_ALIGN(size)); } } @@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } @@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist * if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg_page(sg)) == NULL); - mmu_inval_dma_area(page_address(sg_page(sg)), - PAGE_ALIGN(sg->length)); + dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); } } } diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h index 008453b798ec..100b9c204e78 100644 --- a/arch/sparc/kernel/irq.h +++ b/arch/sparc/kernel/irq.h @@ -2,6 +2,23 @@ #include <asm/btfixup.h> +struct irq_bucket { + struct irq_bucket *next; + unsigned int real_irq; + unsigned int irq; + unsigned int pil; +}; + +#define SUN4D_MAX_BOARD 10 +#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5) + +/* Map between the irq identifier used in hw to the + * irq_bucket. The map is sufficient large to hold + * the sun4d hw identifiers. + */ +extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ]; + + /* sun4m specific type definitions */ /* This maps direct to CPU specific interrupt registers */ @@ -35,6 +52,10 @@ struct sparc_irq_config { }; extern struct sparc_irq_config sparc_irq_config; +unsigned int irq_alloc(unsigned int real_irq, unsigned int pil); +void irq_link(unsigned int irq); +void irq_unlink(unsigned int irq); +void handler_irq(unsigned int pil, struct pt_regs *regs); /* Dave Redman (djhr@tadpole.co.uk) * changed these to function pointers.. it saves cycles and will allow @@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config; * Changed these to btfixup entities... It saves cycles :) */ -BTFIXUPDEF_CALL(void, disable_irq, unsigned int) -BTFIXUPDEF_CALL(void, enable_irq, unsigned int) -BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int) -BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int) BTFIXUPDEF_CALL(void, clear_clock_irq, void) BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int) -static inline void __disable_irq(unsigned int irq) -{ - BTFIXUP_CALL(disable_irq)(irq); -} - -static inline void __enable_irq(unsigned int irq) -{ - BTFIXUP_CALL(enable_irq)(irq); -} - -static inline void disable_pil_irq(unsigned int irq) -{ - BTFIXUP_CALL(disable_pil_irq)(irq); -} - -static inline void enable_pil_irq(unsigned int irq) -{ - BTFIXUP_CALL(enable_pil_irq)(irq); -} - static inline void clear_clock_irq(void) { BTFIXUP_CALL(clear_clock_irq)(); @@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int) #define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level) #define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level) #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu) + +/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */ +#define SUN4D_IPI_IRQ 14 + +extern void sun4d_ipi_interrupt(void); + #endif diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c index 7c93df4099cb..9b89d842913c 100644 --- a/arch/sparc/kernel/irq_32.c +++ b/arch/sparc/kernel/irq_32.c @@ -15,6 +15,7 @@ #include <linux/seq_file.h> #include <asm/cacheflush.h> +#include <asm/cpudata.h> #include <asm/pcic.h> #include <asm/leon.h> @@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore); * directed CPU interrupts using the existing enable/disable irq code * with tweaks. * + * Sun4d complicates things even further. IRQ numbers are arbitrary + * 32-bit values in that case. Since this is similar to sparc64, + * we adopt a virtual IRQ numbering scheme as is done there. + * Virutal interrupt numbers are allocated by build_irq(). So NR_IRQS + * just becomes a limit of how many interrupt sources we can handle in + * a single system. Even fully loaded SS2000 machines top off at + * about 32 interrupt sources or so, therefore a NR_IRQS value of 64 + * is more than enough. + * + * We keep a map of per-PIL enable interrupts. These get wired + * up via the irq_chip->startup() method which gets invoked by + * the generic IRQ layer during request_irq(). */ +/* Table of allocated irqs. Unused entries has irq == 0 */ +static struct irq_bucket irq_table[NR_IRQS]; +/* Protect access to irq_table */ +static DEFINE_SPINLOCK(irq_table_lock); -/* - * Dave Redman (djhr@tadpole.co.uk) - * - * There used to be extern calls and hard coded values here.. very sucky! - * instead, because some of the devices attach very early, I do something - * equally sucky but at least we'll never try to free statically allocated - * space or call kmalloc before kmalloc_init :(. - * - * In fact it's the timer10 that attaches first.. then timer14 - * then kmalloc_init is called.. then the tty interrupts attach. - * hmmm.... - * - */ -#define MAX_STATIC_ALLOC 4 -struct irqaction static_irqaction[MAX_STATIC_ALLOC]; -int static_irq_count; - -static struct { - struct irqaction *action; - int flags; -} sparc_irq[NR_IRQS]; -#define SPARC_IRQ_INPROGRESS 1 - -/* Used to protect the IRQ action lists */ -DEFINE_SPINLOCK(irq_action_lock); +/* Map between the irq identifier used in hw to the irq_bucket. */ +struct irq_bucket *irq_map[SUN4D_MAX_IRQ]; +/* Protect access to irq_map */ +static DEFINE_SPINLOCK(irq_map_lock); -int show_interrupts(struct seq_file *p, void *v) +/* Allocate a new irq from the irq_table */ +unsigned int irq_alloc(unsigned int real_irq, unsigned int pil) { - int i = *(loff_t *)v; - struct irqaction *action; unsigned long flags; -#ifdef CONFIG_SMP - int j; -#endif + unsigned int i; + + spin_lock_irqsave(&irq_table_lock, flags); + for (i = 1; i < NR_IRQS; i++) { + if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil) + goto found; + } - if (sparc_cpu_model == sun4d) - return show_sun4d_interrupts(p, v); + for (i = 1; i < NR_IRQS; i++) { + if (!irq_table[i].irq) + break; + } - spin_lock_irqsave(&irq_action_lock, flags); if (i < NR_IRQS) { - action = sparc_irq[i].action; - if (!action) - goto out_unlock; - seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) { - seq_printf(p, "%10u ", - kstat_cpu(j).irqs[i]); - } -#endif - seq_printf(p, " %c %s", - (action->flags & IRQF_DISABLED) ? '+' : ' ', - action->name); - for (action = action->next; action; action = action->next) { - seq_printf(p, ",%s %s", - (action->flags & IRQF_DISABLED) ? " +" : "", - action->name); - } - seq_putc(p, '\n'); + irq_table[i].real_irq = real_irq; + irq_table[i].irq = i; + irq_table[i].pil = pil; + } else { + printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); + i = 0; } -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); - return 0; +found: + spin_unlock_irqrestore(&irq_table_lock, flags); + + return i; } -void free_irq(unsigned int irq, void *dev_id) +/* Based on a single pil handler_irq may need to call several + * interrupt handlers. Use irq_map as entry to irq_table, + * and let each entry in irq_table point to the next entry. + */ +void irq_link(unsigned int irq) { - struct irqaction *action; - struct irqaction **actionp; + struct irq_bucket *p; unsigned long flags; - unsigned int cpu_irq; - - if (sparc_cpu_model == sun4d) { - sun4d_free_irq(irq, dev_id); - return; - } - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { /* 14 irq levels on the sparc */ - printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq); - return; - } + unsigned int pil; - spin_lock_irqsave(&irq_action_lock, flags); + BUG_ON(irq >= NR_IRQS); - actionp = &sparc_irq[cpu_irq].action; - action = *actionp; + spin_lock_irqsave(&irq_map_lock, flags); - if (!action->handler) { - printk(KERN_ERR "Trying to free free IRQ%d\n", irq); - goto out_unlock; - } - if (dev_id) { - for (; action; action = action->next) { - if (action->dev_id == dev_id) - break; - actionp = &action->next; - } - if (!action) { - printk(KERN_ERR "Trying to free free shared IRQ%d\n", - irq); - goto out_unlock; - } - } else if (action->flags & IRQF_SHARED) { - printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n", - irq); - goto out_unlock; - } - if (action->flags & SA_STATIC_ALLOC) { - /* - * This interrupt is marked as specially allocated - * so it is a bad idea to free it. - */ - printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n", - irq, action->name); - goto out_unlock; - } - - *actionp = action->next; + p = &irq_table[irq]; + pil = p->pil; + BUG_ON(pil > SUN4D_MAX_IRQ); + p->next = irq_map[pil]; + irq_map[pil] = p; - spin_unlock_irqrestore(&irq_action_lock, flags); + spin_unlock_irqrestore(&irq_map_lock, flags); +} - synchronize_irq(irq); +void irq_unlink(unsigned int irq) +{ + struct irq_bucket *p, **pnext; + unsigned long flags; - spin_lock_irqsave(&irq_action_lock, flags); + BUG_ON(irq >= NR_IRQS); - kfree(action); + spin_lock_irqsave(&irq_map_lock, flags); - if (!sparc_irq[cpu_irq].action) - __disable_irq(irq); + p = &irq_table[irq]; + BUG_ON(p->pil > SUN4D_MAX_IRQ); + pnext = &irq_map[p->pil]; + while (*pnext != p) + pnext = &(*pnext)->next; + *pnext = p->next; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); + spin_unlock_irqrestore(&irq_map_lock, flags); } -EXPORT_SYMBOL(free_irq); - -/* - * This is called when we want to synchronize with - * interrupts. We may for example tell a device to - * stop sending interrupts: but to make sure there - * are no interrupts that are executing on another - * CPU we need to call this function. - */ -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - unsigned int cpu_irq; - cpu_irq = irq & (NR_IRQS - 1); - while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS) - cpu_relax(); -} -EXPORT_SYMBOL(synchronize_irq); -#endif /* SMP */ -void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs) +/* /proc/interrupts printing */ +int arch_show_interrupts(struct seq_file *p, int prec) { - int i; - struct irqaction *action; - unsigned int cpu_irq; + int j; - cpu_irq = irq & (NR_IRQS - 1); - action = sparc_irq[cpu_irq].action; - - printk(KERN_ERR "IO device interrupt, irq = %d\n", irq); - printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc, - regs->npc, regs->u_regs[14]); - if (action) { - printk(KERN_ERR "Expecting: "); - for (i = 0; i < 16; i++) - if (action->handler) - printk(KERN_CONT "[%s:%d:0x%x] ", action->name, - i, (unsigned int)action->handler); - } - printk(KERN_ERR "AIEEE\n"); - panic("bogus interrupt received"); +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).irq_resched_count); + seq_printf(p, " IPI rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).irq_call_count); + seq_printf(p, " IPI function call interrupts\n"); +#endif + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).counter); + seq_printf(p, " Non-maskable interrupts\n"); + return 0; } -void handler_irq(int pil, struct pt_regs *regs) +void handler_irq(unsigned int pil, struct pt_regs *regs) { struct pt_regs *old_regs; - struct irqaction *action; - int cpu = smp_processor_id(); + struct irq_bucket *p; + BUG_ON(pil > 15); old_regs = set_irq_regs(regs); irq_enter(); - disable_pil_irq(pil); -#ifdef CONFIG_SMP - /* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */ - if ((sparc_cpu_model==sun4m) && (pil < 10)) - smp4m_irq_rotate(cpu); -#endif - action = sparc_irq[pil].action; - sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS; - kstat_cpu(cpu).irqs[pil]++; - do { - if (!action || !action->handler) - unexpected_irq(pil, NULL, regs); - action->handler(pil, action->dev_id); - action = action->next; - } while (action); - sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS; - enable_pil_irq(pil); + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next = p->next; + + generic_handle_irq(p->irq); + p = next; + } irq_exit(); set_irq_regs(old_regs); } #if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE) +static unsigned int floppy_irq; -/* - * Fast IRQs on the Sparc can only have one routine attached to them, - * thus no sharing possible. - */ -static int request_fast_irq(unsigned int irq, - void (*handler)(void), - unsigned long irqflags, const char *devname) +int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler) { - struct irqaction *action; - unsigned long flags; unsigned int cpu_irq; - int ret; + int err; + #if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON struct tt_entry *trap_table; #endif - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { - ret = -EINVAL; - goto out; - } - if (!handler) { - ret = -EINVAL; - goto out; - } - spin_lock_irqsave(&irq_action_lock, flags); + err = request_irq(irq, irq_handler, 0, "floppy", NULL); + if (err) + return -1; - action = sparc_irq[cpu_irq].action; - if (action) { - if (action->flags & IRQF_SHARED) - panic("Trying to register fast irq when already shared.\n"); - if (irqflags & IRQF_SHARED) - panic("Trying to register fast irq as shared.\n"); + /* Save for later use in floppy interrupt handler */ + floppy_irq = irq; - /* Anyway, someone already owns it so cannot be made fast. */ - printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n"); - ret = -EBUSY; - goto out_unlock; - } - - /* - * If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } + cpu_irq = (irq & (NR_IRQS - 1)); /* Dork with trap table if we get this far. */ #define INSTANTIATE(table) \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \ - SPARC_BRANCH((unsigned long) handler, \ + SPARC_BRANCH((unsigned long) floppy_hardint, \ (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \ table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP; @@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq, * writing we have no CPU-neutral interface to fine-grained flushes. */ flush_cache_all(); - - action->flags = irqflags; - action->name = devname; - action->dev_id = NULL; - action->next = NULL; - - sparc_irq[cpu_irq].action = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; + return 0; } +EXPORT_SYMBOL(sparc_floppy_request_irq); /* * These variables are used to access state from the assembler @@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base); unsigned long pdma_areasize; EXPORT_SYMBOL(pdma_areasize); -static irq_handler_t floppy_irq_handler; - +/* Use the generic irq support to call floppy_interrupt + * which was setup using request_irq() in sparc_floppy_request_irq(). + * We only have one floppy interrupt so we do not need to check + * for additional handlers being wired up by irq_link() + */ void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) { struct pt_regs *old_regs; - int cpu = smp_processor_id(); old_regs = set_irq_regs(regs); - disable_pil_irq(irq); irq_enter(); - kstat_cpu(cpu).irqs[irq]++; - floppy_irq_handler(irq, dev_id); + generic_handle_irq(floppy_irq); irq_exit(); - enable_pil_irq(irq); set_irq_regs(old_regs); - /* - * XXX Eek, it's totally changed with preempt_count() and such - * if (softirq_pending(cpu)) - * do_softirq(); - */ -} - -int sparc_floppy_request_irq(int irq, unsigned long flags, - irq_handler_t irq_handler) -{ - floppy_irq_handler = irq_handler; - return request_fast_irq(irq, floppy_hardint, flags, "floppy"); } -EXPORT_SYMBOL(sparc_floppy_request_irq); - #endif -int request_irq(unsigned int irq, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - struct irqaction *action, **actionp; - unsigned long flags; - unsigned int cpu_irq; - int ret; - - if (sparc_cpu_model == sun4d) - return sun4d_request_irq(irq, handler, irqflags, devname, dev_id); - - cpu_irq = irq & (NR_IRQS - 1); - if (cpu_irq > 14) { - ret = -EINVAL; - goto out; - } - if (!handler) { - ret = -EINVAL; - goto out; - } - - spin_lock_irqsave(&irq_action_lock, flags); - - actionp = &sparc_irq[cpu_irq].action; - action = *actionp; - if (action) { - if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) { - ret = -EBUSY; - goto out_unlock; - } - if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) { - printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n", - irq); - ret = -EBUSY; - goto out_unlock; - } - for ( ; action; action = *actionp) - actionp = &action->next; - } - - /* If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } - - action->handler = handler; - action->flags = irqflags; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - *actionp = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; -} -EXPORT_SYMBOL(request_irq); - -void disable_irq_nosync(unsigned int irq) -{ - __disable_irq(irq); -} -EXPORT_SYMBOL(disable_irq_nosync); - -void disable_irq(unsigned int irq) -{ - __disable_irq(irq); -} -EXPORT_SYMBOL(disable_irq); - -void enable_irq(unsigned int irq) -{ - __enable_irq(irq); -} -EXPORT_SYMBOL(enable_irq); - -/* - * We really don't need these at all on the Sparc. We only have - * stubs here because they are exported to modules. - */ -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL(probe_irq_on); - -int probe_irq_off(unsigned long mask) -{ - return 0; -} -EXPORT_SYMBOL(probe_irq_off); - -static unsigned int build_device_irq(struct platform_device *op, - unsigned int real_irq) -{ - return real_irq; -} - /* djhr * This could probably be made indirect too and assigned in the CPU * bits of the code. That would be much nicer I think and would also @@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op, void __init init_IRQ(void) { - sparc_irq_config.build_device_irq = build_device_irq; - switch (sparc_cpu_model) { case sun4c: case sun4: @@ -607,14 +351,11 @@ void __init init_IRQ(void) break; case sun4m: -#ifdef CONFIG_PCI pcic_probe(); - if (pcic_present()) { + if (pcic_present()) sun4m_pci_init_IRQ(); - break; - } -#endif - sun4m_init_IRQ(); + else + sun4m_init_IRQ(); break; case sun4d: @@ -632,9 +373,3 @@ void __init init_IRQ(void) btfixup(); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) -{ - /* For now, nothing... */ -} -#endif /* CONFIG_PROC_FS */ diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index b1d275ce3435..4e78862d12fd 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) int cpuid; cpumask_copy(&mask, affinity); - if (cpus_equal(mask, cpu_online_map)) { + if (cpumask_equal(&mask, cpu_online_mask)) { cpuid = map_to_cpu(irq); } else { cpumask_t tmp; - cpus_and(tmp, cpu_online_map, mask); - cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp); + cpumask_and(&tmp, cpu_online_mask, &mask); + cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp); } return cpuid; diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 24ad449886be..6f6544cfa0ef 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -6,11 +6,9 @@ #include <asm/traps.h> /* cpu.c */ -extern const char *sparc_cpu_type; extern const char *sparc_pmu_type; -extern const char *sparc_fpu_type; - extern unsigned int fsr_storage; +extern int ncpus_probed; #ifdef CONFIG_SPARC32 /* cpu.c */ @@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void); extern unsigned int lvl14_resolution; extern void sun4m_init_IRQ(void); +extern void sun4m_unmask_profile_irq(void); extern void sun4m_clear_profile_irq(int cpu); /* sun4d_irq.c */ diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 2969f777fa11..2f538ac2e139 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -19,53 +19,70 @@ #include <asm/leon_amba.h> #include <asm/traps.h> #include <asm/cacheflush.h> +#include <asm/smp.h> +#include <asm/setup.h> #include "prom.h" #include "irq.h" struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */ struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */ -struct amba_apb_device leon_percpu_timer_dev[16]; int leondebug_irq_disable; int leon_debug_irqout; static int dummy_master_l10_counter; unsigned long amba_system_id; +static DEFINE_SPINLOCK(leon_irq_lock); unsigned long leon3_gptimer_irq; /* interrupt controller irq number */ unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */ +int leon3_ticker_irq; /* Timer ticker IRQ */ unsigned int sparc_leon_eirq; -#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0])) +#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu]) +#define LEON_IACK (&leon3_irqctrl_regs->iclear) +#define LEON_DO_ACK_HW 1 -/* Return the IRQ of the pending IRQ on the extended IRQ controller */ -int sparc_leon_eirq_get(int eirq, int cpu) +/* Return the last ACKed IRQ by the Extended IRQ controller. It has already + * been (automatically) ACKed when the CPU takes the trap. + */ +static inline unsigned int leon_eirq_get(int cpu) { return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f; } -irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id) +/* Handle one or multiple IRQs from the extended interrupt controller */ +static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc) { - printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n"); - return IRQ_HANDLED; + unsigned int eirq; + int cpu = sparc_leon3_cpuid(); + + eirq = leon_eirq_get(cpu); + if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */ + generic_handle_irq(irq_map[eirq]->irq); } /* The extended IRQ controller has been found, this function registers it */ -void sparc_leon_eirq_register(int eirq) +void leon_eirq_setup(unsigned int eirq) { - int irq; + unsigned long mask, oldmask; + unsigned int veirq; - /* Register a "BAD" handler for this interrupt, it should never happen */ - irq = request_irq(eirq, sparc_leon_eirq_isr, - (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL); - - if (irq) { - printk(KERN_ERR - "sparc_leon_eirq_register: unable to attach IRQ%d\n", - eirq); - } else { - sparc_leon_eirq = eirq; + if (eirq < 1 || eirq > 0xf) { + printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq); + return; } + veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0); + + /* + * Unmask the Extended IRQ, the IRQs routed through the Ext-IRQ + * controller have a mask-bit of their own, so this is safe. + */ + irq_link(veirq); + mask = 1 << eirq; + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask)); + sparc_leon_eirq = eirq; } static inline unsigned long get_irqmask(unsigned int irq) @@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq) return mask; } -static void leon_enable_irq(unsigned int irq_nr) +#ifdef CONFIG_SMP +static int irq_choose_cpu(const struct cpumask *affinity) { - unsigned long mask, flags; - mask = get_irqmask(irq_nr); - local_irq_save(flags); - LEON3_BYPASS_STORE_PA(LEON_IMASK, - (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask))); - local_irq_restore(flags); + cpumask_t mask; + + cpus_and(mask, cpu_online_map, *affinity); + if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask)) + return boot_cpu_id; + else + return first_cpu(mask); } +#else +#define irq_choose_cpu(affinity) boot_cpu_id +#endif -static void leon_disable_irq(unsigned int irq_nr) +static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest, + bool force) { - unsigned long mask, flags; - mask = get_irqmask(irq_nr); - local_irq_save(flags); - LEON3_BYPASS_STORE_PA(LEON_IMASK, - (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask))); - local_irq_restore(flags); + unsigned long mask, oldmask, flags; + int oldcpu, newcpu; + + mask = (unsigned long)data->chip_data; + oldcpu = irq_choose_cpu(data->affinity); + newcpu = irq_choose_cpu(dest); + + if (oldcpu == newcpu) + goto out; + + /* unmask on old CPU first before enabling on the selected CPU */ + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask)); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +out: + return IRQ_SET_MASK_OK; +} + +static void leon_unmask_irq(struct irq_data *data) +{ + unsigned long mask, oldmask, flags; + int cpu; + + mask = (unsigned long)data->chip_data; + cpu = irq_choose_cpu(data->affinity); + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +} + +static void leon_mask_irq(struct irq_data *data) +{ + unsigned long mask, oldmask, flags; + int cpu; + + mask = (unsigned long)data->chip_data; + cpu = irq_choose_cpu(data->affinity); + spin_lock_irqsave(&leon_irq_lock, flags); + oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu)); + LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); +} + +static unsigned int leon_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + leon_unmask_irq(data); + return 0; +} +static void leon_shutdown_irq(struct irq_data *data) +{ + leon_mask_irq(data); + irq_unlink(data->irq); +} + +/* Used by external level sensitive IRQ handlers on the LEON: ACK IRQ ctrl */ +static void leon_eoi_irq(struct irq_data *data) +{ + unsigned long mask = (unsigned long)data->chip_data; + + if (mask & LEON_DO_ACK_HW) + LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW); +} + +static struct irq_chip leon_irq = { + .name = "leon", + .irq_startup = leon_startup_irq, + .irq_shutdown = leon_shutdown_irq, + .irq_mask = leon_mask_irq, + .irq_unmask = leon_unmask_irq, + .irq_eoi = leon_eoi_irq, + .irq_set_affinity = leon_set_affinity, +}; + +/* + * Build a LEON IRQ for the edge triggered LEON IRQ controller: + * Edge (normal) IRQ - handle_simple_irq, ack=DONT-CARE, never ack + * Level IRQ (PCI|Level-GPIO) - handle_fasteoi_irq, ack=1, ack after ISR + * Per-CPU Edge - handle_percpu_irq, ack=0 + */ +unsigned int leon_build_device_irq(unsigned int real_irq, + irq_flow_handler_t flow_handler, + const char *name, int do_ack) +{ + unsigned int irq; + unsigned long mask; + + irq = 0; + mask = get_irqmask(real_irq); + if (mask == 0) + goto out; + + irq = irq_alloc(real_irq, real_irq); + if (irq == 0) + goto out; + + if (do_ack) + mask |= LEON_DO_ACK_HW; + + irq_set_chip_and_handler_name(irq, &leon_irq, + flow_handler, name); + irq_set_chip_data(irq, (void *)mask); + +out: + return irq; +} + +static unsigned int _leon_build_device_irq(struct platform_device *op, + unsigned int real_irq) +{ + return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0); } void __init leon_init_timers(irq_handler_t counter_fn) { - int irq; + int irq, eirq; struct device_node *rootnp, *np, *nnp; struct property *pp; int len; - int cpu, icsel; + int icsel; int ampopts; + int err; leondebug_irq_disable = 0; leon_debug_irqout = 0; @@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn) leon3_gptimer_irq = *(unsigned int *)pp->value; } while (0); - if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) { - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx].rld, - (((1000000 / HZ) - 1))); - LEON3_BYPASS_STORE_PA( + if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq)) + goto bad; + + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld, + (((1000000 / HZ) - 1))); + LEON3_BYPASS_STORE_PA( &leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0); #ifdef CONFIG_SMP - leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs; - leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 + - leon3_gptimer_idx; - - if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & - (1<<LEON3_GPTIMER_SEPIRQ))) { - prom_printf("irq timer not configured with separate irqs\n"); - BUG(); - } + leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx; - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld, - (((1000000/HZ) - 1))); - LEON3_BYPASS_STORE_PA( - &leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0); -# endif - - /* - * The IRQ controller may (if implemented) consist of multiple - * IRQ controllers, each mapped on a 4Kb boundary. - * Each CPU may be routed to different IRQCTRLs, however - * we assume that all CPUs (in SMP system) is routed to the - * same IRQ Controller, and for non-SMP only one IRQCTRL is - * accessed anyway. - * In AMP systems, Linux must run on CPU0 for the time being. - */ - cpu = sparc_leon3_cpuid(); - icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]); - icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf; - leon3_irqctrl_regs += icsel; - } else { - goto bad; + if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & + (1<<LEON3_GPTIMER_SEPIRQ))) { + printk(KERN_ERR "timer not configured with separate irqs\n"); + BUG(); } - irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx, - counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, + 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld, + (((1000000/HZ) - 1))); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, + 0); +#endif - if (irq) { - printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n", - LEON_INTERRUPT_TIMER1); + /* + * The IRQ controller may (if implemented) consist of multiple + * IRQ controllers, each mapped on a 4Kb boundary. + * Each CPU may be routed to different IRQCTRLs, however + * we assume that all CPUs (in SMP system) is routed to the + * same IRQ Controller, and for non-SMP only one IRQCTRL is + * accessed anyway. + * In AMP systems, Linux must run on CPU0 for the time being. + */ + icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]); + icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf; + leon3_irqctrl_regs += icsel; + + /* Mask all IRQs on boot-cpu IRQ controller */ + LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0); + + /* Probe extended IRQ controller */ + eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus) + >> 16) & 0xf; + if (eirq != 0) + leon_eirq_setup(eirq); + + irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); + if (err) { + printk(KERN_ERR "unable to attach timer IRQ%d\n", irq); prom_halt(); } -# ifdef CONFIG_SMP - { - unsigned long flags; - struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)]; - - /* For SMP we use the level 14 ticker, however the bootup code - * has copied the firmwares level 14 vector into boot cpu's - * trap table, we must fix this now or we get squashed. - */ - local_irq_save(flags); - - patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */ - - /* Adjust so that we jump directly to smpleon_ticker */ - trap_table->inst_three += smpleon_ticker - real_irq_entry; + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | + LEON3_GPTIMER_IRQEN); - local_flush_cache_all(); - local_irq_restore(flags); +#ifdef CONFIG_SMP + /* Install per-cpu IRQ handler for broadcasted ticker */ + irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq, + "per-cpu", 0); + err = request_irq(irq, leon_percpu_timer_interrupt, + IRQF_PERCPU | IRQF_TIMER, "ticker", + NULL); + if (err) { + printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq); + prom_halt(); } -# endif - - if (leon3_gptimer_regs) { - LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, - LEON3_GPTIMER_EN | - LEON3_GPTIMER_RL | - LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN); -#ifdef CONFIG_SMP - LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, - LEON3_GPTIMER_EN | - LEON3_GPTIMER_RL | - LEON3_GPTIMER_LD | - LEON3_GPTIMER_IRQEN); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | + LEON3_GPTIMER_IRQEN); #endif - - } return; bad: printk(KERN_ERR "No Timer/irqctrl found\n"); @@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit) BUG(); } - - - void __init leon_trans_init(struct device_node *dp) { if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) { @@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu) { unsigned long mask, flags, *addr; mask = get_irqmask(irq_nr); - local_irq_save(flags); - addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]); - LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask))); - local_irq_restore(flags); + spin_lock_irqsave(&leon_irq_lock, flags); + addr = (unsigned long *)LEON_IMASK(cpu); + LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask)); + spin_unlock_irqrestore(&leon_irq_lock, flags); } #endif void __init leon_init_IRQ(void) { - sparc_irq_config.init_timers = leon_init_timers; - - BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM); + sparc_irq_config.init_timers = leon_init_timers; + sparc_irq_config.build_device_irq = _leon_build_device_irq; BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 8f5de4aa3c0a..fe8fb44c609c 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -14,6 +14,7 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <linux/of.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/mm.h> @@ -29,6 +30,7 @@ #include <asm/ptrace.h> #include <asm/atomic.h> #include <asm/irq_regs.h> +#include <asm/traps.h> #include <asm/delay.h> #include <asm/irq.h> @@ -50,9 +52,12 @@ extern ctxd_t *srmmu_ctx_table_phys; static int smp_processors_ready; extern volatile unsigned long cpu_callin_map[NR_CPUS]; -extern unsigned char boot_cpu_id; extern cpumask_t smp_commenced_mask; void __init leon_configure_cache_smp(void); +static void leon_ipi_init(void); + +/* IRQ number of LEON IPIs */ +int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT; static inline unsigned long do_swap(volatile unsigned long *ptr, unsigned long val) @@ -94,8 +99,6 @@ void __cpuinit leon_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - /* Fix idle thread fields. */ __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(¤t_set[cpuid]) : "memory" /* paranoid */); @@ -104,11 +107,11 @@ void __cpuinit leon_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); local_irq_enable(); - cpu_set(cpuid, cpu_online_map); + set_cpu_online(cpuid, true); } /* @@ -179,13 +182,16 @@ void __init leon_boot_cpus(void) int nrcpu = leon_smp_nrcpus(); int me = smp_processor_id(); + /* Setup IPI */ + leon_ipi_init(); + printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me, (unsigned int)nrcpu, (unsigned int)NR_CPUS, (unsigned int)&(leon3_irqctrl_regs->mpstatus)); leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me); leon_enable_irq_cpu(LEON3_IRQ_TICKER, me); - leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me); + leon_enable_irq_cpu(leon_ipi_irq, me); leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER); @@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i) (unsigned int)&leon3_irqctrl_regs->mpstatus); local_flush_cache_all(); + /* Make sure all IRQs are of from the start for this new CPU */ + LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0); + + /* Wake one CPU */ LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i); /* wheee... it's going... */ @@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i) } else { leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i); leon_enable_irq_cpu(LEON3_IRQ_TICKER, i); - leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i); + leon_enable_irq_cpu(leon_ipi_irq, i); } local_flush_cache_all(); @@ -262,21 +272,21 @@ void __init leon_smp_done(void) local_flush_cache_all(); /* Free unneeded trap tables */ - if (!cpu_isset(1, cpu_present_map)) { + if (!cpu_present(1)) { ClearPageReserved(virt_to_page(&trapbase_cpu1)); init_page_count(virt_to_page(&trapbase_cpu1)); free_page((unsigned long)&trapbase_cpu1); totalram_pages++; num_physpages++; } - if (!cpu_isset(2, cpu_present_map)) { + if (!cpu_present(2)) { ClearPageReserved(virt_to_page(&trapbase_cpu2)); init_page_count(virt_to_page(&trapbase_cpu2)); free_page((unsigned long)&trapbase_cpu2); totalram_pages++; num_physpages++; } - if (!cpu_isset(3, cpu_present_map)) { + if (!cpu_present(3)) { ClearPageReserved(virt_to_page(&trapbase_cpu3)); init_page_count(virt_to_page(&trapbase_cpu3)); free_page((unsigned long)&trapbase_cpu3); @@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu) { } +struct leon_ipi_work { + int single; + int msk; + int resched; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work); + +/* Initialize IPIs on the LEON, in order to save IRQ resources only one IRQ + * is used for all three types of IPIs. + */ +static void __init leon_ipi_init(void) +{ + int cpu, len; + struct leon_ipi_work *work; + struct property *pp; + struct device_node *rootnp; + struct tt_entry *trap_table; + unsigned long flags; + + /* Find IPI IRQ or stick with default value */ + rootnp = of_find_node_by_path("/ambapp0"); + if (rootnp) { + pp = of_find_property(rootnp, "ipi_num", &len); + if (pp && (*(int *)pp->value)) + leon_ipi_irq = *(int *)pp->value; + } + printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq); + + /* Adjust so that we jump directly to smpleon_ipi */ + local_irq_save(flags); + trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)]; + trap_table->inst_three += smpleon_ipi - real_irq_entry; + local_flush_cache_all(); + local_irq_restore(flags); + + for_each_possible_cpu(cpu) { + work = &per_cpu(leon_ipi_work, cpu); + work->single = work->msk = work->resched = 0; + } +} + +static void leon_ipi_single(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->single = 1; + + /* Generate IRQ on the CPU */ + set_cpu_int(cpu, leon_ipi_irq); +} + +static void leon_ipi_mask_one(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->msk = 1; + + /* Generate IRQ on the CPU */ + set_cpu_int(cpu, leon_ipi_irq); +} + +static void leon_ipi_resched(int cpu) +{ + struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu); + + /* Mark work */ + work->resched = 1; + + /* Generate IRQ on the CPU (any IRQ will cause resched) */ + set_cpu_int(cpu, leon_ipi_irq); +} + +void leonsmp_ipi_interrupt(void) +{ + struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work); + + if (work->single) { + work->single = 0; + smp_call_function_single_interrupt(); + } + if (work->msk) { + work->msk = 0; + smp_call_function_interrupt(); + } + if (work->resched) { + work->resched = 0; + smp_resched_interrupt(); + } +} + static struct smp_funcall { smpfunc_t func; unsigned long arg1; @@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; set_cpu_int(i, LEON3_IRQ_CROSS_CALL); @@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) @@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) @@ -386,27 +489,23 @@ void leon_cross_call_irq(void) ccall_info.processors_out[i] = 1; } -void leon_percpu_timer_interrupt(struct pt_regs *regs) +irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused) { - struct pt_regs *old_regs; int cpu = smp_processor_id(); - old_regs = set_irq_regs(regs); - leon_clear_profile_irq(cpu); profile_tick(CPU_PROFILING); if (!--prof_counter(cpu)) { - int user = user_mode(regs); + int user = user_mode(get_irq_regs()); - irq_enter(); update_process_times(user); - irq_exit(); prof_counter(cpu) = prof_multiplier(cpu); } - set_irq_regs(old_regs); + + return IRQ_HANDLED; } static void __init smp_setup_percpu_timer(void) @@ -449,6 +548,9 @@ void __init leon_init_smp(void) BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM); } #endif /* CONFIG_SPARC_LEON */ diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 56db06432ce9..42f28c7420e1 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl cpuid, NR_CPUS); continue; } - if (!cpu_isset(cpuid, *mask)) + if (!cpumask_test_cpu(cpuid, mask)) continue; #endif diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 5c149689bb20..3bb2eace58cf 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op, out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = *cpumask_of_node(nid); + cpumask_t numa_mask; + cpumask_copy(&numa_mask, cpumask_of_node(nid)); irq_set_affinity(irq, &numa_mask); } diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 30982e9ab626..580651af73f2 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = *cpumask_of_node(nid); + cpumask_t numa_mask; + cpumask_copy(&numa_mask, cpumask_of_node(nid)); irq_set_affinity(irq, &numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 948068a083fc..d1840dbdaa2f 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c @@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm, sabre_scan_bus(pbm, &op->dev); } +static const struct of_device_id sabre_match[]; static int __devinit sabre_probe(struct platform_device *op) { + const struct of_device_id *match; const struct linux_prom64_registers *pr_regs; struct device_node *dp = op->dev.of_node; struct pci_pbm_info *pbm; @@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op) const u32 *vdma; u64 clear_irq; - hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL); + match = of_match_device(sabre_match, &op->dev); + hummingbird_p = match && (match->data != NULL); if (!hummingbird_p) { struct device_node *cpu_dp; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index fecfcb2063c8..283fbc329a43 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -1458,11 +1458,15 @@ out_err: return err; } +static const struct of_device_id schizo_match[]; static int __devinit schizo_probe(struct platform_device *op) { - if (!op->dev.of_match) + const struct of_device_id *match; + + match = of_match_device(schizo_match, &op->dev); + if (!match) return -EINVAL; - return __schizo_init(op, (unsigned long) op->dev.of_match->data); + return __schizo_init(op, (unsigned long)match->data); } /* The ordering of this table is very important. Some Tomatillo diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 2cdc131b50ac..948601a066ff 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -164,6 +164,9 @@ void __iomem *pcic_regs; volatile int pcic_speculative; volatile int pcic_trapped; +/* forward */ +unsigned int pcic_build_device_irq(struct platform_device *op, + unsigned int real_irq); #define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3)) @@ -523,6 +526,7 @@ static void pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) { struct pcic_ca2irq *p; + unsigned int real_irq; int i, ivec; char namebuf[64]; @@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) i = p->pin; if (i >= 0 && i < 4) { ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO); - dev->irq = ivec >> (i << 2) & 0xF; + real_irq = ivec >> (i << 2) & 0xF; } else if (i >= 4 && i < 8) { ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI); - dev->irq = ivec >> ((i-4) << 2) & 0xF; + real_irq = ivec >> ((i-4) << 2) & 0xF; } else { /* Corrupted map */ printk("PCIC: BAD PIN %d\n", i); for (;;) {} } /* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */ - /* - * dev->irq=0 means PROM did not bother to program the upper + /* real_irq means PROM did not bother to program the upper * half of PCIC. This happens on JS-E with PROM 3.11, for instance. */ - if (dev->irq == 0 || p->force) { + if (real_irq == 0 || p->force) { if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */ printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {} } printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n", p->irq, p->pin, dev->bus->number, dev->devfn); - dev->irq = p->irq; + real_irq = p->irq; i = p->pin; if (i >= 4) { @@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) ivec |= p->irq << (i << 2); writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO); } - } + } + dev->irq = pcic_build_device_irq(NULL, real_irq); } /* @@ -729,6 +733,7 @@ void __init pci_time_init(void) struct linux_pcic *pcic = &pcic0; unsigned long v; int timer_irq, irq; + int err; do_arch_gettimeoffset = pci_gettimeoffset; @@ -740,9 +745,10 @@ void __init pci_time_init(void) timer_irq = PCI_COUNTER_IRQ_SYS(v); writel (PCI_COUNTER_IRQ_SET(timer_irq, 0), pcic->pcic_regs+PCI_COUNTER_IRQ); - irq = request_irq(timer_irq, pcic_timer_handler, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); - if (irq) { + irq = pcic_build_device_irq(NULL, timer_irq); + err = request_irq(irq, pcic_timer_handler, + IRQF_TIMER, "timer", NULL); + if (err) { prom_printf("time_init: unable to attach IRQ%d\n", timer_irq); prom_halt(); } @@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr) return 1 << irq_nr; } -static void pcic_disable_irq(unsigned int irq_nr) +static void pcic_mask_irq(struct irq_data *data) { unsigned long mask, flags; - mask = get_irqmask(irq_nr); + mask = (unsigned long)data->chip_data; local_irq_save(flags); writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); local_irq_restore(flags); } -static void pcic_enable_irq(unsigned int irq_nr) +static void pcic_unmask_irq(struct irq_data *data) { unsigned long mask, flags; - mask = get_irqmask(irq_nr); + mask = (unsigned long)data->chip_data; local_irq_save(flags); writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); local_irq_restore(flags); } -static void pcic_load_profile_irq(int cpu, unsigned int limit) +static unsigned int pcic_startup_irq(struct irq_data *data) { - printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__); + irq_link(data->irq); + pcic_unmask_irq(data); + return 0; } -/* We assume the caller has disabled local interrupts when these are called, - * or else very bizarre behavior will result. - */ -static void pcic_disable_pil_irq(unsigned int pil) +static struct irq_chip pcic_irq = { + .name = "pcic", + .irq_startup = pcic_startup_irq, + .irq_mask = pcic_mask_irq, + .irq_unmask = pcic_unmask_irq, +}; + +unsigned int pcic_build_device_irq(struct platform_device *op, + unsigned int real_irq) { - writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); + unsigned int irq; + unsigned long mask; + + irq = 0; + mask = get_irqmask(real_irq); + if (mask == 0) + goto out; + + irq = irq_alloc(real_irq, real_irq); + if (irq == 0) + goto out; + + irq_set_chip_and_handler_name(irq, &pcic_irq, + handle_level_irq, "PCIC"); + irq_set_chip_data(irq, (void *)mask); + +out: + return irq; } -static void pcic_enable_pil_irq(unsigned int pil) + +static void pcic_load_profile_irq(int cpu, unsigned int limit) { - writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); + printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__); } void __init sun4m_pci_init_IRQ(void) { - BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM); + sparc_irq_config.build_device_irq = pcic_build_device_irq; + BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index ee8426ede7c7..2cb0e1c001e2 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -26,6 +26,7 @@ #include <asm/nmi.h> #include <asm/pcr.h> +#include "kernel.h" #include "kstack.h" /* Sparc64 chips have two performance counters, 32-bits each, with diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 17529298c50a..c8cc461ff75f 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -128,8 +128,16 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while(1) { - while (!need_resched()) - cpu_relax(); +#ifdef CONFIG_SPARC_LEON + if (pm_idle) { + while (!need_resched()) + (*pm_idle)(); + } else +#endif + { + while (!need_resched()) + cpu_relax(); + } preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 05fb25330583..5ce3d15a99b0 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -326,7 +326,6 @@ void __init of_console_init(void) of_console_options = NULL; } - prom_printf(msg, of_console_path); printk(msg, of_console_path); } diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 7b8b76c9557f..3609bdee9ed2 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0; /* Exported for mm/init.c:paging_init. */ unsigned long cmdline_memory_size __initdata = 0; +/* which CPU booted us (0xff = not set) */ +unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */ +unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */ + static void prom_console_write(struct console *con, const char *s, unsigned n) { prom_write(s, n); } -static struct console prom_debug_console = { - .name = "debug", +static struct console prom_early_console = { + .name = "earlyprom", .write = prom_console_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; @@ -133,8 +137,7 @@ static void __init process_switch(char c) prom_halt(); break; case 'p': - /* Use PROM debug console. */ - register_console(&prom_debug_console); + /* Just ignore, this behavior is now the default. */ break; default: printk("Unknown boot switch (-%c)\n", c); @@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p) strcpy(boot_command_line, *cmdline_p); parse_early_param(); + boot_flags_init(*cmdline_p); + + register_console(&prom_early_console); + /* Set sparc_cpu_model */ sparc_cpu_model = sun_unknown; if (!strcmp(&cputypval[0], "sun4 ")) @@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif - boot_flags_init(*cmdline_p); idprom_init(); if (ARCH_SUN4C) @@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p) smp_setup_cpu_possible_map(); } -static int ncpus_probed; - -static int show_cpuinfo(struct seq_file *m, void *__unused) -{ - seq_printf(m, - "cpu\t\t: %s\n" - "fpu\t\t: %s\n" - "promlib\t\t: Version %d Revision %d\n" - "prom\t\t: %d.%d\n" - "type\t\t: %s\n" - "ncpus probed\t: %d\n" - "ncpus active\t: %d\n" -#ifndef CONFIG_SMP - "CPU0Bogo\t: %lu.%02lu\n" - "CPU0ClkTck\t: %ld\n" -#endif - , - sparc_cpu_type, - sparc_fpu_type , - romvec->pv_romvers, - prom_rev, - romvec->pv_printrev >> 16, - romvec->pv_printrev & 0xffff, - &cputypval[0], - ncpus_probed, - num_online_cpus() -#ifndef CONFIG_SMP - , cpu_data(0).udelay_val/(500000/HZ), - (cpu_data(0).udelay_val/(5000/HZ)) % 100, - cpu_data(0).clock_tick -#endif - ); - -#ifdef CONFIG_SMP - smp_bogo(m); -#endif - mmu_info(m); -#ifdef CONFIG_SMP - smp_info(m); -#endif - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - /* The pointer we are returning is arbitrary, - * it just has to be non-NULL and not IS_ERR - * in the success case. - */ - return *pos == 0 ? &c_start : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - extern int stop_a_enabled; void sun_do_break(void) diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 29bafe051bb1..f3b6850cc8db 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); } -/* BUFFER is PAGE_SIZE bytes long. */ - -extern void smp_info(struct seq_file *); -extern void smp_bogo(struct seq_file *); -extern void mmu_info(struct seq_file *); - -unsigned int dcache_parity_tl1_occurred; -unsigned int icache_parity_tl1_occurred; - -int ncpus_probed; - -static int show_cpuinfo(struct seq_file *m, void *__unused) -{ - seq_printf(m, - "cpu\t\t: %s\n" - "fpu\t\t: %s\n" - "pmu\t\t: %s\n" - "prom\t\t: %s\n" - "type\t\t: %s\n" - "ncpus probed\t: %d\n" - "ncpus active\t: %d\n" - "D$ parity tl1\t: %u\n" - "I$ parity tl1\t: %u\n" -#ifndef CONFIG_SMP - "Cpu0ClkTck\t: %016lx\n" -#endif - , - sparc_cpu_type, - sparc_fpu_type, - sparc_pmu_type, - prom_version, - ((tlb_type == hypervisor) ? - "sun4v" : - "sun4u"), - ncpus_probed, - num_online_cpus(), - dcache_parity_tl1_occurred, - icache_parity_tl1_occurred -#ifndef CONFIG_SMP - , cpu_data(0).clock_tick -#endif - ); -#ifdef CONFIG_SMP - smp_bogo(m); -#endif - mmu_info(m); -#ifdef CONFIG_SMP - smp_info(m); -#endif - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - /* The pointer we are returning is arbitrary, - * it just has to be non-NULL and not IS_ERR - * in the success case. - */ - return *pos == 0 ? &c_start : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - extern int stop_a_enabled; void sun_do_break(void) diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 850a1360c0d6..d5b3958be0b4 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -37,8 +37,6 @@ #include "irq.h" volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; -unsigned char boot_cpu_id = 0; -unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ cpumask_t smp_commenced_mask = CPU_MASK_NONE; @@ -129,13 +127,58 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 }; void smp_send_reschedule(int cpu) { - /* See sparc64 */ + /* + * CPU model dependent way of implementing IPI generation targeting + * a single CPU. The trap handler needs only to do trap entry/return + * to call schedule. + */ + BTFIXUP_CALL(smp_ipi_resched)(cpu); } void smp_send_stop(void) { } +void arch_send_call_function_single_ipi(int cpu) +{ + /* trigger one IPI single call on one CPU */ + BTFIXUP_CALL(smp_ipi_single)(cpu); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + int cpu; + + /* trigger IPI mask call on each CPU */ + for_each_cpu(cpu, mask) + BTFIXUP_CALL(smp_ipi_mask_one)(cpu); +} + +void smp_resched_interrupt(void) +{ + irq_enter(); + scheduler_ipi(); + local_cpu_data().irq_resched_count++; + irq_exit(); + /* re-schedule routine called by interrupt return code. */ +} + +void smp_call_function_single_interrupt(void) +{ + irq_enter(); + generic_smp_call_function_single_interrupt(); + local_cpu_data().irq_call_count++; + irq_exit(); +} + +void smp_call_function_interrupt(void) +{ + irq_enter(); + generic_smp_call_function_interrupt(); + local_cpu_data().irq_call_count++; + irq_exit(); +} + void smp_flush_cache_all(void) { xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all)); @@ -151,9 +194,10 @@ void smp_flush_tlb_all(void) void smp_flush_cache_mm(struct mm_struct *mm) { if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm); local_flush_cache_mm(mm); } @@ -162,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm) void smp_flush_tlb_mm(struct mm_struct *mm) { if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) { xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm); if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) cpumask_copy(mm_cpumask(mm), @@ -180,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end); local_flush_cache_range(vma, start, end); } @@ -194,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end); local_flush_tlb_range(vma, start, end); } @@ -207,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page); local_flush_cache_page(vma, page); } @@ -220,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if(mm->context != NO_CONTEXT) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page); local_flush_tlb_page(vma, page); } } -void smp_reschedule_irq(void) -{ - set_need_resched(); -} - void smp_flush_page_to_ram(unsigned long page) { /* Current theory is that those who call this are the one's @@ -249,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page) void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) { - cpumask_t cpu_mask = *mm_cpumask(mm); - cpu_clear(smp_processor_id(), cpu_mask); - if (!cpus_empty(cpu_mask)) + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr); local_flush_sig_insns(mm, insn_addr); } @@ -405,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu) }; if (!ret) { - cpu_set(cpu, smp_commenced_mask); + cpumask_set_cpu(cpu, &smp_commenced_mask); while (!cpu_online(cpu)) mb(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 3e94a8c23238..99cb17251bb5 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -121,11 +121,11 @@ void __cpuinit smp_callin(void) /* inform the notifiers about the new cpu */ notify_cpu_starting(cpuid); - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) rmb(); ipi_call_lock_irq(); - cpu_set(cpuid, cpu_online_map); + set_cpu_online(cpuid, true); ipi_call_unlock_irq(); /* idle thread is expected to have preempt disabled */ @@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask /* Send cross call to all processors mentioned in MASK_P * except self. Really, there are only two cases currently, - * "&cpu_online_map" and "&mm->cpu_vm_mask". + * "cpu_online_mask" and "mm_cpumask(mm)". */ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask) { @@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d /* Send cross call to all processors except self. */ static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) { - smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map); + smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask); } extern unsigned long xcall_sync_tick; @@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick; static void smp_start_sync_tick_client(int cpu) { xcall_deliver((u64) &xcall_sync_tick, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } extern unsigned long xcall_call_function; @@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single; void arch_send_call_function_single_ipi(int cpu) { xcall_deliver((u64) &xcall_call_function_single, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) @@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) } if (data0) { xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, &cpumask_of_cpu(cpu)); + (u64) pg_addr, cpumask_of(cpu)); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); #endif @@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) } if (data0) { xcall_deliver(data0, __pa(pg_addr), - (u64) pg_addr, &cpu_online_map); + (u64) pg_addr, cpu_online_mask); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes_xcall); #endif @@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void) for_each_present_cpu(i) { unsigned int j; - cpus_clear(cpu_core_map[i]); + cpumask_clear(&cpu_core_map[i]); if (cpu_data(i).core_id == 0) { - cpu_set(i, cpu_core_map[i]); + cpumask_set_cpu(i, &cpu_core_map[i]); continue; } for_each_present_cpu(j) { if (cpu_data(i).core_id == cpu_data(j).core_id) - cpu_set(j, cpu_core_map[i]); + cpumask_set_cpu(j, &cpu_core_map[i]); } } for_each_present_cpu(i) { unsigned int j; - cpus_clear(per_cpu(cpu_sibling_map, i)); + cpumask_clear(&per_cpu(cpu_sibling_map, i)); if (cpu_data(i).proc_id == -1) { - cpu_set(i, per_cpu(cpu_sibling_map, i)); + cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i)); continue; } for_each_present_cpu(j) { if (cpu_data(i).proc_id == cpu_data(j).proc_id) - cpu_set(j, per_cpu(cpu_sibling_map, i)); + cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i)); } } } @@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu) int ret = smp_boot_one_cpu(cpu); if (!ret) { - cpu_set(cpu, smp_commenced_mask); - while (!cpu_isset(cpu, cpu_online_map)) + cpumask_set_cpu(cpu, &smp_commenced_mask); + while (!cpu_online(cpu)) mb(); - if (!cpu_isset(cpu, cpu_online_map)) { + if (!cpu_online(cpu)) { ret = -ENODEV; } else { /* On SUN4V, writes to %tick and %stick are @@ -1269,7 +1269,7 @@ void cpu_play_dead(void) tb->nonresum_mondo_pa, 0); } - cpu_clear(cpu, smp_commenced_mask); + cpumask_clear_cpu(cpu, &smp_commenced_mask); membar_safe("#Sync"); local_irq_disable(); @@ -1290,13 +1290,13 @@ int __cpu_disable(void) cpuinfo_sparc *c; int i; - for_each_cpu_mask(i, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[i]); - cpus_clear(cpu_core_map[cpu]); + for_each_cpu(i, &cpu_core_map[cpu]) + cpumask_clear_cpu(cpu, &cpu_core_map[i]); + cpumask_clear(&cpu_core_map[cpu]); - for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu)) - cpu_clear(cpu, per_cpu(cpu_sibling_map, i)); - cpus_clear(per_cpu(cpu_sibling_map, cpu)); + for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu)) + cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i)); + cpumask_clear(&per_cpu(cpu_sibling_map, cpu)); c = &cpu_data(cpu); @@ -1313,7 +1313,7 @@ int __cpu_disable(void) local_irq_disable(); ipi_call_lock(); - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); ipi_call_unlock(); cpu_map_rebuild(); @@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu) for (i = 0; i < 100; i++) { smp_rmb(); - if (!cpu_isset(cpu, smp_commenced_mask)) + if (!cpumask_test_cpu(cpu, &smp_commenced_mask)) break; msleep(100); } - if (cpu_isset(cpu, smp_commenced_mask)) { + if (cpumask_test_cpu(cpu, &smp_commenced_mask)) { printk(KERN_ERR "CPU %u didn't die...\n", cpu); } else { #if defined(CONFIG_SUN_LDOMS) @@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu) do { hv_err = sun4v_cpu_stop(cpu); if (hv_err == HV_EOK) { - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); break; } } while (--limit > 0); @@ -1362,12 +1362,13 @@ void __init smp_cpus_done(unsigned int max_cpus) void smp_send_reschedule(int cpu) { xcall_deliver((u64) &xcall_receive_signal, 0, 0, - &cpumask_of_cpu(cpu)); + cpumask_of(cpu)); } void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + scheduler_ipi(); } /* This is a nop because we capture all other cpus diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c index 90eea38ad66f..f6bf25a2ff80 100644 --- a/arch/sparc/kernel/sun4c_irq.c +++ b/arch/sparc/kernel/sun4c_irq.c @@ -65,62 +65,94 @@ */ unsigned char __iomem *interrupt_enable; -static void sun4c_disable_irq(unsigned int irq_nr) +static void sun4c_mask_irq(struct irq_data *data) { - unsigned long flags; - unsigned char current_mask, new_mask; - - local_irq_save(flags); - irq_nr &= (NR_IRQS - 1); - current_mask = sbus_readb(interrupt_enable); - switch (irq_nr) { - case 1: - new_mask = ((current_mask) & (~(SUN4C_INT_E1))); - break; - case 8: - new_mask = ((current_mask) & (~(SUN4C_INT_E8))); - break; - case 10: - new_mask = ((current_mask) & (~(SUN4C_INT_E10))); - break; - case 14: - new_mask = ((current_mask) & (~(SUN4C_INT_E14))); - break; - default: + unsigned long mask = (unsigned long)data->chip_data; + + if (mask) { + unsigned long flags; + + local_irq_save(flags); + mask = sbus_readb(interrupt_enable) & ~mask; + sbus_writeb(mask, interrupt_enable); local_irq_restore(flags); - return; } - sbus_writeb(new_mask, interrupt_enable); - local_irq_restore(flags); } -static void sun4c_enable_irq(unsigned int irq_nr) +static void sun4c_unmask_irq(struct irq_data *data) { - unsigned long flags; - unsigned char current_mask, new_mask; - - local_irq_save(flags); - irq_nr &= (NR_IRQS - 1); - current_mask = sbus_readb(interrupt_enable); - switch (irq_nr) { - case 1: - new_mask = ((current_mask) | SUN4C_INT_E1); - break; - case 8: - new_mask = ((current_mask) | SUN4C_INT_E8); - break; - case 10: - new_mask = ((current_mask) | SUN4C_INT_E10); - break; - case 14: - new_mask = ((current_mask) | SUN4C_INT_E14); - break; - default: + unsigned long mask = (unsigned long)data->chip_data; + + if (mask) { + unsigned long flags; + + local_irq_save(flags); + mask = sbus_readb(interrupt_enable) | mask; + sbus_writeb(mask, interrupt_enable); local_irq_restore(flags); - return; } - sbus_writeb(new_mask, interrupt_enable); - local_irq_restore(flags); +} + +static unsigned int sun4c_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + sun4c_unmask_irq(data); + + return 0; +} + +static void sun4c_shutdown_irq(struct irq_data *data) +{ + sun4c_mask_irq(data); + irq_unlink(data->irq); +} + +static struct irq_chip sun4c_irq = { + .name = "sun4c", + .irq_startup = sun4c_startup_irq, + .irq_shutdown = sun4c_shutdown_irq, + .irq_mask = sun4c_mask_irq, + .irq_unmask = sun4c_unmask_irq, +}; + +static unsigned int sun4c_build_device_irq(struct platform_device *op, + unsigned int real_irq) +{ + unsigned int irq; + + if (real_irq >= 16) { + prom_printf("Bogus sun4c IRQ %u\n", real_irq); + prom_halt(); + } + + irq = irq_alloc(real_irq, real_irq); + if (irq) { + unsigned long mask = 0UL; + + switch (real_irq) { + case 1: + mask = SUN4C_INT_E1; + break; + case 8: + mask = SUN4C_INT_E8; + break; + case 10: + mask = SUN4C_INT_E10; + break; + case 14: + mask = SUN4C_INT_E14; + break; + default: + /* All the rest are either always enabled, + * or are for signalling software interrupts. + */ + break; + } + irq_set_chip_and_handler_name(irq, &sun4c_irq, + handle_level_irq, "level"); + irq_set_chip_data(irq, (void *)mask); + } + return irq; } struct sun4c_timer_info { @@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit) static void __init sun4c_init_timers(irq_handler_t counter_fn) { - const struct linux_prom_irqs *irq; + const struct linux_prom_irqs *prom_irqs; struct device_node *dp; + unsigned int irq; const u32 *addr; int err; @@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn) sun4c_timers = (void __iomem *) (unsigned long) addr[0]; - irq = of_get_property(dp, "intr", NULL); + prom_irqs = of_get_property(dp, "intr", NULL); of_node_put(dp); - if (!irq) { + if (!prom_irqs) { prom_printf("sun4c_init_timers: No intr property\n"); prom_halt(); } @@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn) master_l10_counter = &sun4c_timers->l10_count; - err = request_irq(irq[0].pri, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), - "timer", NULL); + irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err); prom_halt(); } - sun4c_disable_irq(irq[1].pri); + /* disable timer interrupt */ + sun4c_mask_irq(irq_get_irq_data(irq)); } #ifdef CONFIG_SMP @@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void) interrupt_enable = (void __iomem *) (unsigned long) addr[0]; - BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP); - sparc_irq_config.init_timers = sun4c_init_timers; + sparc_irq_config.init_timers = sun4c_init_timers; + sparc_irq_config.build_device_irq = sun4c_build_device_irq; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c index 77b4a8992710..a9ea60eb2c10 100644 --- a/arch/sparc/kernel/sun4d_irq.c +++ b/arch/sparc/kernel/sun4d_irq.c @@ -14,6 +14,7 @@ #include <asm/io.h> #include <asm/sbi.h> #include <asm/cacheflush.h> +#include <asm/setup.h> #include "kernel.h" #include "irq.h" @@ -22,22 +23,20 @@ * cpu local. CPU local interrupts cover the timer interrupts * and whatnot, and we encode those as normal PILs between * 0 and 15. - * - * SBUS interrupts are encoded integers including the board number - * (plus one), the SBUS level, and the SBUS slot number. Sun4D - * IRQ dispatch is done by: - * - * 1) Reading the BW local interrupt table in order to get the bus - * interrupt mask. - * - * This table is indexed by SBUS interrupt level which can be - * derived from the PIL we got interrupted on. - * - * 2) For each bus showing interrupt pending from #1, read the - * SBI interrupt state register. This will indicate which slots - * have interrupts pending for that SBUS interrupt level. + * SBUS interrupts are encodes as a combination of board, level and slot. */ +struct sun4d_handler_data { + unsigned int cpuid; /* target cpu */ + unsigned int real_irq; /* interrupt level */ +}; + + +static unsigned int sun4d_encode_irq(int board, int lvl, int slot) +{ + return (board + 1) << 5 | (lvl << 2) | slot; +} + struct sun4d_timer_regs { u32 l10_timer_limit; u32 l10_cur_countx; @@ -48,17 +47,12 @@ struct sun4d_timer_regs { static struct sun4d_timer_regs __iomem *sun4d_timers; -#define TIMER_IRQ 10 - -#define MAX_STATIC_ALLOC 4 -static unsigned char sbus_tid[32]; - -static struct irqaction *irq_action[NR_IRQS]; +#define SUN4D_TIMER_IRQ 10 -static struct sbus_action { - struct irqaction *action; - /* For SMP this needs to be extended */ -} *sbus_actions; +/* Specify which cpu handle interrupts from which board. + * Index is board - value is cpu. + */ +static unsigned char board_to_cpu[32]; static int pil_to_sbus[] = { 0, @@ -79,152 +73,81 @@ static int pil_to_sbus[] = { 0, }; -static int sbus_to_pil[] = { - 0, - 2, - 3, - 5, - 7, - 9, - 11, - 13, -}; - -static int nsbi; - /* Exported for sun4d_smp.c */ DEFINE_SPINLOCK(sun4d_imsk_lock); -int show_sun4d_interrupts(struct seq_file *p, void *v) +/* SBUS interrupts are encoded integers including the board number + * (plus one), the SBUS level, and the SBUS slot number. Sun4D + * IRQ dispatch is done by: + * + * 1) Reading the BW local interrupt table in order to get the bus + * interrupt mask. + * + * This table is indexed by SBUS interrupt level which can be + * derived from the PIL we got interrupted on. + * + * 2) For each bus showing interrupt pending from #1, read the + * SBI interrupt state register. This will indicate which slots + * have interrupts pending for that SBUS interrupt level. + * + * 3) Call the genreric IRQ support. + */ +static void sun4d_sbus_handler_irq(int sbusl) { - int i = *(loff_t *) v, j = 0, k = 0, sbusl; - struct irqaction *action; - unsigned long flags; -#ifdef CONFIG_SMP - int x; -#endif - - spin_lock_irqsave(&irq_action_lock, flags); - if (i < NR_IRQS) { - sbusl = pil_to_sbus[i]; - if (!sbusl) { - action = *(i + irq_action); - if (!action) - goto out_unlock; - } else { - for (j = 0; j < nsbi; j++) { - for (k = 0; k < 4; k++) - action = sbus_actions[(j << 5) + (sbusl << 2) + k].action; - if (action) - goto found_it; - } - goto out_unlock; - } -found_it: seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(x) - seq_printf(p, "%10u ", - kstat_cpu(cpu_logical_map(x)).irqs[i]); -#endif - seq_printf(p, "%c %s", - (action->flags & IRQF_DISABLED) ? '+' : ' ', - action->name); - action = action->next; - for (;;) { - for (; action; action = action->next) { - seq_printf(p, ",%s %s", - (action->flags & IRQF_DISABLED) ? " +" : "", - action->name); - } - if (!sbusl) - break; - k++; - if (k < 4) { - action = sbus_actions[(j << 5) + (sbusl << 2) + k].action; - } else { - j++; - if (j == nsbi) - break; - k = 0; - action = sbus_actions[(j << 5) + (sbusl << 2)].action; + unsigned int bus_mask; + unsigned int sbino, slot; + unsigned int sbil; + + bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff; + bw_clear_intr_mask(sbusl, bus_mask); + + sbil = (sbusl << 2); + /* Loop for each pending SBI */ + for (sbino = 0; bus_mask; sbino++) { + unsigned int idx, mask; + + bus_mask >>= 1; + if (!(bus_mask & 1)) + continue; + /* XXX This seems to ACK the irq twice. acquire_sbi() + * XXX uses swap, therefore this writes 0xf << sbil, + * XXX then later release_sbi() will write the individual + * XXX bits which were set again. + */ + mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil); + mask &= (0xf << sbil); + + /* Loop for each pending SBI slot */ + idx = 0; + slot = (1 << sbil); + while (mask != 0) { + unsigned int pil; + struct irq_bucket *p; + + idx++; + slot <<= 1; + if (!(mask & slot)) + continue; + + mask &= ~slot; + pil = sun4d_encode_irq(sbino, sbil, idx); + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next; + + next = p->next; + generic_handle_irq(p->irq); + p = next; } + release_sbi(SBI2DEVID(sbino), slot); } - seq_putc(p, '\n'); } -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); - return 0; -} - -void sun4d_free_irq(unsigned int irq, void *dev_id) -{ - struct irqaction *action, **actionp; - struct irqaction *tmp = NULL; - unsigned long flags; - - spin_lock_irqsave(&irq_action_lock, flags); - if (irq < 15) - actionp = irq + irq_action; - else - actionp = &(sbus_actions[irq - (1 << 5)].action); - action = *actionp; - if (!action) { - printk(KERN_ERR "Trying to free free IRQ%d\n", irq); - goto out_unlock; - } - if (dev_id) { - for (; action; action = action->next) { - if (action->dev_id == dev_id) - break; - tmp = action; - } - if (!action) { - printk(KERN_ERR "Trying to free free shared IRQ%d\n", - irq); - goto out_unlock; - } - } else if (action->flags & IRQF_SHARED) { - printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n", - irq); - goto out_unlock; - } - if (action->flags & SA_STATIC_ALLOC) { - /* - * This interrupt is marked as specially allocated - * so it is a bad idea to free it. - */ - printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n", - irq, action->name); - goto out_unlock; - } - - if (tmp) - tmp->next = action->next; - else - *actionp = action->next; - - spin_unlock_irqrestore(&irq_action_lock, flags); - - synchronize_irq(irq); - - spin_lock_irqsave(&irq_action_lock, flags); - - kfree(action); - - if (!(*actionp)) - __disable_irq(irq); - -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); } void sun4d_handler_irq(int pil, struct pt_regs *regs) { struct pt_regs *old_regs; - struct irqaction *action; - int cpu = smp_processor_id(); /* SBUS IRQ level (1 - 7) */ int sbusl = pil_to_sbus[pil]; @@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs) cc_set_iclr(1 << pil); +#ifdef CONFIG_SMP + /* + * Check IPI data structures after IRQ has been cleared. Hard and Soft + * IRQ can happen at the same time, so both cases are always handled. + */ + if (pil == SUN4D_IPI_IRQ) + sun4d_ipi_interrupt(); +#endif + old_regs = set_irq_regs(regs); irq_enter(); - kstat_cpu(cpu).irqs[pil]++; - if (!sbusl) { - action = *(pil + irq_action); - if (!action) - unexpected_irq(pil, NULL, regs); - do { - action->handler(pil, action->dev_id); - action = action->next; - } while (action); + if (sbusl == 0) { + /* cpu interrupt */ + struct irq_bucket *p; + + p = irq_map[pil]; + while (p) { + struct irq_bucket *next; + + next = p->next; + generic_handle_irq(p->irq); + p = next; + } } else { - int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff; - int sbino; - struct sbus_action *actionp; - unsigned mask, slot; - int sbil = (sbusl << 2); - - bw_clear_intr_mask(sbusl, bus_mask); - - /* Loop for each pending SBI */ - for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1) - if (bus_mask & 1) { - mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil); - mask &= (0xf << sbil); - actionp = sbus_actions + (sbino << 5) + (sbil); - /* Loop for each pending SBI slot */ - for (slot = (1 << sbil); mask; slot <<= 1, actionp++) - if (mask & slot) { - mask &= ~slot; - action = actionp->action; - - if (!action) - unexpected_irq(pil, NULL, regs); - do { - action->handler(pil, action->dev_id); - action = action->next; - } while (action); - release_sbi(SBI2DEVID(sbino), slot); - } - } + /* SBUS interrupt */ + sun4d_sbus_handler_irq(sbusl); } irq_exit(); set_irq_regs(old_regs); } -int sun4d_request_irq(unsigned int irq, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) + +static void sun4d_mask_irq(struct irq_data *data) { - struct irqaction *action, *tmp = NULL, **actionp; + struct sun4d_handler_data *handler_data = data->handler_data; + unsigned int real_irq; +#ifdef CONFIG_SMP + int cpuid = handler_data->cpuid; unsigned long flags; - int ret; - - if (irq > 14 && irq < (1 << 5)) { - ret = -EINVAL; - goto out; - } - - if (!handler) { - ret = -EINVAL; - goto out; - } - - spin_lock_irqsave(&irq_action_lock, flags); - - if (irq >= (1 << 5)) - actionp = &(sbus_actions[irq - (1 << 5)].action); - else - actionp = irq + irq_action; - action = *actionp; - - if (action) { - if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) { - for (tmp = action; tmp->next; tmp = tmp->next) - /* find last entry - tmp used below */; - } else { - ret = -EBUSY; - goto out_unlock; - } - if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) { - printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n", - irq); - ret = -EBUSY; - goto out_unlock; - } - action = NULL; /* Or else! */ - } - - /* If this is flagged as statically allocated then we use our - * private struct which is never freed. - */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n", - irq, devname); - } - - if (action == NULL) - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - - if (!action) { - ret = -ENOMEM; - goto out_unlock; - } - - action->handler = handler; - action->flags = irqflags; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - if (tmp) - tmp->next = action; - else - *actionp = action; - - __enable_irq(irq); - - ret = 0; -out_unlock: - spin_unlock_irqrestore(&irq_action_lock, flags); -out: - return ret; +#endif + real_irq = handler_data->real_irq; +#ifdef CONFIG_SMP + spin_lock_irqsave(&sun4d_imsk_lock, flags); + cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq)); + spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +#else + cc_set_imsk(cc_get_imsk() | (1 << real_irq)); +#endif } -static void sun4d_disable_irq(unsigned int irq) +static void sun4d_unmask_irq(struct irq_data *data) { - int tid = sbus_tid[(irq >> 5) - 1]; + struct sun4d_handler_data *handler_data = data->handler_data; + unsigned int real_irq; +#ifdef CONFIG_SMP + int cpuid = handler_data->cpuid; unsigned long flags; +#endif + real_irq = handler_data->real_irq; - if (irq < NR_IRQS) - return; - +#ifdef CONFIG_SMP spin_lock_irqsave(&sun4d_imsk_lock, flags); - cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7])); + cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | ~(1 << real_irq)); spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +#else + cc_set_imsk(cc_get_imsk() | ~(1 << real_irq)); +#endif } -static void sun4d_enable_irq(unsigned int irq) +static unsigned int sun4d_startup_irq(struct irq_data *data) { - int tid = sbus_tid[(irq >> 5) - 1]; - unsigned long flags; - - if (irq < NR_IRQS) - return; + irq_link(data->irq); + sun4d_unmask_irq(data); + return 0; +} - spin_lock_irqsave(&sun4d_imsk_lock, flags); - cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7])); - spin_unlock_irqrestore(&sun4d_imsk_lock, flags); +static void sun4d_shutdown_irq(struct irq_data *data) +{ + sun4d_mask_irq(data); + irq_unlink(data->irq); } +struct irq_chip sun4d_irq = { + .name = "sun4d", + .irq_startup = sun4d_startup_irq, + .irq_shutdown = sun4d_shutdown_irq, + .irq_unmask = sun4d_unmask_irq, + .irq_mask = sun4d_mask_irq, +}; + #ifdef CONFIG_SMP static void sun4d_set_cpu_int(int cpu, int level) { @@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void) for_each_node_by_name(dp, "sbi") { int devid = of_getintprop_default(dp, "device-id", 0); int board = of_getintprop_default(dp, "board#", 0); - sbus_tid[board] = cpuid; + board_to_cpu[board] = cpuid; set_sbi_tid(devid, cpuid << 3); } printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid); @@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void) unsigned int sun4d_build_device_irq(struct platform_device *op, unsigned int real_irq) { - static int pil_to_sbus[] = { - 0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0, - }; struct device_node *dp = op->dev.of_node; struct device_node *io_unit, *sbi = dp->parent; const struct linux_prom_registers *regs; + struct sun4d_handler_data *handler_data; + unsigned int pil; + unsigned int irq; int board, slot; int sbusl; + irq = 0; while (sbi) { if (!strcmp(sbi->name, "sbi")) break; @@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op, sbusl = pil_to_sbus[real_irq]; if (sbusl) - return (((board + 1) << 5) + (sbusl << 2) + slot); + pil = sun4d_encode_irq(board, sbusl, slot); + else + pil = real_irq; + + irq = irq_alloc(real_irq, pil); + if (irq == 0) + goto err_out; + + handler_data = irq_get_handler_data(irq); + if (unlikely(handler_data)) + goto err_out; + + handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC); + if (unlikely(!handler_data)) { + prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n"); + prom_halt(); + } + handler_data->cpuid = board_to_cpu[board]; + handler_data->real_irq = real_irq; + irq_set_chip_and_handler_name(irq, &sun4d_irq, + handle_level_irq, "level"); + irq_set_handler_data(irq, handler_data); err_out: return real_irq; @@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) { struct device_node *dp; struct resource res; + unsigned int irq; const u32 *reg; int err; @@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) master_l10_counter = &sun4d_timers->l10_cur_count; - err = request_irq(TIMER_IRQ, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), - "timer", NULL); + irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ); + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { prom_printf("sun4d_init_timers: request_irq() failed with %d\n", err); @@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn) void __init sun4d_init_sbi_irq(void) { struct device_node *dp; - int target_cpu = 0; + int target_cpu; -#ifdef CONFIG_SMP target_cpu = boot_cpu_id; -#endif - - nsbi = 0; - for_each_node_by_name(dp, "sbi") - nsbi++; - sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC); - if (!sbus_actions) { - prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n"); - prom_halt(); - } for_each_node_by_name(dp, "sbi") { int devid = of_getintprop_default(dp, "device-id", 0); int board = of_getintprop_default(dp, "board#", 0); unsigned int mask; set_sbi_tid(devid, target_cpu << 3); - sbus_tid[board] = target_cpu; + board_to_cpu[board] = target_cpu; /* Get rid of pending irqs from PROM */ mask = acquire_sbi(devid, 0xffffffff); @@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void) { local_irq_disable(); - BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM); - sparc_irq_config.init_timers = sun4d_init_timers; + sparc_irq_config.init_timers = sun4d_init_timers; sparc_irq_config.build_device_irq = sun4d_build_device_irq; #ifdef CONFIG_SMP diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 475d50b96cd0..133387980b56 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon return val; } +static void smp4d_ipi_init(void); static void smp_setup_percpu_timer(void); static unsigned char cpu_leds[32]; @@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - while ((unsigned long)current_set[cpuid] < PAGE_OFFSET) barrier(); @@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void) local_irq_enable(); /* We don't allow PIL 14 yet */ - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) barrier(); spin_lock_irqsave(&sun4d_imsk_lock, flags); @@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void) */ void __init smp4d_boot_cpus(void) { + smp4d_ipi_init(); if (boot_cpu_id) current_set[0] = NULL; smp_setup_percpu_timer(); @@ -191,6 +191,80 @@ void __init smp4d_smp_done(void) sun4d_distribute_irqs(); } +/* Memory structure giving interrupt handler information about IPI generated */ +struct sun4d_ipi_work { + int single; + int msk; + int resched; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work); + +/* Initialize IPIs on the SUN4D SMP machine */ +static void __init smp4d_ipi_init(void) +{ + int cpu; + struct sun4d_ipi_work *work; + + printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ); + + for_each_possible_cpu(cpu) { + work = &per_cpu(sun4d_ipi_work, cpu); + work->single = work->msk = work->resched = 0; + } +} + +void sun4d_ipi_interrupt(void) +{ + struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work); + + if (work->single) { + work->single = 0; + smp_call_function_single_interrupt(); + } + if (work->msk) { + work->msk = 0; + smp_call_function_interrupt(); + } + if (work->resched) { + work->resched = 0; + smp_resched_interrupt(); + } +} + +static void smp4d_ipi_single(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->single = 1; + + /* Generate IRQ on the CPU */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + +static void smp4d_ipi_mask_one(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->msk = 1; + + /* Generate IRQ on the CPU */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + +static void smp4d_ipi_resched(int cpu) +{ + struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu); + + /* Mark work */ + work->resched = 1; + + /* Generate IRQ on the CPU (any IRQ will cause resched) */ + sun4d_send_ipi(cpu, SUN4D_IPI_IRQ); +} + static struct smp_funcall { smpfunc_t func; unsigned long arg1; @@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; sun4d_send_ipi(i, IRQ_CROSS_CALL); @@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) barrier(); @@ -356,6 +430,9 @@ void __init sun4d_init_smp(void) BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM); for (i = 0; i < NR_CPUS; i++) { ccall_info.processors_in[i] = 1; diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index 69df6257a32e..422c16dad1f6 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -100,6 +100,11 @@ struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS]; struct sun4m_irq_global __iomem *sun4m_irq_global; +struct sun4m_handler_data { + bool percpu; + long mask; +}; + /* Dave Redman (djhr@tadpole.co.uk) * The sun4m interrupt registers. */ @@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global; #define OBP_INT_LEVEL_VME 0x40 #define SUN4M_TIMER_IRQ (OBP_INT_LEVEL_ONBOARD | 10) -#define SUM4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14) +#define SUN4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14) -static unsigned long irq_mask[0x50] = { +static unsigned long sun4m_imask[0x50] = { /* 0x00 - SMP */ 0, SUN4M_SOFT_INT(1), SUN4M_SOFT_INT(2), SUN4M_SOFT_INT(3), @@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = { SUN4M_INT_VIDEO, SUN4M_INT_MODULE, SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY, (SUN4M_INT_SERIAL | SUN4M_INT_KBDMS), - SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR, + SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR, /* 0x30 - sbus */ 0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1), 0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3), @@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = { 0, SUN4M_INT_VME(6), 0, 0 }; -static unsigned long sun4m_get_irqmask(unsigned int irq) +static void sun4m_mask_irq(struct irq_data *data) { - unsigned long mask; - - if (irq < 0x50) - mask = irq_mask[irq]; - else - mask = 0; + struct sun4m_handler_data *handler_data = data->handler_data; + int cpu = smp_processor_id(); - if (!mask) - printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n", - irq); + if (handler_data->mask) { + unsigned long flags; - return mask; + local_irq_save(flags); + if (handler_data->percpu) { + sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set); + } else { + sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set); + } + local_irq_restore(flags); + } } -static void sun4m_disable_irq(unsigned int irq_nr) +static void sun4m_unmask_irq(struct irq_data *data) { - unsigned long mask, flags; + struct sun4m_handler_data *handler_data = data->handler_data; int cpu = smp_processor_id(); - mask = sun4m_get_irqmask(irq_nr); - local_irq_save(flags); - if (irq_nr > 15) - sbus_writel(mask, &sun4m_irq_global->mask_set); - else - sbus_writel(mask, &sun4m_irq_percpu[cpu]->set); - local_irq_restore(flags); -} - -static void sun4m_enable_irq(unsigned int irq_nr) -{ - unsigned long mask, flags; - int cpu = smp_processor_id(); + if (handler_data->mask) { + unsigned long flags; - /* Dreadful floppy hack. When we use 0x2b instead of - * 0x0b the system blows (it starts to whistle!). - * So we continue to use 0x0b. Fixme ASAP. --P3 - */ - if (irq_nr != 0x0b) { - mask = sun4m_get_irqmask(irq_nr); - local_irq_save(flags); - if (irq_nr > 15) - sbus_writel(mask, &sun4m_irq_global->mask_clear); - else - sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear); - local_irq_restore(flags); - } else { local_irq_save(flags); - sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear); + if (handler_data->percpu) { + sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear); + } else { + sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear); + } local_irq_restore(flags); } } -static unsigned long cpu_pil_to_imask[16] = { -/*0*/ 0x00000000, -/*1*/ 0x00000000, -/*2*/ SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0), -/*3*/ SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1), -/*4*/ SUN4M_INT_SCSI, -/*5*/ SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2), -/*6*/ SUN4M_INT_ETHERNET, -/*7*/ SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3), -/*8*/ SUN4M_INT_VIDEO, -/*9*/ SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR, -/*10*/ SUN4M_INT_REALTIME, -/*11*/ SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY, -/*12*/ SUN4M_INT_SERIAL | SUN4M_INT_KBDMS, -/*13*/ SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO, -/*14*/ SUN4M_INT_E14, -/*15*/ SUN4M_INT_ERROR, -}; +static unsigned int sun4m_startup_irq(struct irq_data *data) +{ + irq_link(data->irq); + sun4m_unmask_irq(data); + return 0; +} -/* We assume the caller has disabled local interrupts when these are called, - * or else very bizarre behavior will result. - */ -static void sun4m_disable_pil_irq(unsigned int pil) +static void sun4m_shutdown_irq(struct irq_data *data) { - sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set); + sun4m_mask_irq(data); + irq_unlink(data->irq); } -static void sun4m_enable_pil_irq(unsigned int pil) +static struct irq_chip sun4m_irq = { + .name = "sun4m", + .irq_startup = sun4m_startup_irq, + .irq_shutdown = sun4m_shutdown_irq, + .irq_mask = sun4m_mask_irq, + .irq_unmask = sun4m_unmask_irq, +}; + + +static unsigned int sun4m_build_device_irq(struct platform_device *op, + unsigned int real_irq) { - sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear); + struct sun4m_handler_data *handler_data; + unsigned int irq; + unsigned int pil; + + if (real_irq >= OBP_INT_LEVEL_VME) { + prom_printf("Bogus sun4m IRQ %u\n", real_irq); + prom_halt(); + } + pil = (real_irq & 0xf); + irq = irq_alloc(real_irq, pil); + + if (irq == 0) + goto out; + + handler_data = irq_get_handler_data(irq); + if (unlikely(handler_data)) + goto out; + + handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC); + if (unlikely(!handler_data)) { + prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n"); + prom_halt(); + } + + handler_data->mask = sun4m_imask[real_irq]; + handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD; + irq_set_chip_and_handler_name(irq, &sun4m_irq, + handle_level_irq, "level"); + irq_set_handler_data(irq, handler_data); + +out: + return irq; } #ifdef CONFIG_SMP static void sun4m_send_ipi(int cpu, int level) { - unsigned long mask = sun4m_get_irqmask(level); - - sbus_writel(mask, &sun4m_irq_percpu[cpu]->set); + sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set); } static void sun4m_clear_ipi(int cpu, int level) { - unsigned long mask = sun4m_get_irqmask(level); - - sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear); + sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear); } static void sun4m_set_udt(int cpu) @@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs) prom_halt(); } -/* Exported for sun4m_smp.c */ +void sun4m_unmask_profile_irq(void) +{ + unsigned long flags; + + local_irq_save(flags); + sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear); + local_irq_restore(flags); +} + void sun4m_clear_profile_irq(int cpu) { sbus_readl(&timers_percpu[cpu]->l14_limit); @@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn) { struct device_node *dp = of_find_node_by_name(NULL, "counter"); int i, err, len, num_cpu_timers; + unsigned int irq; const u32 *addr; if (!dp) { @@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn) master_l10_counter = &timers_global->l10_count; - err = request_irq(SUN4M_TIMER_IRQ, counter_fn, - (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); + irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ); + + err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL); if (err) { printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n", err); @@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void) if (num_cpu_iregs == 4) sbus_writel(0, &sun4m_irq_global->interrupt_target); - BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM); sparc_irq_config.init_timers = sun4m_init_timers; + sparc_irq_config.build_device_irq = sun4m_build_device_irq; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 5cc7dc51de3d..594768686525 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -15,6 +15,9 @@ #include "irq.h" #include "kernel.h" +#define IRQ_IPI_SINGLE 12 +#define IRQ_IPI_MASK 13 +#define IRQ_IPI_RESCHED 14 #define IRQ_CROSS_CALL 15 static inline unsigned long @@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val) return val; } +static void smp4m_ipi_init(void); static void smp_setup_percpu_timer(void); void __cpuinit smp4m_callin(void) @@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void) local_flush_cache_all(); local_flush_tlb_all(); - cpu_probe(); - /* Fix idle thread fields. */ __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r" (¤t_set[cpuid]) @@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - while (!cpu_isset(cpuid, smp_commenced_mask)) + while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); local_irq_enable(); @@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void) */ void __init smp4m_boot_cpus(void) { + smp4m_ipi_init(); smp_setup_percpu_timer(); local_flush_cache_all(); } @@ -150,18 +153,25 @@ void __init smp4m_smp_done(void) /* Ok, they are spinning and ready to go. */ } -/* At each hardware IRQ, we get this called to forward IRQ reception - * to the next processor. The caller must disable the IRQ level being - * serviced globally so that there are no double interrupts received. - * - * XXX See sparc64 irq.c. - */ -void smp4m_irq_rotate(int cpu) + +/* Initialize IPIs on the SUN4M SMP machine */ +static void __init smp4m_ipi_init(void) +{ +} + +static void smp4m_ipi_resched(int cpu) +{ + set_cpu_int(cpu, IRQ_IPI_RESCHED); +} + +static void smp4m_ipi_single(int cpu) { - int next = cpu_data(cpu).next; + set_cpu_int(cpu, IRQ_IPI_SINGLE); +} - if (next != cpu) - set_irq_udt(next); +static void smp4m_ipi_mask_one(int cpu) +{ + set_cpu_int(cpu, IRQ_IPI_MASK); } static struct smp_funcall { @@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { register int i; - cpu_clear(smp_processor_id(), mask); - cpus_and(mask, cpu_online_map, mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i < ncpus; i++) { - if (cpu_isset(i, mask)) { + if (cpumask_test_cpu(i, &mask)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; set_cpu_int(i, IRQ_CROSS_CALL); @@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpu_isset(i, mask)) + if (!cpumask_test_cpu(i, &mask)) continue; while (!ccall_info.processors_out[i]) barrier(); @@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void) load_profile_irq(cpu, lvl14_resolution); if (cpu == boot_cpu_id) - enable_pil_irq(14); + sun4m_unmask_profile_irq(); } static void __init smp4m_blackbox_id(unsigned *addr) @@ -306,4 +316,7 @@ void __init sun4m_init_smp(void) BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM); } diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 1eb8b00aed75..7408201d7efb 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu, unsigned long (*func)(unsigned long), unsigned long arg) { - cpumask_t old_affinity = current->cpus_allowed; + cpumask_t old_affinity; unsigned long ret; + cpumask_copy(&old_affinity, tsk_cpus_allowed(current)); /* should return -EINVAL to userspace */ if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) return 0; diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 47ac73c32e88..332c83ff7701 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -84,4 +84,4 @@ sys_call_table: /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime -/*335*/ .long sys_syncfs +/*335*/ .long sys_syncfs, sys_sendmmsg diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4f3170c1ef47..43887ca0be0e 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -85,7 +85,7 @@ sys_call_table32: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, compat_sys_sendmmsg #endif /* CONFIG_COMPAT */ @@ -162,4 +162,4 @@ sys_call_table: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, sys_sendmmsg diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 96046a4024c2..1060e0672a4b 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -228,14 +228,10 @@ static void __init sbus_time_init(void) void __init time_init(void) { -#ifdef CONFIG_PCI - extern void pci_time_init(void); - if (pcic_present()) { + if (pcic_present()) pci_time_init(); - return; - } -#endif - sbus_time_init(); + else + sbus_time_init(); } diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c index 8f982b76c712..531d54fc9829 100644 --- a/arch/sparc/kernel/us2e_cpufreq.c +++ b/arch/sparc/kernel/us2e_cpufreq.c @@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu) if (!cpu_online(cpu)) return 0; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); clock_tick = sparc64_get_clock_tick(cpu) / 1000; @@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) if (!cpu_online(cpu)) return; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c index f35d1e794548..9a8ceb700833 100644 --- a/arch/sparc/kernel/us3_cpufreq.c +++ b/arch/sparc/kernel/us3_cpufreq.c @@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu) if (!cpu_online(cpu)) return 0; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); reg = read_safari_cfg(); @@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) if (!cpu_online(cpu)) return; - cpus_allowed = current->cpus_allowed; + cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = sparc64_get_clock_tick(cpu) / 1000; diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 846d1c4374ea..7f01b8fce8bc 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o lib-$(CONFIG_SPARC32) += copy_user.o locks.o lib-y += atomic_$(BITS).o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o -lib-$(CONFIG_SPARC32) += rwsem_32.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S deleted file mode 100644 index 9675268e7fde..000000000000 --- a/arch/sparc/lib/rwsem_32.S +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Assembly part of rw semaphores. - * - * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) - */ - -#include <asm/ptrace.h> -#include <asm/psr.h> - - .section .sched.text, "ax" - .align 4 - - .globl ___down_read -___down_read: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - nop - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - sub %g7, 1, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - sub %g7, 1, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - add %g7, 1, %g7 - nop - nop - subcc %g7, 1, %g7 - bneg 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - bcs 4f - mov %g5, %l5 - call down_read_failed - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba ___down_read - restore %l5, %g0, %g5 -4: call down_read_failed_biased - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .globl ___down_write -___down_write: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - sethi %hi(0x01000000), %g2 - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - sub %g7, %g2, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - sub %g7, %g2, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - add %g7, %g2, %g7 - nop - nop - subcc %g7, %g2, %g7 - bne 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - bcs 4f - mov %g5, %l5 - call down_write_failed - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba ___down_write - restore %l5, %g0, %g5 -4: call down_write_failed_biased - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .text - .globl ___up_read -___up_read: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - nop - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - add %g7, 1, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - add %g7, 1, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - nop - nop - nop - cmp %g7, 0 - be 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - mov %g5, %l5 - clr %o1 - call __rwsem_wake - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 - - .globl ___up_write -___up_write: - rd %psr, %g3 - nop - nop - nop - or %g3, PSR_PIL, %g7 - wr %g7, 0, %psr - sethi %hi(0x01000000), %g2 - nop - nop -#ifdef CONFIG_SMP -1: ldstub [%g1 + 4], %g7 - tst %g7 - bne 1b - ld [%g1], %g7 - add %g7, %g2, %g7 - st %g7, [%g1] - stb %g0, [%g1 + 4] -#else - ld [%g1], %g7 - add %g7, %g2, %g7 - st %g7, [%g1] -#endif - wr %g3, 0, %psr - sub %g7, %g2, %g7 - nop - nop - addcc %g7, %g2, %g7 - bcs 3f - nop -2: jmpl %o7, %g0 - mov %g4, %o7 -3: save %sp, -64, %sp - mov %g1, %l1 - mov %g4, %l4 - mov %g5, %l5 - mov %g7, %o1 - call __rwsem_wake - mov %l1, %o0 - mov %l1, %g1 - mov %l4, %g4 - ba 2b - restore %l5, %g0, %g5 diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 2f6ae1d1fb6b..e10cd03fab80 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void) for (i = 0; i < NR_CPUS; i++) numa_cpu_lookup_table[i] = 0; - numa_cpumask_lookup_table[0] = CPU_MASK_ALL; + cpumask_setall(&numa_cpumask_lookup_table[0]); } #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, { u64 arc; - cpus_clear(*mask); + cpumask_clear(mask); mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) { u64 target = mdesc_arc_target(md, arc); @@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, continue; id = mdesc_get_property(md, target, "id", NULL); if (*id < nr_cpu_ids) - cpu_set(*id, *mask); + cpumask_set_cpu(*id, mask); } } @@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, numa_parse_mdesc_group_cpus(md, grp, &mask); - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, &mask) numa_cpu_lookup_table[cpu] = index; - numa_cpumask_lookup_table[index] = mask; + cpumask_copy(&numa_cpumask_lookup_table[index], &mask); if (numa_debug) { printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index); - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, &mask) printk("%d ", cpu); printk("]\n"); } @@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void) index = 0; for_each_present_cpu(cpu) { numa_cpu_lookup_table[cpu] = index; - numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu); + cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu)); node_masks[index].mask = ~((1UL << 36UL) - 1UL); node_masks[index].val = cpu << 36UL; diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a4293102ef81..c52224d5ed45 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end) /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ static irqreturn_t handle_reschedule_ipi(int irq, void *token) { - /* - * Nothing to do here; when we return from interrupt, the - * rescheduling will occur there. But do bump the interrupt - * profiler count in the meantime. - */ __get_cpu_var(irq_stat).irq_resched_count++; + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 106bf27e2a9a..eefb107d2d73 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -173,7 +173,7 @@ void IPI_handler(int cpu) break; case 'R': - set_tsk_need_resched(current); + scheduler_ipi(); break; case 'S': diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 6ea77979531c..42827cafa6af 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <stdlib.h> +#include <unistd.h> #include <errno.h> #include <signal.h> #include <string.h> @@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len) host.release, host.version, host.machine); } +/* + * We cannot use glibc's abort(). It makes use of tgkill() which + * has no effect within UML's kernel threads. + * After that glibc would execute an invalid instruction to kill + * the calling process and UML crashes with SIGSEGV. + */ +static inline void __attribute__ ((noreturn)) uml_abort(void) +{ + sigset_t sig; + + fflush(NULL); + + if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT)) + sigprocmask(SIG_UNBLOCK, &sig, 0); + + for (;;) + if (kill(getpid(), SIGABRT) < 0) + exit(127); +} + void os_dump_core(void) { int pid; @@ -116,5 +137,5 @@ void os_dump_core(void) while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) os_kill_ptraced_process(pid, 0); - abort(); + uml_abort(); } diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c index 2aa30a364bbe..d4efa7d679ff 100644 --- a/arch/unicore32/kernel/irq.c +++ b/arch/unicore32/kernel/irq.c @@ -23,7 +23,7 @@ #include <linux/list.h> #include <linux/kallsyms.h> #include <linux/proc_fs.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/gpio.h> #include <asm/system.h> @@ -237,7 +237,7 @@ static struct puv3_irq_state { unsigned int iccr; } puv3_irq_state; -static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state) +static int puv3_irq_suspend(void) { struct puv3_irq_state *st = &puv3_irq_state; @@ -265,7 +265,7 @@ static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int puv3_irq_resume(struct sys_device *dev) +static void puv3_irq_resume(void) { struct puv3_irq_state *st = &puv3_irq_state; @@ -278,27 +278,20 @@ static int puv3_irq_resume(struct sys_device *dev) writel(st->icmr, INTC_ICMR); } - return 0; } -static struct sysdev_class puv3_irq_sysclass = { - .name = "pkunity-irq", +static struct syscore_ops puv3_irq_syscore_ops = { .suspend = puv3_irq_suspend, .resume = puv3_irq_resume, }; -static struct sys_device puv3_irq_device = { - .id = 0, - .cls = &puv3_irq_sysclass, -}; - -static int __init puv3_irq_init_devicefs(void) +static int __init puv3_irq_init_syscore(void) { - sysdev_class_register(&puv3_irq_sysclass); - return sysdev_register(&puv3_irq_device); + register_syscore_ops(&puv3_irq_syscore_ops); + return 0; } -device_initcall(puv3_irq_init_devicefs); +device_initcall(puv3_irq_init_syscore); void __init init_IRQ(void) { diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 254e36fa9513..b9a26465e728 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -192,7 +192,6 @@ static int __die(const char *str, int err, struct thread_info *thread, printk(KERN_EMERG "Internal error: %s: %x [#%d]\n", str, err, ++die_counter); - sysfs_printk_last_file(); /* trap and error numbers are mostly meaningless on UniCore */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, \ diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 0e103236b754..0e9dec6cadd1 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -15,3 +15,4 @@ obj-y += vdso/ obj-$(CONFIG_IA32_EMULATION) += ia32/ obj-y += platform/ +obj-y += net/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc6c53a95bfd..880fcb6c86f4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -8,6 +8,7 @@ config 64BIT config X86_32 def_bool !64BIT + select CLKSRC_I8253 config X86_64 def_bool 64BIT @@ -71,7 +72,7 @@ config X86 select GENERIC_IRQ_SHOW select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP - select ARCH_NO_SYSDEV_OPS + select HAVE_BPF_JIT if (X86_64 && NET) config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -112,7 +113,14 @@ config MMU def_bool y config ZONE_DMA - def_bool y + bool "DMA memory allocation support" if EXPERT + default y + help + DMA memory allocation support allows devices with less than 32-bit + addressing to allocate within the first 16MB of address space. + Disable if no such devices will be used. + + If unsure, say Y. config SBUS bool @@ -365,17 +373,6 @@ config X86_UV # Following is an alphabetically sorted list of 32 bit extended platforms # Please maintain the alphabetic order if and when there are additions -config X86_ELAN - bool "AMD Elan" - depends on X86_32 - depends on X86_EXTENDED_PLATFORM - ---help--- - Select this for an AMD Elan processor. - - Do not use this option for K6/Athlon/Opteron processors! - - If unsure, choose "PC-compatible" instead. - config X86_INTEL_CE bool "CE4100 TV platform" depends on PCI @@ -690,6 +687,7 @@ config AMD_IOMMU bool "AMD IOMMU support" select SWIOTLB select PCI_MSI + select PCI_IOV depends on X86_64 && PCI && ACPI ---help--- With this option you can enable support for AMD IOMMU hardware in @@ -1174,7 +1172,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" config AMD_NUMA def_bool y prompt "Old style AMD Opteron NUMA detection" - depends on X86_64 && NUMA && PCI + depends on NUMA && PCI ---help--- Enable AMD NUMA node topology detection. You should say Y here if you have a multi processor AMD system. This uses an old method to @@ -1201,7 +1199,7 @@ config NODES_SPAN_OTHER_NODES config NUMA_EMU bool "NUMA emulation" - depends on X86_64 && NUMA + depends on NUMA ---help--- Enable NUMA emulation. A flat machine will be split into virtual nodes when booted with "numa=fake=N", where N is the @@ -1223,6 +1221,10 @@ config HAVE_ARCH_BOOTMEM def_bool y depends on X86_32 && NUMA +config HAVE_ARCH_ALLOC_REMAP + def_bool y + depends on X86_32 && NUMA + config ARCH_HAVE_MEMORY_PRESENT def_bool y depends on X86_32 && DISCONTIGMEM @@ -1231,13 +1233,9 @@ config NEED_NODE_MEMMAP_SIZE def_bool y depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) -config HAVE_ARCH_ALLOC_REMAP - def_bool y - depends on X86_32 && NUMA - config ARCH_FLATMEM_ENABLE def_bool y - depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA + depends on X86_32 && !NUMA config ARCH_DISCONTIGMEM_ENABLE def_bool y @@ -1247,20 +1245,16 @@ config ARCH_DISCONTIGMEM_DEFAULT def_bool y depends on NUMA && X86_32 -config ARCH_PROC_KCORE_TEXT - def_bool y - depends on X86_64 && PROC_KCORE - -config ARCH_SPARSEMEM_DEFAULT - def_bool y - depends on X86_64 - config ARCH_SPARSEMEM_ENABLE def_bool y depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 +config ARCH_SPARSEMEM_DEFAULT + def_bool y + depends on X86_64 + config ARCH_SELECT_MEMORY_MODEL def_bool y depends on ARCH_SPARSEMEM_ENABLE @@ -1269,6 +1263,10 @@ config ARCH_MEMORY_PROBE def_bool X86_64 depends on MEMORY_HOTPLUG +config ARCH_PROC_KCORE_TEXT + def_bool y + depends on X86_64 && PROC_KCORE + config ILLEGAL_POINTER_VALUE hex default 0 if X86_32 @@ -1703,10 +1701,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE def_bool y depends on MEMORY_HOTPLUG -config HAVE_ARCH_EARLY_PFN_TO_NID - def_bool X86_64 - depends on NUMA - config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA @@ -1848,7 +1842,7 @@ config APM_ALLOW_INTS endif # APM -source "arch/x86/kernel/cpu/cpufreq/Kconfig" +source "drivers/cpufreq/Kconfig" source "drivers/cpuidle/Kconfig" @@ -2076,7 +2070,7 @@ config OLPC depends on !X86_PAE select GPIOLIB select OF - select OF_PROMTREE if PROC_DEVICETREE + select OF_PROMTREE ---help--- Add support for detecting the unique features of the OLPC XO hardware. diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index d161e939df62..6a7cfdf8ff69 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -1,6 +1,4 @@ # Put here option for CPU selection and depending optimization -if !X86_ELAN - choice prompt "Processor family" default M686 if X86_32 @@ -203,6 +201,14 @@ config MWINCHIP3D stores for this CPU, which can increase performance of some operations. +config MELAN + bool "AMD Elan" + depends on X86_32 + ---help--- + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + config MGEODEGX1 bool "GeodeGX1" depends on X86_32 @@ -292,8 +298,6 @@ config X86_GENERIC This is really intended for distributors who need more generic optimizations. -endif - # # Define implied options from the CPU selection here config X86_INTERNODE_CACHE_SHIFT @@ -312,7 +316,7 @@ config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU - default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 + default "4" if MELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX config X86_XADD @@ -358,7 +362,7 @@ config X86_POPAD_OK config X86_ALIGNMENT_16 def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 config X86_INTEL_USERCOPY def_bool y diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index f2ee1abb1df9..86cee7b749e1 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -37,7 +37,7 @@ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march= $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) # AMD Elan support -cflags-$(CONFIG_X86_ELAN) += -march=i486 +cflags-$(CONFIG_MELAN) += -march=i486 # Geode GX1 support cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 1a58ad89fdf7..c04f1b7a9139 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -2,8 +2,6 @@ # Arch-specific CryptoAPI modules. # -obj-$(CONFIG_CRYPTO_FPU) += fpu.o - obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o @@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o -aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o +aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2577613fb32b..feee8ff1d05e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); + +int crypto_fpu_init(void); +void crypto_fpu_exit(void); + #ifdef CONFIG_X86_64 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); @@ -1257,6 +1261,8 @@ static int __init aesni_init(void) return -ENODEV; } + if ((err = crypto_fpu_init())) + goto fpu_err; if ((err = crypto_register_alg(&aesni_alg))) goto aes_err; if ((err = crypto_register_alg(&__aesni_alg))) @@ -1334,6 +1340,7 @@ blk_ecb_err: __aes_err: crypto_unregister_alg(&aesni_alg); aes_err: +fpu_err: return err; } @@ -1363,6 +1370,8 @@ static void __exit aesni_exit(void) crypto_unregister_alg(&blk_ecb_alg); crypto_unregister_alg(&__aesni_alg); crypto_unregister_alg(&aesni_alg); + + crypto_fpu_exit(); } module_init(aesni_init); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 1a8f8649c035..98d7a188f46b 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = { .module = THIS_MODULE, }; -static int __init crypto_fpu_module_init(void) +int __init crypto_fpu_init(void) { return crypto_register_template(&crypto_fpu_tmpl); } -static void __exit crypto_fpu_module_exit(void) +void __exit crypto_fpu_exit(void) { crypto_unregister_template(&crypto_fpu_tmpl); } - -module_init(crypto_fpu_module_init); -module_exit(crypto_fpu_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("FPU block cipher wrapper"); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 849a9d23c71d..95f5826be458 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -848,4 +848,5 @@ ia32_sys_call_table: .quad compat_sys_open_by_handle_at .quad compat_sys_clock_adjtime .quad sys_syncfs + .quad compat_sys_sendmmsg /* 345 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 12e0e7dd869c..416d865eae39 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -183,8 +183,6 @@ static inline void disable_acpi(void) { } #define ARCH_HAS_POWER_INIT 1 -struct bootnode; - #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; extern int x86_acpi_numa_init(void); diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index a63a68be1cce..94d420b360d1 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -15,4 +15,13 @@ .endm #endif +.macro altinstruction_entry orig alt feature orig_len alt_len + .align 8 + .quad \orig + .quad \alt + .word \feature + .byte \orig_len + .byte \alt_len +.endm + #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 13009d1af99a..bf535f947e8c 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,7 +4,6 @@ #include <linux/types.h> #include <linux/stddef.h> #include <linux/stringify.h> -#include <linux/jump_label.h> #include <asm/asm.h> /* @@ -191,12 +190,4 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); extern void text_poke_smp_batch(struct text_poke_param *params, int n); -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) -#define IDEAL_NOP_SIZE_5 5 -extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; -extern void arch_init_ideal_nop5(void); -#else -static inline void arch_init_ideal_nop5(void) {} -#endif - #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 916bc8111a01..55d95eb789b3 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -19,13 +19,12 @@ #ifndef _ASM_X86_AMD_IOMMU_PROTO_H #define _ASM_X86_AMD_IOMMU_PROTO_H -struct amd_iommu; +#include <asm/amd_iommu_types.h> extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_flush_all_domains(void); -extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); @@ -44,4 +43,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } +static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +{ + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return false; + + return !!(iommu->features & f); +} + #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index e3509fc303bf..4c9982995414 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -68,12 +68,25 @@ #define MMIO_CONTROL_OFFSET 0x0018 #define MMIO_EXCL_BASE_OFFSET 0x0020 #define MMIO_EXCL_LIMIT_OFFSET 0x0028 +#define MMIO_EXT_FEATURES 0x0030 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 #define MMIO_EVT_TAIL_OFFSET 0x2018 #define MMIO_STATUS_OFFSET 0x2020 + +/* Extended Feature Bits */ +#define FEATURE_PREFETCH (1ULL<<0) +#define FEATURE_PPR (1ULL<<1) +#define FEATURE_X2APIC (1ULL<<2) +#define FEATURE_NX (1ULL<<3) +#define FEATURE_GT (1ULL<<4) +#define FEATURE_IA (1ULL<<6) +#define FEATURE_GA (1ULL<<7) +#define FEATURE_HE (1ULL<<8) +#define FEATURE_PC (1ULL<<9) + /* MMIO status bits */ #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 @@ -113,7 +126,9 @@ /* command specific defines */ #define CMD_COMPL_WAIT 0x01 #define CMD_INV_DEV_ENTRY 0x02 -#define CMD_INV_IOMMU_PAGES 0x03 +#define CMD_INV_IOMMU_PAGES 0x03 +#define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_INV_ALL 0x08 #define CMD_COMPL_WAIT_STORE_MASK 0x01 #define CMD_COMPL_WAIT_INT_MASK 0x02 @@ -215,6 +230,8 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +#define DTE_FLAG_IOTLB 0x01 + #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) @@ -227,6 +244,7 @@ /* IOMMU capabilities */ #define IOMMU_CAP_IOTLB 24 #define IOMMU_CAP_NPCACHE 26 +#define IOMMU_CAP_EFR 27 #define MAX_DOMAIN_ID 65536 @@ -249,6 +267,8 @@ extern bool amd_iommu_dump; /* global flag if IOMMUs cache non-present entries */ extern bool amd_iommu_np_cache; +/* Only true if all IOMMUs support device IOTLBs */ +extern bool amd_iommu_iotlb_sup; /* * Make iterating over all IOMMUs easier @@ -371,6 +391,9 @@ struct amd_iommu { /* flags read from acpi table */ u8 acpi_flags; + /* Extended features */ + u64 features; + /* * Capability pointer. There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability @@ -409,9 +432,6 @@ struct amd_iommu { /* if one, we need to send a completion wait command */ bool need_sync; - /* becomes true if a command buffer reset is running */ - bool reset_in_progress; - /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 331682231bb4..67f87f257611 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range { extern const struct pci_device_id amd_nb_misc_ids[]; extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; -struct bootnode; extern bool early_is_amd_nb(u32 value); extern int amd_cache_northbridges(void); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2b7d573be549..a0c46f061210 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -363,7 +363,12 @@ struct apic { */ int (*x86_32_early_logical_apicid)(int cpu); - /* determine CPU -> NUMA node mapping */ + /* + * Optional method called from setup_local_APIC() after logical + * apicid is guaranteed to be known to initialize apicid -> node + * mapping if NUMA initialization hasn't done so already. Don't + * add new users. + */ int (*x86_32_numa_cpu_node)(int cpu); #endif }; @@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -extern int default_x86_32_numa_cpu_node(int cpu); - #endif static inline unsigned int diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index d87988bacf3e..34595d5e1038 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -78,6 +78,7 @@ #define APIC_DEST_LOGICAL 0x00800 #define APIC_DEST_PHYSICAL 0x00000 #define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 #define APIC_DM_LOWEST 0x00100 #define APIC_DM_SMI 0x00200 #define APIC_DM_REMRD 0x00300 diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h index 3c7521063d3f..aa6a3170ab5a 100644 --- a/arch/x86/include/asm/bios_ebda.h +++ b/arch/x86/include/asm/bios_ebda.h @@ -4,16 +4,40 @@ #include <asm/io.h> /* - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E. + * Returns physical address of EBDA. Returns 0 if there is no EBDA. */ static inline unsigned int get_bios_ebda(void) { + /* + * There is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ unsigned int address = *(unsigned short *)phys_to_virt(0x40E); address <<= 4; return address; /* 0 means none */ } +/* + * Return the sanitized length of the EBDA in bytes, if it exists. + */ +static inline unsigned int get_bios_ebda_length(void) +{ + unsigned int address; + unsigned int length; + + address = get_bios_ebda(); + if (!address) + return 0; + + /* EBDA length is byte 0 of the EBDA (stored in KiB) */ + length = *(unsigned char *)phys_to_virt(address); + length <<= 10; + + /* Trim the length if it extends beyond 640KiB */ + length = min_t(unsigned int, (640 * 1024) - address, length); + return length; +} + void reserve_ebda_region(void); #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 91f3e087cf21..5dc6acc98dbd 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -195,6 +195,8 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -207,8 +209,7 @@ extern const char * const x86_power_flags[32]; #define test_cpu_cap(c, bit) \ test_bit(bit, (unsigned long *)((c)->x86_capability)) -#define cpu_has(c, bit) \ - (__builtin_constant_p(bit) && \ +#define REQUIRED_MASK_BIT_SET(bit) \ ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ @@ -218,10 +219,16 @@ extern const char * const x86_power_flags[32]; (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ - (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \ - ? 1 : \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) + +#define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) +#define this_cpu_has(bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) + #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 057099e5faba..0bdb0c54d9a1 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -69,22 +69,18 @@ #define MAX_DMA_CHANNELS 8 -#ifdef CONFIG_X86_32 - -/* The maximum address that we can perform a DMA transfer to on this platform */ -#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000) - -#else - /* 16MB ISA DMA zone */ #define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) +#ifdef CONFIG_X86_32 +/* The maximum address that we can perform a DMA transfer to on this platform */ +#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000) +#else /* Compat define for old dma zone */ #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) - #endif /* 8237 DMA controllers */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8e4a16508d4e..7093e4a6a0bc 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern int add_efi_memmap; +extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern void efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index db24c2278be0..268c783ab1c0 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -38,11 +38,10 @@ extern void mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* - * call mcount is "e8 <4 byte offset>" - * The addr points to the 4 byte offset and the caller of this - * function wants the pointer to e8. Simply subtract one. + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. */ - return addr - 1; + return addr; } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index fc1f579fb965..65aaa91d5850 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h @@ -6,6 +6,8 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 +#define PIT_LATCH LATCH + extern raw_spinlock_t i8253_lock; extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 574dbc22893a..a32b18ce6ead 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -5,20 +5,25 @@ #include <linux/types.h> #include <asm/nops.h> +#include <asm/asm.h> #define JUMP_LABEL_NOP_SIZE 5 -# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" - -# define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:" \ - JUMP_LABEL_INITIAL_NOP \ - ".pushsection __jump_table, \"aw\" \n\t"\ - _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label); \ - } while (0) +#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:" + JUMP_LABEL_INITIAL_NOP + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0f5213564326..0049211959c0 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -14,6 +14,8 @@ #include <asm/desc_defs.h> struct x86_emulate_ctxt; +enum x86_intercept; +enum x86_intercept_stage; struct x86_exception { u8 vector; @@ -24,6 +26,24 @@ struct x86_exception { }; /* + * This struct is used to carry enough information from the instruction + * decoder to main KVM so that a decision can be made whether the + * instruction needs to be intercepted or not. + */ +struct x86_instruction_info { + u8 intercept; /* which intercept */ + u8 rep_prefix; /* rep prefix? */ + u8 modrm_mod; /* mod part of modrm */ + u8 modrm_reg; /* index of register used */ + u8 modrm_rm; /* rm part of modrm */ + u64 src_val; /* value of source operand */ + u8 src_bytes; /* size of source operand */ + u8 dst_bytes; /* size of destination operand */ + u8 ad_bytes; /* size of src/dst address */ + u64 next_rip; /* rip following the instruction */ +}; + +/* * x86_emulate_ops: * * These operations represent the instruction emulator's interface to memory. @@ -62,6 +82,7 @@ struct x86_exception { #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ +#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ struct x86_emulate_ops { /* @@ -71,8 +92,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*read_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, + unsigned int bytes, struct x86_exception *fault); /* @@ -82,8 +104,8 @@ struct x86_emulate_ops { * @val: [OUT] Value write to memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*write_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* * fetch: Read bytes of standard (non-emulated/special) memory. @@ -92,8 +114,8 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*fetch)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, + int (*fetch)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *fault); /* @@ -102,11 +124,9 @@ struct x86_emulate_ops { * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. */ - int (*read_emulated)(unsigned long addr, - void *val, - unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + int (*read_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); /* * write_emulated: Write bytes to emulated/special memory area. @@ -115,11 +135,10 @@ struct x86_emulate_ops { * required). * @bytes: [IN ] Number of bytes to write to memory. */ - int (*write_emulated)(unsigned long addr, - const void *val, + int (*write_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); + struct x86_exception *fault); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -129,40 +148,54 @@ struct x86_emulate_ops { * @new: [IN ] Value to write to @addr. * @bytes: [IN ] Number of bytes to access using CMPXCHG. */ - int (*cmpxchg_emulated)(unsigned long addr, + int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *fault, - struct kvm_vcpu *vcpu); - - int (*pio_in_emulated)(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - int (*pio_out_emulated)(int size, unsigned short port, const void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu); - void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu); - u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); - void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); - unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); - void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); - int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); - int (*cpl)(struct kvm_vcpu *vcpu); - int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); - int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); - int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + struct x86_exception *fault); + void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); + + int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count); + + int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, const void *val, + unsigned int count); + + bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, int seg); + void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, int seg); + unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, + int seg); + void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); + int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + int (*cpl)(struct x86_emulate_ctxt *ctxt); + int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); + int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); + int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); + int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + void (*halt)(struct x86_emulate_ctxt *ctxt); + void (*wbinvd)(struct x86_emulate_ctxt *ctxt); + int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); + void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ + void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ + int (*intercept)(struct x86_emulate_ctxt *ctxt, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); }; +typedef u32 __attribute__((vector_size(16))) sse128_t; + /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; unsigned int bytes; union { unsigned long orig_val; @@ -174,11 +207,13 @@ struct operand { ulong ea; unsigned seg; } mem; + unsigned xmm; } addr; union { unsigned long val; u64 val64; char valptr[sizeof(unsigned long) + 2]; + sse128_t vec_val; }; }; @@ -197,6 +232,7 @@ struct read_cache { struct decode_cache { u8 twobyte; u8 b; + u8 intercept; u8 lock_prefix; u8 rep_prefix; u8 op_bytes; @@ -209,6 +245,7 @@ struct decode_cache { u8 seg_override; unsigned int d; int (*execute)(struct x86_emulate_ctxt *ctxt); + int (*check_perm)(struct x86_emulate_ctxt *ctxt); unsigned long regs[NR_VCPU_REGS]; unsigned long eip; /* modrm */ @@ -227,17 +264,15 @@ struct x86_emulate_ctxt { struct x86_emulate_ops *ops; /* Register state before/after emulation. */ - struct kvm_vcpu *vcpu; - unsigned long eflags; unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; + bool guest_mode; /* guest running a nested guest */ bool perm_ok; /* do not check permissions if true */ bool only_vendor_specific_insn; @@ -249,8 +284,8 @@ struct x86_emulate_ctxt { }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 0xf3 +#define REPNE_PREFIX 0xf2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -259,6 +294,69 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ +/* any protected mode */ +#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ + X86EMUL_MODE_PROT64) + +enum x86_intercept_stage { + X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ + X86_ICPT_PRE_EXCEPT, + X86_ICPT_POST_EXCEPT, + X86_ICPT_POST_MEMACCESS, +}; + +enum x86_intercept { + x86_intercept_none, + x86_intercept_cr_read, + x86_intercept_cr_write, + x86_intercept_clts, + x86_intercept_lmsw, + x86_intercept_smsw, + x86_intercept_dr_read, + x86_intercept_dr_write, + x86_intercept_lidt, + x86_intercept_sidt, + x86_intercept_lgdt, + x86_intercept_sgdt, + x86_intercept_lldt, + x86_intercept_sldt, + x86_intercept_ltr, + x86_intercept_str, + x86_intercept_rdtsc, + x86_intercept_rdpmc, + x86_intercept_pushf, + x86_intercept_popf, + x86_intercept_cpuid, + x86_intercept_rsm, + x86_intercept_iret, + x86_intercept_intn, + x86_intercept_invd, + x86_intercept_pause, + x86_intercept_hlt, + x86_intercept_invlpg, + x86_intercept_invlpga, + x86_intercept_vmrun, + x86_intercept_vmload, + x86_intercept_vmsave, + x86_intercept_vmmcall, + x86_intercept_stgi, + x86_intercept_clgi, + x86_intercept_skinit, + x86_intercept_rdtscp, + x86_intercept_icebp, + x86_intercept_wbinvd, + x86_intercept_monitor, + x86_intercept_mwait, + x86_intercept_rdmsr, + x86_intercept_wrmsr, + x86_intercept_in, + x86_intercept_ins, + x86_intercept_out, + x86_intercept_outs, + + nr_x86_intercepts +}; + /* Host execution mode. */ #if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 @@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 +#define EMULATION_INTERCEPTED 2 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int reason, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8af0991fdf0..d2ac8e2ee897 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -30,14 +30,30 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MMIO_SIZE 16 #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) + #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 0xFFFFFF0000000000ULL) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + + #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) @@ -118,6 +134,9 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, + VCPU_EXREG_RFLAGS, + VCPU_EXREG_CPL, + VCPU_EXREG_SEGMENTS, }; enum { @@ -256,7 +275,7 @@ struct kvm_mmu { struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq); + u64 *spte, const void *pte); hpa_t root_hpa; int root_level; int shadow_root_level; @@ -340,7 +359,6 @@ struct kvm_vcpu_arch { struct fpu guest_fpu; u64 xcr0; - gva_t mmio_fault_cr2; struct kvm_pio_request pio; void *pio_data; @@ -367,18 +385,22 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + bool emulate_regs_need_sync_to_vcpu; + bool emulate_regs_need_sync_from_vcpu; gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; u64 last_tsc_write; + u32 virtual_tsc_khz; bool tsc_catchup; + u32 tsc_catchup_mult; + s8 tsc_catchup_shift; bool nmi_pending; bool nmi_injected; @@ -448,9 +470,6 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_offset; u64 last_tsc_write; - u32 virtual_tsc_khz; - u32 virtual_tsc_mult; - s8 virtual_tsc_shift; struct kvm_xen_hvm_config xen_hvm_config; @@ -502,6 +521,8 @@ struct kvm_vcpu_stat { u32 nmi_injections; }; +struct x86_instruction_info; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -586,9 +607,17 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + + int (*check_intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); + const struct trace_print_flags *exit_reasons_str; }; @@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +/* control of guest tsc rate supported? */ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ @@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); - void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); @@ -657,8 +690,6 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); -int emulate_clts(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu); - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index eb16e94ae04f..021979a6e23f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -142,8 +142,6 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} static inline void enable_p5_mce(void) {} #endif -extern void (*x86_mce_decode_callback)(struct mce *m); - void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 91df7c51806c..5e83a416eca8 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -13,31 +13,11 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) #include <asm/numaq.h> -/* summit or generic arch */ -#include <asm/srat.h> - -extern int get_memcfg_numa_flat(void); -/* - * This allows any one NUMA architecture to be compiled - * for, and still fall back to the flat function if it - * fails. - */ -static inline void get_memcfg_numa(void) -{ - - if (get_memcfg_numaq()) - return; - if (get_memcfg_from_srat()) - return; - get_memcfg_numa_flat(); -} extern void resume_map_numa_kva(pgd_t *pgd); #else /* !CONFIG_NUMA */ -#define get_memcfg_numa get_memcfg_numa_flat - static inline void resume_map_numa_kva(pgd_t *pgd) {} #endif /* CONFIG_NUMA */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index 288b96f815a6..b3f88d7867c7 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -4,36 +4,13 @@ #ifndef _ASM_X86_MMZONE_64_H #define _ASM_X86_MMZONE_64_H - #ifdef CONFIG_NUMA #include <linux/mmdebug.h> - #include <asm/smp.h> -/* Simple perfect hash to map physical addresses to node numbers */ -struct memnode { - int shift; - unsigned int mapsize; - s16 *map; - s16 embedded_map[64 - 8]; -} ____cacheline_aligned; /* total size = 128 bytes */ -extern struct memnode memnode; -#define memnode_shift memnode.shift -#define memnodemap memnode.map -#define memnodemapsize memnode.mapsize - extern struct pglist_data *node_data[]; -static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) -{ - unsigned nid; - VIRTUAL_BUG_ON(!memnodemap); - nid = memnodemap[addr >> memnode_shift]; - VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); - return nid; -} - #define NODE_DATA(nid) (node_data[nid]) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 67763c5d8b4e..9eae7752ae9b 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -35,7 +35,7 @@ #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_X86_ELAN +#elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE #define MODULE_PROC_FAMILY "CRUSOE " diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3cce71413d0b..485b4f1f079b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -118,6 +118,7 @@ complete list. */ #define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index af788496020b..405b4032a60b 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -1,7 +1,13 @@ #ifndef _ASM_X86_NOPS_H #define _ASM_X86_NOPS_H -/* Define nops for use with alternative() */ +/* + * Define nops for use with alternative() and for tracing. + * + * *_NOP5_ATOMIC must be a single instruction. + */ + +#define NOP_DS_PREFIX 0x3e /* generic versions from gas 1: nop @@ -13,14 +19,15 @@ 6: leal 0x00000000(%esi),%esi 7: leal 0x00000000(,%esi,1),%esi */ -#define GENERIC_NOP1 ".byte 0x90\n" -#define GENERIC_NOP2 ".byte 0x89,0xf6\n" -#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" -#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" -#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 -#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" -#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" -#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 +#define GENERIC_NOP1 0x90 +#define GENERIC_NOP2 0x89,0xf6 +#define GENERIC_NOP3 0x8d,0x76,0x00 +#define GENERIC_NOP4 0x8d,0x74,0x26,0x00 +#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4 +#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7 +#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4 /* Opteron 64bit nops 1: nop @@ -29,13 +36,14 @@ 4: osp osp osp nop */ #define K8_NOP1 GENERIC_NOP1 -#define K8_NOP2 ".byte 0x66,0x90\n" -#define K8_NOP3 ".byte 0x66,0x66,0x90\n" -#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n" -#define K8_NOP5 K8_NOP3 K8_NOP2 -#define K8_NOP6 K8_NOP3 K8_NOP3 -#define K8_NOP7 K8_NOP4 K8_NOP3 -#define K8_NOP8 K8_NOP4 K8_NOP4 +#define K8_NOP2 0x66,K8_NOP1 +#define K8_NOP3 0x66,K8_NOP2 +#define K8_NOP4 0x66,K8_NOP3 +#define K8_NOP5 K8_NOP3,K8_NOP2 +#define K8_NOP6 K8_NOP3,K8_NOP3 +#define K8_NOP7 K8_NOP4,K8_NOP3 +#define K8_NOP8 K8_NOP4,K8_NOP4 +#define K8_NOP5_ATOMIC 0x66,K8_NOP4 /* K7 nops uses eax dependencies (arbitrary choice) @@ -47,13 +55,14 @@ 7: leal 0x00000000(,%eax,1),%eax */ #define K7_NOP1 GENERIC_NOP1 -#define K7_NOP2 ".byte 0x8b,0xc0\n" -#define K7_NOP3 ".byte 0x8d,0x04,0x20\n" -#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" -#define K7_NOP5 K7_NOP4 ASM_NOP1 -#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" -#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" -#define K7_NOP8 K7_NOP7 ASM_NOP1 +#define K7_NOP2 0x8b,0xc0 +#define K7_NOP3 0x8d,0x04,0x20 +#define K7_NOP4 0x8d,0x44,0x20,0x00 +#define K7_NOP5 K7_NOP4,K7_NOP1 +#define K7_NOP6 0x8d,0x80,0,0,0,0 +#define K7_NOP7 0x8D,0x04,0x05,0,0,0,0 +#define K7_NOP8 K7_NOP7,K7_NOP1 +#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4 /* P6 nops uses eax dependencies (Intel-recommended choice) @@ -69,52 +78,65 @@ There is kernel code that depends on this. */ #define P6_NOP1 GENERIC_NOP1 -#define P6_NOP2 ".byte 0x66,0x90\n" -#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" -#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" -#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" -#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" -#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" -#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" +#define P6_NOP2 0x66,0x90 +#define P6_NOP3 0x0f,0x1f,0x00 +#define P6_NOP4 0x0f,0x1f,0x40,0 +#define P6_NOP5 0x0f,0x1f,0x44,0x00,0 +#define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0 +#define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0 +#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 +#define P6_NOP5_ATOMIC P6_NOP5 + +#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" #if defined(CONFIG_MK7) -#define ASM_NOP1 K7_NOP1 -#define ASM_NOP2 K7_NOP2 -#define ASM_NOP3 K7_NOP3 -#define ASM_NOP4 K7_NOP4 -#define ASM_NOP5 K7_NOP5 -#define ASM_NOP6 K7_NOP6 -#define ASM_NOP7 K7_NOP7 -#define ASM_NOP8 K7_NOP8 +#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8) +#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC) #elif defined(CONFIG_X86_P6_NOP) -#define ASM_NOP1 P6_NOP1 -#define ASM_NOP2 P6_NOP2 -#define ASM_NOP3 P6_NOP3 -#define ASM_NOP4 P6_NOP4 -#define ASM_NOP5 P6_NOP5 -#define ASM_NOP6 P6_NOP6 -#define ASM_NOP7 P6_NOP7 -#define ASM_NOP8 P6_NOP8 +#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8) +#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC) #elif defined(CONFIG_X86_64) -#define ASM_NOP1 K8_NOP1 -#define ASM_NOP2 K8_NOP2 -#define ASM_NOP3 K8_NOP3 -#define ASM_NOP4 K8_NOP4 -#define ASM_NOP5 K8_NOP5 -#define ASM_NOP6 K8_NOP6 -#define ASM_NOP7 K8_NOP7 -#define ASM_NOP8 K8_NOP8 +#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) +#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC) #else -#define ASM_NOP1 GENERIC_NOP1 -#define ASM_NOP2 GENERIC_NOP2 -#define ASM_NOP3 GENERIC_NOP3 -#define ASM_NOP4 GENERIC_NOP4 -#define ASM_NOP5 GENERIC_NOP5 -#define ASM_NOP6 GENERIC_NOP6 -#define ASM_NOP7 GENERIC_NOP7 -#define ASM_NOP8 GENERIC_NOP8 +#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8) +#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC) #endif #define ASM_NOP_MAX 8 +#define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */ + +#ifndef __ASSEMBLY__ +extern const unsigned char * const *ideal_nops; +extern void arch_init_ideal_nops(void); +#endif #endif /* _ASM_X86_NOPS_H */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index a50fc9f493b3..bfacd2ccf651 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -1,12 +1,24 @@ #ifndef _ASM_X86_NUMA_H #define _ASM_X86_NUMA_H +#include <linux/nodemask.h> + #include <asm/topology.h> #include <asm/apicdef.h> #ifdef CONFIG_NUMA #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) +#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) + +/* + * Too small node sizes may confuse the VM badly. Usually they + * result from BIOS bugs. So dont recognize nodes as standalone + * NUMA entities that have less than this amount of RAM listed: + */ +#define NODE_MIN_SIZE (4*1024*1024) + +extern int numa_off; /* * __apicid_to_node[] stores the raw mapping between physical apicid and @@ -17,15 +29,27 @@ * numa_cpu_node(). */ extern s16 __apicid_to_node[MAX_LOCAL_APIC]; +extern nodemask_t numa_nodes_parsed __initdata; + +extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); +extern void __init numa_set_distance(int from, int to, int distance); static inline void set_apicid_to_node(int apicid, s16 node) { __apicid_to_node[apicid] = node; } + +extern int __cpuinit numa_cpu_node(int cpu); + #else /* CONFIG_NUMA */ static inline void set_apicid_to_node(int apicid, s16 node) { } + +static inline int numa_cpu_node(int cpu) +{ + return NUMA_NO_NODE; +} #endif /* CONFIG_NUMA */ #ifdef CONFIG_X86_32 @@ -37,14 +61,12 @@ static inline void set_apicid_to_node(int apicid, s16 node) #ifdef CONFIG_NUMA extern void __cpuinit numa_set_node(int cpu, int node); extern void __cpuinit numa_clear_node(int cpu); -extern void __init numa_init_array(void); extern void __init init_cpu_to_node(void); extern void __cpuinit numa_add_cpu(int cpu); extern void __cpuinit numa_remove_cpu(int cpu); #else /* CONFIG_NUMA */ static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } -static inline void numa_init_array(void) { } static inline void init_cpu_to_node(void) { } static inline void numa_add_cpu(int cpu) { } static inline void numa_remove_cpu(int cpu) { } @@ -54,4 +76,10 @@ static inline void numa_remove_cpu(int cpu) { } void debug_cpumask_set_cpu(int cpu, int node, bool enable); #endif +#ifdef CONFIG_NUMA_EMU +#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) +#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) +void numa_emu_cmdline(char *); +#endif /* CONFIG_NUMA_EMU */ + #endif /* _ASM_X86_NUMA_H */ diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index c6beed1ef103..e7d6b8254742 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h @@ -1,16 +1,6 @@ #ifndef _ASM_X86_NUMA_32_H #define _ASM_X86_NUMA_32_H -extern int numa_off; - -extern int pxm_to_nid(int pxm); - -#ifdef CONFIG_NUMA -extern int __cpuinit numa_cpu_node(int cpu); -#else /* CONFIG_NUMA */ -static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } -#endif /* CONFIG_NUMA */ - #ifdef CONFIG_HIGHMEM extern void set_highmem_pages_init(void); #else diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 344eb1790b46..0c05f7ae46e8 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -1,42 +1,6 @@ #ifndef _ASM_X86_NUMA_64_H #define _ASM_X86_NUMA_64_H -#include <linux/nodemask.h> - -struct bootnode { - u64 start; - u64 end; -}; - -#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) - -extern int numa_off; - extern unsigned long numa_free_all_bootmem(void); -extern void setup_node_bootmem(int nodeid, unsigned long start, - unsigned long end); - -#ifdef CONFIG_NUMA -/* - * Too small node sizes may confuse the VM badly. Usually they - * result from BIOS bugs. So dont recognize nodes as standalone - * NUMA entities that have less than this amount of RAM listed: - */ -#define NODE_MIN_SIZE (4*1024*1024) - -extern nodemask_t numa_nodes_parsed __initdata; - -extern int __cpuinit numa_cpu_node(int cpu); -extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); -extern void __init numa_set_distance(int from, int to, int distance); - -#ifdef CONFIG_NUMA_EMU -#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) -#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) -void numa_emu_cmdline(char *); -#endif /* CONFIG_NUMA_EMU */ -#else -static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } -#endif #endif /* _ASM_X86_NUMA_64_H */ diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 37c516545ec8..c3b3c322fd87 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h @@ -29,7 +29,7 @@ #ifdef CONFIG_X86_NUMAQ extern int found_numaq; -extern int get_memcfg_numaq(void); +extern int numaq_numa_init(void); extern int pci_numaq_init(void); extern void *xquad_portio; @@ -166,11 +166,6 @@ struct sys_cfg_data { void numaq_tsc_disable(void); -#else -static inline int get_memcfg_numaq(void) -{ - return 0; -} #endif /* CONFIG_X86_NUMAQ */ #endif /* _ASM_X86_NUMAQ_H */ diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index c5d3a5abbb9f..24487712e0b1 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h @@ -26,15 +26,12 @@ extern void setup_olpc_ofw_pgd(void); /* check if OFW was detected during boot */ extern bool olpc_ofw_present(void); +extern void olpc_dt_build_devicetree(void); + #else /* !CONFIG_OLPC */ static inline void olpc_ofw_detect(void) { } static inline void setup_olpc_ofw_pgd(void) { } -#endif /* !CONFIG_OLPC */ - -#ifdef CONFIG_OF_PROMTREE -extern void olpc_dt_build_devicetree(void); -#else static inline void olpc_dt_build_devicetree(void) { } -#endif +#endif /* !CONFIG_OLPC */ #endif /* _ASM_X86_OLPC_OFW_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d475b4398d8b..53278b0dfdf6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -517,7 +517,7 @@ do { \ typeof(o2) __o2 = o2; \ typeof(o2) __n2 = n2; \ typeof(o2) __dummy; \ - alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ + alternative_io("call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP4, \ "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ X86_FEATURE_CX16, \ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ @@ -542,6 +542,33 @@ do { \ old__; \ }) +static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, + const unsigned long __percpu *addr) +{ + unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; + + return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0; +} + +static inline int x86_this_cpu_variable_test_bit(int nr, + const unsigned long __percpu *addr) +{ + int oldbit; + + asm volatile("bt "__percpu_arg(2)",%1\n\t" + "sbb %0,%0" + : "=r" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr)); + + return oldbit; +} + +#define x86_this_cpu_test_bit(nr, addr) \ + (__builtin_constant_p((nr)) \ + ? x86_this_cpu_constant_test_bit((nr), (addr)) \ + : x86_this_cpu_variable_test_bit((nr), (addr))) + + #include <asm-generic/percpu.h> /* We can use this directly for local CPU (faster). */ diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h new file mode 100644 index 000000000000..4950a0b1d09c --- /dev/null +++ b/arch/x86/include/asm/probe_roms.h @@ -0,0 +1,8 @@ +#ifndef _PROBE_ROMS_H_ +#define _PROBE_ROMS_H_ +struct pci_dev; + +extern void __iomem *pci_map_biosrom(struct pci_dev *pdev); +extern void pci_unmap_biosrom(void __iomem *rom); +extern size_t pci_biosrom_size(struct pci_dev *pdev); +#endif diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index a898a2b6e10c..59ab4dffa377 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -60,6 +60,7 @@ #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ +#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index db8aa19a08a2..9756551ec760 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used \ + static void __section(.discard.text) __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ @@ -104,10 +104,10 @@ void *extend_brk(size_t size, size_t align); type *name; \ RESERVE_BRK(name, sizeof(type) * entries) +extern void probe_roms(void); #ifdef __i386__ void __init i386_start_kernel(void); -extern void probe_roms(void); #else void __init x86_64_start_kernel(char *real_mode); diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h deleted file mode 100644 index b508d639d1a7..000000000000 --- a/arch/x86/include/asm/srat.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Some of the code in this file has been gleaned from the 64 bit - * discontigmem support code base. - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to Pat Gaughen <gone@us.ibm.com> - */ - -#ifndef _ASM_X86_SRAT_H -#define _ASM_X86_SRAT_H - -#ifdef CONFIG_ACPI_NUMA -extern int get_memcfg_from_srat(void); -#else -static inline int get_memcfg_from_srat(void) -{ - return 0; -} -#endif - -#endif /* _ASM_X86_SRAT_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index d7e89c83645d..70bbe39043a9 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo, /* Generic stack tracer with callbacks */ struct stacktrace_ops { - void (*warning)(void *data, char *msg); - /* msg must contain %s for the symbol */ - void (*warning_symbol)(void *data, char *msg, unsigned long symbol); void (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 12569e691ce3..c2ff2a1d845e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -303,24 +303,81 @@ static inline void native_wbinvd(void) #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else -#define read_cr0() (native_read_cr0()) -#define write_cr0(x) (native_write_cr0(x)) -#define read_cr2() (native_read_cr2()) -#define write_cr2(x) (native_write_cr2(x)) -#define read_cr3() (native_read_cr3()) -#define write_cr3(x) (native_write_cr3(x)) -#define read_cr4() (native_read_cr4()) -#define read_cr4_safe() (native_read_cr4_safe()) -#define write_cr4(x) (native_write_cr4(x)) -#define wbinvd() (native_wbinvd()) + +static inline unsigned long read_cr0(void) +{ + return native_read_cr0(); +} + +static inline void write_cr0(unsigned long x) +{ + native_write_cr0(x); +} + +static inline unsigned long read_cr2(void) +{ + return native_read_cr2(); +} + +static inline void write_cr2(unsigned long x) +{ + native_write_cr2(x); +} + +static inline unsigned long read_cr3(void) +{ + return native_read_cr3(); +} + +static inline void write_cr3(unsigned long x) +{ + native_write_cr3(x); +} + +static inline unsigned long read_cr4(void) +{ + return native_read_cr4(); +} + +static inline unsigned long read_cr4_safe(void) +{ + return native_read_cr4_safe(); +} + +static inline void write_cr4(unsigned long x) +{ + native_write_cr4(x); +} + +static inline void wbinvd(void) +{ + native_wbinvd(); +} + #ifdef CONFIG_X86_64 -#define read_cr8() (native_read_cr8()) -#define write_cr8(x) (native_write_cr8(x)) -#define load_gs_index native_load_gs_index + +static inline unsigned long read_cr8(void) +{ + return native_read_cr8(); +} + +static inline void write_cr8(unsigned long x) +{ + native_write_cr8(x); +} + +static inline void load_gs_index(unsigned selector) +{ + native_load_gs_index(selector); +} + #endif /* Clear the 'TS' bit */ -#define clts() (native_clts()) +static inline void clts(void) +{ + native_clts(); +} #endif/* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 910a7084f7f2..c00692476e9f 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void); #define pcibus_to_node(bus) __pcibus_to_node(bus) #ifdef CONFIG_X86_32 -extern unsigned long node_start_pfn[]; -extern unsigned long node_end_pfn[]; -extern unsigned long node_remap_size[]; -#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) - # define SD_CACHE_NICE_TRIES 1 # define SD_IDLE_IDX 1 - #else - # define SD_CACHE_NICE_TRIES 2 # define SD_IDLE_IDX 2 - #endif /* sched_domains SD_NODE_INIT for NUMA machines */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index abd3e0ea762a..99ddd148a760 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -6,7 +6,6 @@ #include <linux/errno.h> #include <linux/compiler.h> #include <linux/thread_info.h> -#include <linux/prefetch.h> #include <linux/string.h> #include <asm/asm.h> #include <asm/page.h> @@ -42,7 +41,7 @@ * Returns 0 if the range is valid, nonzero otherwise. * * This is equivalent to the following test: - * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) + * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) * * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 088d09fb1615..566e803cc602 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -6,7 +6,6 @@ */ #include <linux/errno.h> #include <linux/thread_info.h> -#include <linux/prefetch.h> #include <linux/string.h> #include <asm/asm.h> #include <asm/page.h> diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 316708d5af92..1c66d30971ad 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -6,7 +6,6 @@ */ #include <linux/compiler.h> #include <linux/errno.h> -#include <linux/prefetch.h> #include <linux/lockdep.h> #include <asm/alternative.h> #include <asm/cpufeature.h> diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index a755ef5e5977..fb6a625c99bf 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -350,10 +350,11 @@ #define __NR_open_by_handle_at 342 #define __NR_clock_adjtime 343 #define __NR_syncfs 344 +#define __NR_sendmmsg 345 #ifdef __KERNEL__ -#define NR_syscalls 345 +#define NR_syscalls 346 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 160fa76bd578..79f90eb15aad 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) #define __NR_syncfs 306 __SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_sendmmsg 307 +__SYSCALL(__NR_sendmmsg, sys_sendmmsg) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 3e094af443c3..130f1eeee5fe 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -94,6 +94,8 @@ /* after this # consecutive successes, bump up the throttle if it was lowered */ #define COMPLETE_THRESHOLD 5 +#define UV_LB_SUBNODEID 0x10 + /* * number of entries in the destination side payload queue */ @@ -124,7 +126,7 @@ * The distribution specification (32 bytes) is interpreted as a 256-bit * distribution vector. Adjacent bits correspond to consecutive even numbered * nodeIDs. The result of adding the index of a given bit to the 15-bit - * 'base_dest_nodeid' field of the header corresponds to the + * 'base_dest_nasid' field of the header corresponds to the * destination nodeID associated with that specified bit. */ struct bau_target_uvhubmask { @@ -176,7 +178,7 @@ struct bau_msg_payload { struct bau_msg_header { unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ - unsigned int base_dest_nodeid:15; /* nasid of the */ + unsigned int base_dest_nasid:15; /* nasid of the */ /* bits 20:6 */ /* first bit in uvhub map */ unsigned int command:8; /* message type */ /* bits 28:21 */ @@ -378,6 +380,10 @@ struct ptc_stats { unsigned long d_rcanceled; /* number of messages canceled by resets */ }; +struct hub_and_pnode { + short uvhub; + short pnode; +}; /* * one per-cpu; to locate the software tables */ @@ -399,10 +405,12 @@ struct bau_control { int baudisabled; int set_bau_off; short cpu; + short osnode; short uvhub_cpu; short uvhub; short cpus_in_socket; short cpus_in_uvhub; + short partition_base_pnode; unsigned short message_number; unsigned short uvhub_quiesce; short socket_acknowledge_count[DEST_Q_SIZE]; @@ -422,15 +430,16 @@ struct bau_control { int congested_period; cycles_t period_time; long period_requests; + struct hub_and_pnode *target_hub_and_pnode; }; static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) { return constant_test_bit(uvhub, &dstp->bits[0]); } -static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) +static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) { - __set_bit(uvhub, &dstp->bits[0]); + __set_bit(pnode, &dstp->bits[0]); } static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, int nbits) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a501741c2335..4298002d0c83 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -398,6 +398,8 @@ struct uv_blade_info { unsigned short nr_online_cpus; unsigned short pnode; short memory_nid; + spinlock_t nmi_lock; + unsigned long nmi_count; }; extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 20cafeac7455..f5bb64a823d7 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { } s; }; +/* ========================================================================= */ +/* UVH_SCRATCH5 */ +/* ========================================================================= */ +#define UVH_SCRATCH5 0x2d0200UL +#define UVH_SCRATCH5_32 0x00778 + +#define UVH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL +union uvh_scratch5_u { + unsigned long v; + struct uvh_scratch5_s { + unsigned long scratch5 : 64; /* RW, W1CS */ + } s; +}; #endif /* __ASM_UV_MMRS_X86_H__ */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c61934fbf22a..64a619d47d34 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); -extern int m2p_add_override(unsigned long mfn, struct page *page); -extern int m2p_remove_override(struct page *page); +extern int m2p_add_override(unsigned long mfn, struct page *page, + bool clear_pte); +extern int m2p_remove_override(struct page *page, bool clear_pte); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index aa8620989162..4fbda9a3f339 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void) #endif #if defined(CONFIG_XEN_DOM0) void __init xen_setup_pirqs(void); +int xen_find_device_domain_owner(struct pci_dev *dev); +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); +int xen_unregister_device_domain_owner(struct pci_dev *dev); #else static inline void __init xen_setup_pirqs(void) { } +static inline int xen_find_device_domain_owner(struct pci_dev *dev) +{ + return -1; +} +static inline int xen_register_device_domain_owner(struct pci_dev *dev, + uint16_t domain) +{ + return -1; +} +static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) +{ + return -1; +} #endif #if defined(CONFIG_PCI_MSI) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7338ef2218bc..250806472a7e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,7 +36,7 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o -obj-$(CONFIG_X86_32) += probe_roms_32.o +obj-y += probe_roms.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o @@ -117,7 +117,7 @@ obj-$(CONFIG_OF) += devicetree.o ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_AUDIT) += audit_64.o - obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o + obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ff93bc1b09c3..18a857ba7a25 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -112,11 +112,6 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); - if (strncmp(str, "s4_nonvs", 8) == 0) { - pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " - "please use acpi_sleep=nonvs instead"); - acpi_nvs_nosave(); - } #endif if (strncmp(str, "nonvs", 5) == 0) acpi_nvs_nosave(); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4a234677e213..a81f2d52f869 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -67,17 +67,30 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); #define DPRINTK(fmt, args...) if (debug_alternative) \ printk(KERN_DEBUG fmt, args) +/* + * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes + * that correspond to that nop. Getting from one nop to the next, we + * add to the array the offset that is equal to the sum of all sizes of + * nops preceding the one we are after. + * + * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the + * nice symmetry of sizes of the previous nops. + */ #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8 - "\t.previous"); -extern const unsigned char intelnops[]; -static const unsigned char *const __initconst_or_module -intel_nops[ASM_NOP_MAX+1] = { +static const unsigned char intelnops[] = +{ + GENERIC_NOP1, + GENERIC_NOP2, + GENERIC_NOP3, + GENERIC_NOP4, + GENERIC_NOP5, + GENERIC_NOP6, + GENERIC_NOP7, + GENERIC_NOP8, + GENERIC_NOP5_ATOMIC +}; +static const unsigned char * const intel_nops[ASM_NOP_MAX+2] = +{ NULL, intelnops, intelnops + 1, @@ -87,17 +100,25 @@ intel_nops[ASM_NOP_MAX+1] = { intelnops + 1 + 2 + 3 + 4 + 5, intelnops + 1 + 2 + 3 + 4 + 5 + 6, intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif #ifdef K8_NOP1 -asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8 - "\t.previous"); -extern const unsigned char k8nops[]; -static const unsigned char *const __initconst_or_module -k8_nops[ASM_NOP_MAX+1] = { +static const unsigned char k8nops[] = +{ + K8_NOP1, + K8_NOP2, + K8_NOP3, + K8_NOP4, + K8_NOP5, + K8_NOP6, + K8_NOP7, + K8_NOP8, + K8_NOP5_ATOMIC +}; +static const unsigned char * const k8_nops[ASM_NOP_MAX+2] = +{ NULL, k8nops, k8nops + 1, @@ -107,17 +128,25 @@ k8_nops[ASM_NOP_MAX+1] = { k8nops + 1 + 2 + 3 + 4 + 5, k8nops + 1 + 2 + 3 + 4 + 5 + 6, k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif #if defined(K7_NOP1) && !defined(CONFIG_X86_64) -asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8 - "\t.previous"); -extern const unsigned char k7nops[]; -static const unsigned char *const __initconst_or_module -k7_nops[ASM_NOP_MAX+1] = { +static const unsigned char k7nops[] = +{ + K7_NOP1, + K7_NOP2, + K7_NOP3, + K7_NOP4, + K7_NOP5, + K7_NOP6, + K7_NOP7, + K7_NOP8, + K7_NOP5_ATOMIC +}; +static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = +{ NULL, k7nops, k7nops + 1, @@ -127,17 +156,25 @@ k7_nops[ASM_NOP_MAX+1] = { k7nops + 1 + 2 + 3 + 4 + 5, k7nops + 1 + 2 + 3 + 4 + 5 + 6, k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif #ifdef P6_NOP1 -asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " - P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 - P6_NOP7 P6_NOP8 - "\t.previous"); -extern const unsigned char p6nops[]; -static const unsigned char *const __initconst_or_module -p6_nops[ASM_NOP_MAX+1] = { +static const unsigned char __initconst_or_module p6nops[] = +{ + P6_NOP1, + P6_NOP2, + P6_NOP3, + P6_NOP4, + P6_NOP5, + P6_NOP6, + P6_NOP7, + P6_NOP8, + P6_NOP5_ATOMIC +}; +static const unsigned char * const p6_nops[ASM_NOP_MAX+2] = +{ NULL, p6nops, p6nops + 1, @@ -147,47 +184,65 @@ p6_nops[ASM_NOP_MAX+1] = { p6nops + 1 + 2 + 3 + 4 + 5, p6nops + 1 + 2 + 3 + 4 + 5 + 6, p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif +/* Initialize these to a safe default */ #ifdef CONFIG_X86_64 +const unsigned char * const *ideal_nops = p6_nops; +#else +const unsigned char * const *ideal_nops = intel_nops; +#endif -extern char __vsyscall_0; -static const unsigned char *const *__init_or_module find_nop_table(void) +void __init arch_init_ideal_nops(void) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_has(X86_FEATURE_NOPL)) - return p6_nops; - else - return k8_nops; -} - -#else /* CONFIG_X86_64 */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + /* + * Due to a decoder implementation quirk, some + * specific Intel CPUs actually perform better with + * the "k8_nops" than with the SDM-recommended NOPs. + */ + if (boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 0x0f && + boot_cpu_data.x86_model != 0x1c && + boot_cpu_data.x86_model != 0x26 && + boot_cpu_data.x86_model != 0x27 && + boot_cpu_data.x86_model < 0x30) { + ideal_nops = k8_nops; + } else if (boot_cpu_has(X86_FEATURE_NOPL)) { + ideal_nops = p6_nops; + } else { +#ifdef CONFIG_X86_64 + ideal_nops = k8_nops; +#else + ideal_nops = intel_nops; +#endif + } -static const unsigned char *const *__init_or_module find_nop_table(void) -{ - if (boot_cpu_has(X86_FEATURE_K8)) - return k8_nops; - else if (boot_cpu_has(X86_FEATURE_K7)) - return k7_nops; - else if (boot_cpu_has(X86_FEATURE_NOPL)) - return p6_nops; - else - return intel_nops; + default: +#ifdef CONFIG_X86_64 + ideal_nops = k8_nops; +#else + if (boot_cpu_has(X86_FEATURE_K8)) + ideal_nops = k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + ideal_nops = k7_nops; + else + ideal_nops = intel_nops; +#endif + } } -#endif /* CONFIG_X86_64 */ - /* Use this to add nops to a buffer, then text_poke the whole buffer. */ static void __init_or_module add_nops(void *insns, unsigned int len) { - const unsigned char *const *noptable = find_nop_table(); - while (len > 0) { unsigned int noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; - memcpy(insns, noptable[noplen], noplen); + memcpy(insns, ideal_nops[noplen], noplen); insns += noplen; len -= noplen; } @@ -195,6 +250,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; +extern char __vsyscall_0; void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. @@ -210,6 +266,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start, u8 insnbuf[MAX_PATCH_LEN]; DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite a previous scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ for (a = start; a < end; a++) { u8 *instr = a->instr; BUG_ON(a->replacementlen > a->instrlen); @@ -678,29 +743,3 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) wrote_text = 0; __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); } - -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) - -#ifdef CONFIG_X86_64 -unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; -#else -unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; -#endif - -void __init arch_init_ideal_nop5(void) -{ - /* - * There is no good nop for all x86 archs. This selection - * algorithm should be unified with the one in find_nop_table(), - * but this should be good enough for now. - * - * For cases other than the ones below, use the safe (as in - * always functional) defaults above. - */ -#ifdef CONFIG_X86_64 - /* Don't use these on 32 bits due to broken virtualizers */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - memcpy(ideal_nop5, p6_nops[5], 5); -#endif -} -#endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/amd_gart_64.c index b117efd24f71..b117efd24f71 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 57ca77787220..cd8cbeb5fa34 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -18,6 +18,7 @@ */ #include <linux/pci.h> +#include <linux/pci-ats.h> #include <linux/bitmap.h> #include <linux/slab.h> #include <linux/debugfs.h> @@ -25,6 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/iommu-helper.h> #include <linux/iommu.h> +#include <linux/delay.h> #include <asm/proto.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -34,7 +36,7 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define EXIT_LOOP_COUNT 10000000 +#define LOOP_TIMEOUT 100000 static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -57,7 +59,6 @@ struct iommu_cmd { u32 data[4]; }; -static void reset_iommu_command_buffer(struct amd_iommu *iommu); static void update_domain(struct protection_domain *domain); /**************************************************************************** @@ -322,8 +323,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); - iommu->reset_in_progress = true; - reset_iommu_command_buffer(iommu); dump_command(address); break; case EVENT_TYPE_CMD_HARD_ERR: @@ -367,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) spin_unlock_irqrestore(&iommu->lock, flags); } -irqreturn_t amd_iommu_int_handler(int irq, void *data) +irqreturn_t amd_iommu_int_thread(int irq, void *data) { struct amd_iommu *iommu; @@ -377,192 +376,300 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) return IRQ_HANDLED; } +irqreturn_t amd_iommu_int_handler(int irq, void *data) +{ + return IRQ_WAKE_THREAD; +} + /**************************************************************************** * * IOMMU command queuing functions * ****************************************************************************/ -/* - * Writes the command to the IOMMUs command buffer and informs the - * hardware about the new command. Must be called with iommu->lock held. - */ -static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) +static int wait_on_sem(volatile u64 *sem) +{ + int i = 0; + + while (*sem == 0 && i < LOOP_TIMEOUT) { + udelay(1); + i += 1; + } + + if (i == LOOP_TIMEOUT) { + pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); + return -EIO; + } + + return 0; +} + +static void copy_cmd_to_buffer(struct amd_iommu *iommu, + struct iommu_cmd *cmd, + u32 tail) { - u32 tail, head; u8 *target; - WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); target = iommu->cmd_buf + tail; - memcpy_toio(target, cmd, sizeof(*cmd)); - tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - if (tail == head) - return -ENOMEM; + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + + /* Copy command to buffer */ + memcpy(target, cmd, sizeof(*cmd)); + + /* Tell the IOMMU about it */ writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); +} - return 0; +static void build_completion_wait(struct iommu_cmd *cmd, u64 address) +{ + WARN_ON(address & 0x7ULL); + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; + cmd->data[1] = upper_32_bits(__pa(address)); + cmd->data[2] = 1; + CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); +} + +static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); +} + +static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + size_t size, u16 domid, int pde) +{ + u64 pages; + int s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = 0; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[1] |= domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); + if (s) /* size bit - we flush more than one 4kb page */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; +} + +static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, + u64 address, size_t size) +{ + u64 pages; + int s; + + pages = iommu_num_pages(address, size, PAGE_SIZE); + s = 0; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + address &= PAGE_MASK; + + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + cmd->data[0] |= (qdep & 0xff) << 24; + cmd->data[1] = devid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); + if (s) + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; +} + +static void build_inv_all(struct iommu_cmd *cmd) +{ + memset(cmd, 0, sizeof(*cmd)); + CMD_SET_TYPE(cmd, CMD_INV_ALL); } /* - * General queuing function for commands. Takes iommu->lock and calls - * __iommu_queue_command(). + * Writes the command to the IOMMUs command buffer and informs the + * hardware about the new command. */ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) { + u32 left, tail, head, next_tail; unsigned long flags; - int ret; + WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); + +again: spin_lock_irqsave(&iommu->lock, flags); - ret = __iommu_queue_command(iommu, cmd); - if (!ret) - iommu->need_sync = true; - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; -} + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + left = (head - next_tail) % iommu->cmd_buf_size; -/* - * This function waits until an IOMMU has completed a completion - * wait command - */ -static void __iommu_wait_for_completion(struct amd_iommu *iommu) -{ - int ready = 0; - unsigned status = 0; - unsigned long i = 0; + if (left <= 2) { + struct iommu_cmd sync_cmd; + volatile u64 sem = 0; + int ret; + + build_completion_wait(&sync_cmd, (u64)&sem); + copy_cmd_to_buffer(iommu, &sync_cmd, tail); - INC_STATS_COUNTER(compl_wait); + spin_unlock_irqrestore(&iommu->lock, flags); + + if ((ret = wait_on_sem(&sem)) != 0) + return ret; - while (!ready && (i < EXIT_LOOP_COUNT)) { - ++i; - /* wait for the bit to become one */ - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; + goto again; } - /* set bit back to zero */ - status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; - writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); + copy_cmd_to_buffer(iommu, cmd, tail); + + /* We need to sync now to make sure all commands are processed */ + iommu->need_sync = true; + + spin_unlock_irqrestore(&iommu->lock, flags); - if (unlikely(i == EXIT_LOOP_COUNT)) - iommu->reset_in_progress = true; + return 0; } /* * This function queues a completion wait command into the command * buffer of an IOMMU */ -static int __iommu_completion_wait(struct amd_iommu *iommu) +static int iommu_completion_wait(struct amd_iommu *iommu) { struct iommu_cmd cmd; + volatile u64 sem = 0; + int ret; + + if (!iommu->need_sync) + return 0; - memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + build_completion_wait(&cmd, (u64)&sem); - return __iommu_queue_command(iommu, &cmd); + ret = iommu_queue_command(iommu, &cmd); + if (ret) + return ret; + + return wait_on_sem(&sem); } -/* - * This function is called whenever we need to ensure that the IOMMU has - * completed execution of all commands we sent. It sends a - * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs - * us about that by writing a value to a physical address we pass with - * the command. - */ -static int iommu_completion_wait(struct amd_iommu *iommu) +static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) { - int ret = 0; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); + struct iommu_cmd cmd; - if (!iommu->need_sync) - goto out; + build_inv_dte(&cmd, devid); - ret = __iommu_completion_wait(iommu); + return iommu_queue_command(iommu, &cmd); +} - iommu->need_sync = false; +static void iommu_flush_dte_all(struct amd_iommu *iommu) +{ + u32 devid; - if (ret) - goto out; + for (devid = 0; devid <= 0xffff; ++devid) + iommu_flush_dte(iommu, devid); - __iommu_wait_for_completion(iommu); + iommu_completion_wait(iommu); +} -out: - spin_unlock_irqrestore(&iommu->lock, flags); +/* + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. + */ +static void iommu_flush_tlb_all(struct amd_iommu *iommu) +{ + u32 dom_id; - if (iommu->reset_in_progress) - reset_iommu_command_buffer(iommu); + for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { + struct iommu_cmd cmd; + build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + dom_id, 1); + iommu_queue_command(iommu, &cmd); + } - return 0; + iommu_completion_wait(iommu); } -static void iommu_flush_complete(struct protection_domain *domain) +static void iommu_flush_all(struct amd_iommu *iommu) { - int i; + struct iommu_cmd cmd; - for (i = 0; i < amd_iommus_present; ++i) { - if (!domain->dev_iommu[i]) - continue; + build_inv_all(&cmd); - /* - * Devices of this domain are behind this IOMMU - * We need to wait for completion of all commands. - */ - iommu_completion_wait(amd_iommus[i]); + iommu_queue_command(iommu, &cmd); + iommu_completion_wait(iommu); +} + +void iommu_flush_all_caches(struct amd_iommu *iommu) +{ + if (iommu_feature(iommu, FEATURE_IA)) { + iommu_flush_all(iommu); + } else { + iommu_flush_dte_all(iommu); + iommu_flush_tlb_all(iommu); } } /* - * Command send function for invalidating a device table entry + * Command send function for flushing on-device TLB */ -static int iommu_flush_device(struct device *dev) +static int device_flush_iotlb(struct device *dev, u64 address, size_t size) { + struct pci_dev *pdev = to_pci_dev(dev); struct amd_iommu *iommu; struct iommu_cmd cmd; u16 devid; + int qdep; + qdep = pci_ats_queue_depth(pdev); devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - /* Build command */ - memset(&cmd, 0, sizeof(cmd)); - CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); - cmd.data[0] = devid; + build_inv_iotlb_pages(&cmd, devid, qdep, address, size); return iommu_queue_command(iommu, &cmd); } -static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, - u16 domid, int pde, int s) -{ - memset(cmd, 0, sizeof(*cmd)); - address &= PAGE_MASK; - CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); - cmd->data[1] |= domid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); - if (s) /* size bit - we flush more than one 4kb page */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; -} - /* - * Generic command send function for invalidaing TLB entries + * Command send function for invalidating a device table entry */ -static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, - u64 address, u16 domid, int pde, int s) +static int device_flush_dte(struct device *dev) { - struct iommu_cmd cmd; + struct amd_iommu *iommu; + struct pci_dev *pdev; + u16 devid; int ret; - __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); + pdev = to_pci_dev(dev); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; - ret = iommu_queue_command(iommu, &cmd); + ret = iommu_flush_dte(iommu, devid); + if (ret) + return ret; + + if (pci_ats_enabled(pdev)) + ret = device_flush_iotlb(dev, 0, ~0UL); return ret; } @@ -572,23 +679,14 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, * It invalidates a single PTE if the range to flush is within a single * page. Otherwise it flushes the whole TLB of the IOMMU. */ -static void __iommu_flush_pages(struct protection_domain *domain, - u64 address, size_t size, int pde) +static void __domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) { - int s = 0, i; - unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); - - address &= PAGE_MASK; - - if (pages > 1) { - /* - * If we have to flush more than one page, flush all - * TLB entries for this domain - */ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = 1; - } + struct iommu_dev_data *dev_data; + struct iommu_cmd cmd; + int ret = 0, i; + build_inv_iommu_pages(&cmd, address, size, domain->id, pde); for (i = 0; i < amd_iommus_present; ++i) { if (!domain->dev_iommu[i]) @@ -598,101 +696,70 @@ static void __iommu_flush_pages(struct protection_domain *domain, * Devices of this domain are behind this IOMMU * We need a TLB flush */ - iommu_queue_inv_iommu_pages(amd_iommus[i], address, - domain->id, pde, s); + ret |= iommu_queue_command(amd_iommus[i], &cmd); + } + + list_for_each_entry(dev_data, &domain->dev_list, list) { + struct pci_dev *pdev = to_pci_dev(dev_data->dev); + + if (!pci_ats_enabled(pdev)) + continue; + + ret |= device_flush_iotlb(dev_data->dev, address, size); } - return; + WARN_ON(ret); } -static void iommu_flush_pages(struct protection_domain *domain, - u64 address, size_t size) +static void domain_flush_pages(struct protection_domain *domain, + u64 address, size_t size) { - __iommu_flush_pages(domain, address, size, 0); + __domain_flush_pages(domain, address, size, 0); } /* Flush the whole IO/TLB for a given protection domain */ -static void iommu_flush_tlb(struct protection_domain *domain) +static void domain_flush_tlb(struct protection_domain *domain) { - __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void iommu_flush_tlb_pde(struct protection_domain *domain) +static void domain_flush_tlb_pde(struct protection_domain *domain) { - __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); -} - - -/* - * This function flushes the DTEs for all devices in domain - */ -static void iommu_flush_domain_devices(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - - list_for_each_entry(dev_data, &domain->dev_list, list) - iommu_flush_device(dev_data->dev); - - spin_unlock_irqrestore(&domain->lock, flags); + __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void iommu_flush_all_domain_devices(void) +static void domain_flush_complete(struct protection_domain *domain) { - struct protection_domain *domain; - unsigned long flags; + int i; - spin_lock_irqsave(&amd_iommu_pd_lock, flags); + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; - list_for_each_entry(domain, &amd_iommu_pd_list, list) { - iommu_flush_domain_devices(domain); - iommu_flush_complete(domain); + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); } - - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } -void amd_iommu_flush_all_devices(void) -{ - iommu_flush_all_domain_devices(); -} /* - * This function uses heavy locking and may disable irqs for some time. But - * this is no issue because it is only called during resume. + * This function flushes the DTEs for all devices in domain */ -void amd_iommu_flush_all_domains(void) +static void domain_flush_devices(struct protection_domain *domain) { - struct protection_domain *domain; + struct iommu_dev_data *dev_data; unsigned long flags; - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - - list_for_each_entry(domain, &amd_iommu_pd_list, list) { - spin_lock(&domain->lock); - iommu_flush_tlb_pde(domain); - iommu_flush_complete(domain); - spin_unlock(&domain->lock); - } - - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); -} - -static void reset_iommu_command_buffer(struct amd_iommu *iommu) -{ - pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); - - if (iommu->reset_in_progress) - panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + spin_lock_irqsave(&domain->lock, flags); - amd_iommu_reset_cmd_buffer(iommu); - amd_iommu_flush_all_devices(); - amd_iommu_flush_all_domains(); + list_for_each_entry(dev_data, &domain->dev_list, list) + device_flush_dte(dev_data->dev); - iommu->reset_in_progress = false; + spin_unlock_irqrestore(&domain->lock, flags); } /**************************************************************************** @@ -1410,17 +1477,22 @@ static bool dma_ops_domain(struct protection_domain *domain) return domain->flags & PD_DMA_OPS_MASK; } -static void set_dte_entry(u16 devid, struct protection_domain *domain) +static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) { u64 pte_root = virt_to_phys(domain->pt_root); + u32 flags = 0; pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[2] = domain->id; - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); + if (ats) + flags |= DTE_FLAG_IOTLB; + + amd_iommu_dev_table[devid].data[3] |= flags; + amd_iommu_dev_table[devid].data[2] = domain->id; + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); } static void clear_dte_entry(u16 devid) @@ -1437,23 +1509,29 @@ static void do_attach(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data; struct amd_iommu *iommu; + struct pci_dev *pdev; + bool ats = false; u16 devid; devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; dev_data = get_dev_data(dev); + pdev = to_pci_dev(dev); + + if (amd_iommu_iotlb_sup) + ats = pci_ats_enabled(pdev); /* Update data structures */ dev_data->domain = domain; list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(devid, domain); + set_dte_entry(devid, domain, ats); /* Do reference counting */ domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; /* Flush the DTE entry */ - iommu_flush_device(dev); + device_flush_dte(dev); } static void do_detach(struct device *dev) @@ -1476,7 +1554,7 @@ static void do_detach(struct device *dev) clear_dte_entry(devid); /* Flush the DTE entry */ - iommu_flush_device(dev); + device_flush_dte(dev); } /* @@ -1539,9 +1617,13 @@ out_unlock: static int attach_device(struct device *dev, struct protection_domain *domain) { + struct pci_dev *pdev = to_pci_dev(dev); unsigned long flags; int ret; + if (amd_iommu_iotlb_sup) + pci_enable_ats(pdev, PAGE_SHIFT); + write_lock_irqsave(&amd_iommu_devtable_lock, flags); ret = __attach_device(dev, domain); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); @@ -1551,7 +1633,7 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - iommu_flush_tlb_pde(domain); + domain_flush_tlb_pde(domain); return ret; } @@ -1598,12 +1680,16 @@ static void __detach_device(struct device *dev) */ static void detach_device(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); unsigned long flags; /* lock device table */ write_lock_irqsave(&amd_iommu_devtable_lock, flags); __detach_device(dev); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev)) + pci_disable_ats(pdev); } /* @@ -1615,10 +1701,9 @@ static struct protection_domain *domain_for_device(struct device *dev) struct protection_domain *dom; struct iommu_dev_data *dev_data, *alias_data; unsigned long flags; - u16 devid, alias; + u16 devid; devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; dev_data = get_dev_data(dev); alias_data = get_dev_data(dev_data->alias); if (!alias_data) @@ -1692,7 +1777,7 @@ static int device_change_notifier(struct notifier_block *nb, goto out; } - iommu_flush_device(dev); + device_flush_dte(dev); iommu_completion_wait(iommu); out: @@ -1753,8 +1838,9 @@ static void update_device_table(struct protection_domain *domain) struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { + struct pci_dev *pdev = to_pci_dev(dev_data->dev); u16 devid = get_device_id(dev_data->dev); - set_dte_entry(devid, domain); + set_dte_entry(devid, domain, pci_ats_enabled(pdev)); } } @@ -1764,8 +1850,9 @@ static void update_domain(struct protection_domain *domain) return; update_device_table(domain); - iommu_flush_domain_devices(domain); - iommu_flush_tlb_pde(domain); + + domain_flush_devices(domain); + domain_flush_tlb_pde(domain); domain->updated = false; } @@ -1924,10 +2011,10 @@ retry: ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - iommu_flush_tlb(&dma_dom->domain); + domain_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; } else if (unlikely(amd_iommu_np_cache)) - iommu_flush_pages(&dma_dom->domain, address, size); + domain_flush_pages(&dma_dom->domain, address, size); out: return address; @@ -1976,7 +2063,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(&dma_dom->domain, flush_addr, size); + domain_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } @@ -2012,7 +2099,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, if (addr == DMA_ERROR_CODE) goto out; - iommu_flush_complete(domain); + domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -2039,7 +2126,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, __unmap_single(domain->priv, dma_addr, size, dir); - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2104,7 +2191,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - iommu_flush_complete(domain); + domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -2150,7 +2237,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2200,7 +2287,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out_free; } - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2232,7 +2319,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_flush_complete(domain); + domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2476,7 +2563,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (!iommu) return; - iommu_flush_device(dev); + device_flush_dte(dev); iommu_completion_wait(iommu); } @@ -2542,7 +2629,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, unmap_size = iommu_unmap_page(domain, iova, page_size); mutex_unlock(&domain->api_lock); - iommu_flush_tlb_pde(domain); + domain_flush_tlb_pde(domain); return get_order(unmap_size); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 246d727b65b7..9179c21120a8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -137,6 +137,7 @@ int amd_iommus_present; /* IOMMUs have a non-present cache? */ bool amd_iommu_np_cache __read_mostly; +bool amd_iommu_iotlb_sup __read_mostly = true; /* * The ACPI table parsing functions set this variable on an error @@ -180,6 +181,12 @@ static u32 dev_table_size; /* size of the device table */ static u32 alias_table_size; /* size of the alias table */ static u32 rlookup_table_size; /* size if the rlookup table */ +/* + * This function flushes all internal caches of + * the IOMMU used by this driver. + */ +extern void iommu_flush_all_caches(struct amd_iommu *iommu); + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -293,9 +300,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ static void iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", + static const char * const feat_str[] = { + "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", + "IA", "GA", "HE", "PC", NULL + }; + int i; + + printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", dev_name(&iommu->dev->dev), iommu->cap_ptr); + if (iommu->cap & (1 << IOMMU_CAP_EFR)) { + printk(KERN_CONT " extended features: "); + for (i = 0; feat_str[i]; ++i) + if (iommu_feature(iommu, (1ULL << i))) + printk(KERN_CONT " %s", feat_str[i]); + } + printk(KERN_CONT "\n"); + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } @@ -651,7 +672,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; - u32 range, misc; + u32 range, misc, low, high; int i, j; pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, @@ -667,6 +688,15 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) + amd_iommu_iotlb_sup = false; + + /* read extended feature bits */ + low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); + high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); + + iommu->features = ((u64)high << 32) | low; + if (!is_rd890_iommu(iommu->dev)) return; @@ -1004,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu) if (pci_enable_msi(iommu->dev)) return 1; - r = request_irq(iommu->dev->irq, amd_iommu_int_handler, - IRQF_SAMPLE_RANDOM, - "AMD-Vi", - NULL); + r = request_threaded_irq(iommu->dev->irq, + amd_iommu_int_handler, + amd_iommu_int_thread, + 0, "AMD-Vi", + iommu->dev); if (r) { pci_disable_msi(iommu->dev); @@ -1244,6 +1275,7 @@ static void enable_iommus(void) iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); iommu_enable(iommu); + iommu_flush_all_caches(iommu); } } @@ -1274,8 +1306,8 @@ static void amd_iommu_resume(void) * we have to flush after the IOMMUs are enabled because a * disabled IOMMU will never execute the commands we send */ - amd_iommu_flush_all_devices(); - amd_iommu_flush_all_domains(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); } static int amd_iommu_suspend(void) diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index cd1ffed4ee22..289e92862fd9 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -177,7 +177,6 @@ static struct clocksource clocksource_apbt = { .rating = APBT_CLOCKSOURCE_RATING, .read = apbt_read_clocksource, .mask = APBT_MASK, - .shift = APBT_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = apbt_restart_clocksource, }; @@ -543,14 +542,7 @@ static int apbt_clocksource_register(void) if (t1 == apbt_read_clocksource(&clocksource_apbt)) panic("APBT counter not counting. APBT disabled\n"); - /* - * initialize and register APBT clocksource - * convert that to ns/clock cycle - * mult = (ns/c) * 2^APBT_SHIFT - */ - clocksource_apbt.mult = div_sc(MSEC_PER_SEC, - (unsigned long) apbt_freq, APBT_SHIFT); - clocksource_register(&clocksource_apbt); + clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000); return 0; } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 73fb469908c6..3d2661ca6542 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -30,6 +30,22 @@ #include <asm/amd_nb.h> #include <asm/x86_init.h> +/* + * Using 512M as goal, in case kexec will load kernel_big + * that will do the on-position decompress, and could overlap with + * with the gart aperture that is used. + * Sequence: + * kernel_small + * ==> kexec (with kdump trigger path or gart still enabled) + * ==> kernel_small (gart area become e820_reserved) + * ==> kexec (with kdump trigger path or gart still enabled) + * ==> kerne_big (uncompressed size will be big than 64M or 128M) + * So don't use 512M below as gart iommu, leave the space for kernel + * code for safe. + */ +#define GART_MIN_ADDR (512ULL << 20) +#define GART_MAX_ADDR (1ULL << 32) + int gart_iommu_aperture; int gart_iommu_aperture_disabled __initdata; int gart_iommu_aperture_allowed __initdata; @@ -70,21 +86,9 @@ static u32 __init allocate_aperture(void) * memory. Unfortunately we cannot move it up because that would * make the IOMMU useless. */ - /* - * using 512M as goal, in case kexec will load kernel_big - * that will do the on position decompress, and could overlap with - * that position with gart that is used. - * sequende: - * kernel_small - * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) - * ==> kernel_small(gart area become e820_reserved) - * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) - * ==> kerne_big (uncompressed size will be big than 64M or 128M) - * so don't use 512M below as gart iommu, leave the space for kernel - * code for safe - */ - addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20); - if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) { + addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, + aper_size, aper_size); + if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { printk(KERN_ERR "Cannot allocate aperture memory hole (%lx,%uK)\n", addr, aper_size>>10); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fabf01eff771..f92a8e5d1e21 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); - if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) { + if (this_cpu_has(X86_FEATURE_ARAT)) { lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; /* Make LAPIC timer preferrable over percpu HPET */ lapic_clockevent.rating = 150; @@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void) /* always use the value from LDR */ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = logical_smp_processor_id(); + + /* + * Some NUMA implementations (NUMAQ) don't initialize apicid to + * node mapping during NUMA init. Now that logical apicid is + * guaranteed to be known, give it another chance. This is already + * a bit too late - percpu allocation has already happened without + * proper NUMA affinity. + */ + if (apic->x86_32_numa_cpu_node) + set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu), + apic->x86_32_numa_cpu_node(cpu)); #endif /* @@ -1812,30 +1823,41 @@ void smp_spurious_interrupt(struct pt_regs *regs) */ void smp_error_interrupt(struct pt_regs *regs) { - u32 v, v1; + u32 v0, v1; + u32 i = 0; + static const char * const error_interrupt_reason[] = { + "Send CS error", /* APIC Error Bit 0 */ + "Receive CS error", /* APIC Error Bit 1 */ + "Send accept error", /* APIC Error Bit 2 */ + "Receive accept error", /* APIC Error Bit 3 */ + "Redirectable IPI", /* APIC Error Bit 4 */ + "Send illegal vector", /* APIC Error Bit 5 */ + "Received illegal vector", /* APIC Error Bit 6 */ + "Illegal register address", /* APIC Error Bit 7 */ + }; exit_idle(); irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); + v0 = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); v1 = apic_read(APIC_ESR); ack_APIC_irq(); atomic_inc(&irq_err_count); - /* - * Here is what the APIC error bits mean: - * 0: Send CS error - * 1: Receive CS error - * 2: Send accept error - * 3: Receive accept error - * 4: Reserved - * 5: Send illegal vector - * 6: Received illegal vector - * 7: Illegal register address - */ - pr_debug("APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); + apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)", + smp_processor_id(), v0 , v1); + + v1 = v1 & 0xff; + while (v1) { + if (v1 & 0x1) + apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); + i++; + v1 >>= 1; + }; + + apic_printk(APIC_DEBUG, KERN_CONT "\n"); + irq_exit(); } @@ -2003,21 +2025,6 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } -#ifdef CONFIG_X86_32 -int default_x86_32_numa_cpu_node(int cpu) -{ -#ifdef CONFIG_NUMA - int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); - - if (apicid != BAD_APICID) - return __apicid_to_node[apicid]; - return NUMA_NO_NODE; -#else - return 0; -#endif -} -#endif - /* * Power management */ diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index f1baa2dc087a..775b82bc655c 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v) WARN_ON_ONCE(cpu_has_apic && !disable_apic); } -#ifdef CONFIG_X86_32 -static int noop_x86_32_numa_cpu_node(int cpu) -{ - /* we're always on node 0 */ - return 0; -} -#endif - struct apic apic_noop = { .name = "noop", .probe = noop_probe, @@ -195,6 +187,5 @@ struct apic apic_noop = { #ifdef CONFIG_X86_32 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, - .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node, #endif }; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 541a2e431659..d84ac5a584b5 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -253,5 +253,4 @@ struct apic apic_bigsmp = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, - .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, }; diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 3e9de4854c5b..70533de5bd29 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void) nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); } -static int es7000_numa_cpu_node(int cpu) -{ - return 0; -} - static int es7000_cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) @@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, .x86_32_early_logical_apicid = es7000_early_logical_apicid, - .x86_32_numa_cpu_node = es7000_numa_cpu_node, }; struct apic __refdata apic_es7000 = { @@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, .x86_32_early_logical_apicid = es7000_early_logical_apicid, - .x86_32_numa_cpu_node = es7000_numa_cpu_node, }; diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 6273eee5134b..30f13319e24b 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -48,8 +48,6 @@ #include <asm/e820.h> #include <asm/ipi.h> -#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) - int found_numaq; /* @@ -79,31 +77,20 @@ int quad_local_to_mp_bus_id[NR_CPUS/4][4]; static inline void numaq_register_node(int node, struct sys_cfg_data *scd) { struct eachquadmem *eq = scd->eq + node; + u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20; + u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20; + int ret; - node_set_online(node); - - /* Convert to pages */ - node_start_pfn[node] = - MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); - - node_end_pfn[node] = - MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); - - memblock_x86_register_active_regions(node, node_start_pfn[node], - node_end_pfn[node]); - - memory_present(node, node_start_pfn[node], node_end_pfn[node]); - - node_remap_size[node] = node_memmap_size_bytes(node, - node_start_pfn[node], - node_end_pfn[node]); + node_set(node, numa_nodes_parsed); + ret = numa_add_memblk(node, start, end); + BUG_ON(ret < 0); } /* * Function: smp_dump_qct() * * Description: gets memory layout from the quad config table. This - * function also updates node_online_map with the nodes (quads) present. + * function also updates numa_nodes_parsed with the nodes (quads) present. */ static void __init smp_dump_qct(void) { @@ -112,7 +99,6 @@ static void __init smp_dump_qct(void) scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); - nodes_clear(node_online_map); for_each_node(node) { if (scd->quads_present31_0 & (1 << node)) numaq_register_node(node, scd); @@ -282,14 +268,14 @@ static __init void early_check_numaq(void) } } -int __init get_memcfg_numaq(void) +int __init numaq_numa_init(void) { early_check_numaq(); if (!found_numaq) - return 0; + return -ENOENT; smp_dump_qct(); - return 1; + return 0; } #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fc84c7b61108..6541e471fd91 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -172,7 +172,6 @@ struct apic apic_default = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, - .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, }; extern struct apic apic_numaq; diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index e4b8059b414a..35bcd7d995a1 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -551,5 +551,4 @@ struct apic apic_summit = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, .x86_32_early_logical_apicid = summit_early_logical_apicid, - .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, }; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 33b10a0fc095..7acd2d2ac965 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -37,6 +37,13 @@ #include <asm/smp.h> #include <asm/x86_init.h> #include <asm/emergency-restart.h> +#include <asm/nmi.h> + +/* BMC sets a bit this MMR non-zero before sending an NMI */ +#define UVH_NMI_MMR UVH_SCRATCH5 +#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) +#define UV_NMI_PENDING_MASK (1UL << 63) +DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); DEFINE_PER_CPU(int, x2apic_extra_bits); @@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { + unsigned long real_uv_nmi; + int bid; + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) /* do nothing if entering the crash kernel */ return NOTIFY_OK; + /* - * Use a lock so only one cpu prints at a time - * to prevent intermixed output. + * Each blade has an MMR that indicates when an NMI has been sent + * to cpus on the blade. If an NMI is detected, atomically + * clear the MMR and update a per-blade NMI count used to + * cause each cpu on the blade to notice a new NMI. + */ + bid = uv_numa_blade_id(); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + + if (unlikely(real_uv_nmi)) { + spin_lock(&uv_blade_info[bid].nmi_lock); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + if (real_uv_nmi) { + uv_blade_info[bid].nmi_count++; + uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); + } + spin_unlock(&uv_blade_info[bid].nmi_lock); + } + + if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) + return NOTIFY_DONE; + + __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; + + /* + * Use a lock so only one cpu prints at a time. + * This prevents intermixed output. */ spin_lock(&uv_nmi_lock); - pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); + pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); dump_stack(); spin_unlock(&uv_nmi_lock); @@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) } static struct notifier_block uv_dump_stack_nmi_nb = { - .notifier_call = uv_handle_nmi + .notifier_call = uv_handle_nmi, + .priority = NMI_LOCAL_LOW_PRIOR - 1, }; void uv_register_nmi_notifier(void) @@ -720,8 +756,9 @@ void __init uv_system_init(void) printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = kmalloc(bytes, GFP_KERNEL); + uv_blade_info = kzalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) uv_blade_info[blade].memory_nid = -1; @@ -747,6 +784,7 @@ void __init uv_system_init(void) uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + spin_lock_init(&uv_blade_info[blade].nmi_lock); max_pnode = max(pnode, max_pnode); blade++; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index adee12e0da1f..3bfa02235965 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1238,7 +1238,6 @@ static int suspend(int vetoable) dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); - sysdev_suspend(PMSG_SUSPEND); syscore_suspend(); local_irq_enable(); @@ -1258,7 +1257,6 @@ static int suspend(int vetoable) err = (err == APM_SUCCESS) ? 0 : -EIO; syscore_resume(); - sysdev_resume(); local_irq_enable(); dpm_resume_noirq(PMSG_RESUME); @@ -1282,7 +1280,6 @@ static void standby(void) dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); - sysdev_suspend(PMSG_SUSPEND); syscore_suspend(); local_irq_enable(); @@ -1292,7 +1289,6 @@ static void standby(void) local_irq_disable(); syscore_resume(); - sysdev_resume(); local_irq_enable(); dpm_resume_noirq(PMSG_RESUME); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3f0ebe429a01..6042981d0309 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bb9eb29a52dd..8f5cabb3c5b0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #endif /* As a rule processors have APIC timer running in deep C states */ - if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) + if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) set_cpu_cap(c, X86_FEATURE_ARAT); /* @@ -629,10 +629,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 */ u64 mask; + int err; - rdmsrl(MSR_AMD64_MCx_MASK(4), mask); - mask |= (1 << 10); - wrmsrl(MSR_AMD64_MCx_MASK(4), mask); + err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); + if (err == 0) { + mask |= (1 << 10); + checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); + } } } @@ -698,7 +701,7 @@ cpu_dev_register(amd_cpu_dev); */ const int amd_erratum_400[] = - AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), + AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); EXPORT_SYMBOL_GPL(amd_erratum_400); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2ced0074a45..c8b41623377f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -254,6 +254,25 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } #endif +static int disable_smep __cpuinitdata; +static __init int setup_disable_smep(char *arg) +{ + disable_smep = 1; + return 1; +} +__setup("nosmep", setup_disable_smep); + +static __cpuinit void setup_smep(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_SMEP)) { + if (unlikely(disable_smep)) { + setup_clear_cpu_cap(X86_FEATURE_SMEP); + clear_in_cr4(X86_CR4_SMEP); + } else + set_in_cr4(X86_CR4_SMEP); + } +} + /* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization @@ -565,8 +584,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - if (eax > 0) - c->x86_capability[9] = ebx; + c->x86_capability[9] = ebx; } /* AMD-defined flags: level 0x80000001 */ @@ -668,6 +686,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) c->cpu_index = 0; #endif filter_cpuid_features(c, false); + + setup_smep(c); } void __init early_cpu_init(void) @@ -753,6 +773,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) #endif } + setup_smep(c); + get_model_name(c); /* Default name */ detect_nopl(c); diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig deleted file mode 100644 index 870e6cc6ad28..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ /dev/null @@ -1,266 +0,0 @@ -# -# CPU Frequency scaling -# - -menu "CPU Frequency scaling" - -source "drivers/cpufreq/Kconfig" - -if CPU_FREQ - -comment "CPUFreq processor drivers" - -config X86_PCC_CPUFREQ - tristate "Processor Clocking Control interface driver" - depends on ACPI && ACPI_PROCESSOR - help - This driver adds support for the PCC interface. - - For details, take a look at: - <file:Documentation/cpu-freq/pcc-cpufreq.txt>. - - To compile this driver as a module, choose M here: the - module will be called pcc-cpufreq. - - If in doubt, say N. - -config X86_ACPI_CPUFREQ - tristate "ACPI Processor P-States driver" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This driver adds a CPUFreq driver which utilizes the ACPI - Processor Performance States. - This driver also supports Intel Enhanced Speedstep. - - To compile this driver as a module, choose M here: the - module will be called acpi-cpufreq. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config ELAN_CPUFREQ - tristate "AMD Elan SC400 and SC410" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC400 and SC410 - processors. - - You need to specify the processor maximum speed as boot - parameter: elanfreq=maxspeed (in kHz) or as module - parameter "max_freq". - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config SC520_CPUFREQ - tristate "AMD Elan SC520" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC520 processor. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - - -config X86_POWERNOW_K6 - tristate "AMD Mobile K6-2/K6-3 PowerNow!" - select CPU_FREQ_TABLE - depends on X86_32 - help - This adds the CPUFreq driver for mobile AMD K6-2+ and mobile - AMD K6-3+ processors. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_POWERNOW_K7 - tristate "AMD Mobile Athlon/Duron PowerNow!" - select CPU_FREQ_TABLE - depends on X86_32 - help - This adds the CPUFreq driver for mobile AMD K7 mobile processors. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_POWERNOW_K7_ACPI - bool - depends on X86_POWERNOW_K7 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) - depends on X86_32 - default y - -config X86_POWERNOW_K8 - tristate "AMD Opteron/Athlon64 PowerNow!" - select CPU_FREQ_TABLE - depends on ACPI && ACPI_PROCESSOR - help - This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors. - - To compile this driver as a module, choose M here: the - module will be called powernow-k8. - - For details, take a look at <file:Documentation/cpu-freq/>. - -config X86_GX_SUSPMOD - tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on X86_32 && PCI - help - This add the CPUFreq driver for NatSemi Geode processors which - support suspend modulation. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep (deprecated)" - select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32 - depends on X86_32 || (X86_64 && ACPI_PROCESSOR) - help - This is deprecated and this functionality is now merged into - acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of - speedstep_centrino. - This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs - or 64bit enabled Intel Xeons. - - To compile this driver as a module, choose M here: the - module will be called speedstep-centrino. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO_TABLE - bool "Built-in tables for Banias CPUs" - depends on X86_32 && X86_SPEEDSTEP_CENTRINO - default y - help - Use built-in tables for Banias CPUs if ACPI encoding - is not available. - - If in doubt, say N. - -config X86_SPEEDSTEP_ICH - tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" - select CPU_FREQ_TABLE - depends on X86_32 - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all - mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, - ICH3 or ICH4 southbridge. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_SPEEDSTEP_SMI - tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" - select CPU_FREQ_TABLE - depends on X86_32 && EXPERIMENTAL - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) - on systems which have an Intel 440BX/ZX/MX southbridge. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_P4_CLOCKMOD - tristate "Intel Pentium 4 clock modulation" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for Intel Pentium 4 / XEON - processors. When enabled it will lower CPU temperature by skipping - clocks. - - This driver should be only used in exceptional - circumstances when very low power is needed because it causes severe - slowdowns and noticeable latencies. Normally Speedstep should be used - instead. - - To compile this driver as a module, choose M here: the - module will be called p4-clockmod. - - For details, take a look at <file:Documentation/cpu-freq/>. - - Unless you are absolutely sure say N. - -config X86_CPUFREQ_NFORCE2 - tristate "nVidia nForce2 FSB changing" - depends on X86_32 && EXPERIMENTAL - help - This adds the CPUFreq driver for FSB changing on nVidia nForce2 - platforms. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_LONGRUN - tristate "Transmeta LongRun" - depends on X86_32 - help - This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors - which support LongRun. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_LONGHAUL - tristate "VIA Cyrix III Longhaul" - select CPU_FREQ_TABLE - depends on X86_32 && ACPI_PROCESSOR - help - This adds the CPUFreq driver for VIA Samuel/CyrixIII, - VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T - processors. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)" - select CPU_FREQ_TABLE - depends on X86_32 && EXPERIMENTAL - help - This adds the CPUFreq driver for VIA C7 processors. However, this driver - does not have any safeguards to prevent operating the CPU out of spec - and is thus considered dangerous. Please use the regular ACPI cpufreq - driver, enabled by CONFIG_X86_ACPI_CPUFREQ. - - If in doubt, say N. - -comment "shared options" - -config X86_SPEEDSTEP_LIB - tristate - default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) - -config X86_SPEEDSTEP_RELAXED_CAP_CHECK - bool "Relaxed speedstep capability checks" - depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) - help - Don't perform all checks for a speedstep capable system which would - normally be done. Some ancient or strange systems, though speedstep - capable, don't always indicate that they are speedstep capable. This - option lets the probing code bypass some of those checks if the - parameter "relaxed_check=1" is passed to the module. - -endif # CPU_FREQ - -endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile deleted file mode 100644 index bd54bf67e6fb..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Link order matters. K8 is preferred to ACPI because of firmware bugs in early -# K8 systems. ACPI is preferred to all other hardware-specific drivers. -# speedstep-* is preferred over p4-clockmod. - -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o -obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o -obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o -obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o -obj-$(CONFIG_X86_LONGHAUL) += longhaul.o -obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o -obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o -obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o -obj-$(CONFIG_X86_LONGRUN) += longrun.o -obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o -obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o -obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o -obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o -obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o -obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o -obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c deleted file mode 100644 index a2baafb2fe6d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ /dev/null @@ -1,776 +0,0 @@ -/* - * acpi-cpufreq.c - ACPI Processor P-States Driver - * - * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> - * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> - * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> - * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/sched.h> -#include <linux/cpufreq.h> -#include <linux/compiler.h> -#include <linux/dmi.h> -#include <linux/slab.h> - -#include <linux/acpi.h> -#include <linux/io.h> -#include <linux/delay.h> -#include <linux/uaccess.h> - -#include <acpi/processor.h> - -#include <asm/msr.h> -#include <asm/processor.h> -#include <asm/cpufeature.h> -#include "mperf.h" - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "acpi-cpufreq", msg) - -MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); -MODULE_DESCRIPTION("ACPI Processor P-States Driver"); -MODULE_LICENSE("GPL"); - -enum { - UNDEFINED_CAPABLE = 0, - SYSTEM_INTEL_MSR_CAPABLE, - SYSTEM_IO_CAPABLE, -}; - -#define INTEL_MSR_RANGE (0xffff) - -struct acpi_cpufreq_data { - struct acpi_processor_performance *acpi_data; - struct cpufreq_frequency_table *freq_table; - unsigned int resume; - unsigned int cpu_feature; -}; - -static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); - -/* acpi_perf_data is a pointer to percpu data. */ -static struct acpi_processor_performance __percpu *acpi_perf_data; - -static struct cpufreq_driver acpi_cpufreq_driver; - -static unsigned int acpi_pstate_strict; - -static int check_est_cpu(unsigned int cpuid) -{ - struct cpuinfo_x86 *cpu = &cpu_data(cpuid); - - return cpu_has(cpu, X86_FEATURE_EST); -} - -static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) -{ - struct acpi_processor_performance *perf; - int i; - - perf = data->acpi_data; - - for (i = 0; i < perf->state_count; i++) { - if (value == perf->states[i].status) - return data->freq_table[i].frequency; - } - return 0; -} - -static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) -{ - int i; - struct acpi_processor_performance *perf; - - msr &= INTEL_MSR_RANGE; - perf = data->acpi_data; - - for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { - if (msr == perf->states[data->freq_table[i].index].status) - return data->freq_table[i].frequency; - } - return data->freq_table[0].frequency; -} - -static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) -{ - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - return extract_msr(val, data); - case SYSTEM_IO_CAPABLE: - return extract_io(val, data); - default: - return 0; - } -} - -struct msr_addr { - u32 reg; -}; - -struct io_addr { - u16 port; - u8 bit_width; -}; - -struct drv_cmd { - unsigned int type; - const struct cpumask *mask; - union { - struct msr_addr msr; - struct io_addr io; - } addr; - u32 val; -}; - -/* Called via smp_call_function_single(), on the target CPU */ -static void do_drv_read(void *_cmd) -{ - struct drv_cmd *cmd = _cmd; - u32 h; - - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, cmd->val, h); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_read_port((acpi_io_address)cmd->addr.io.port, - &cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } -} - -/* Called via smp_call_function_many(), on the target CPUs */ -static void do_drv_write(void *_cmd) -{ - struct drv_cmd *cmd = _cmd; - u32 lo, hi; - - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, lo, hi); - lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); - wrmsr(cmd->addr.msr.reg, lo, hi); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_write_port((acpi_io_address)cmd->addr.io.port, - cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } -} - -static void drv_read(struct drv_cmd *cmd) -{ - int err; - cmd->val = 0; - - err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); - WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ -} - -static void drv_write(struct drv_cmd *cmd) -{ - int this_cpu; - - this_cpu = get_cpu(); - if (cpumask_test_cpu(this_cpu, cmd->mask)) - do_drv_write(cmd); - smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); - put_cpu(); -} - -static u32 get_cur_val(const struct cpumask *mask) -{ - struct acpi_processor_performance *perf; - struct drv_cmd cmd; - - if (unlikely(cpumask_empty(mask))) - return 0; - - switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - break; - default: - return 0; - } - - cmd.mask = mask; - drv_read(&cmd); - - dprintk("get_cur_val = %u\n", cmd.val); - - return cmd.val; -} - -static unsigned int get_cur_freq_on_cpu(unsigned int cpu) -{ - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); - unsigned int freq; - unsigned int cached_freq; - - dprintk("get_cur_freq_on_cpu (%d)\n", cpu); - - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return 0; - } - - cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); - if (freq != cached_freq) { - /* - * The dreaded BIOS frequency change behind our back. - * Force set the frequency on next target call. - */ - data->resume = 1; - } - - dprintk("cur freq = %u\n", freq); - - return freq; -} - -static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, - struct acpi_cpufreq_data *data) -{ - unsigned int cur_freq; - unsigned int i; - - for (i = 0; i < 100; i++) { - cur_freq = extract_freq(get_cur_val(mask), data); - if (cur_freq == freq) - return 1; - udelay(10); - } - return 0; -} - -static int acpi_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - struct acpi_processor_performance *perf; - struct cpufreq_freqs freqs; - struct drv_cmd cmd; - unsigned int next_state = 0; /* Index into freq_table */ - unsigned int next_perf_state = 0; /* Index into perf table */ - unsigned int i; - int result = 0; - - dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); - - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return -ENODEV; - } - - perf = data->acpi_data; - result = cpufreq_frequency_table_target(policy, - data->freq_table, - target_freq, - relation, &next_state); - if (unlikely(result)) { - result = -ENODEV; - goto out; - } - - next_perf_state = data->freq_table[next_state].index; - if (perf->state == next_perf_state) { - if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", - next_perf_state); - data->resume = 0; - } else { - dprintk("Already at target state (P%d)\n", - next_perf_state); - goto out; - } - } - - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - default: - result = -ENODEV; - goto out; - } - - /* cpufreq holds the hotplug lock, so we are safe from here on */ - if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cmd.mask = policy->cpus; - else - cmd.mask = cpumask_of(policy->cpu); - - freqs.old = perf->states[perf->state].core_frequency * 1000; - freqs.new = data->freq_table[next_state].frequency; - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - drv_write(&cmd); - - if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, freqs.new, data)) { - dprintk("acpi_cpufreq_target failed (%d)\n", - policy->cpu); - result = -EAGAIN; - goto out; - } - } - - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - perf->state = next_perf_state; - -out: - return result; -} - -static int acpi_cpufreq_verify(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - - dprintk("acpi_cpufreq_verify\n"); - - return cpufreq_frequency_table_verify(policy, data->freq_table); -} - -static unsigned long -acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) -{ - struct acpi_processor_performance *perf = data->acpi_data; - - if (cpu_khz) { - /* search the closest match to cpu_khz */ - unsigned int i; - unsigned long freq; - unsigned long freqn = perf->states[0].core_frequency * 1000; - - for (i = 0; i < (perf->state_count-1); i++) { - freq = freqn; - freqn = perf->states[i+1].core_frequency * 1000; - if ((2 * cpu_khz) > (freqn + freq)) { - perf->state = i; - return freq; - } - } - perf->state = perf->state_count-1; - return freqn; - } else { - /* assume CPU is at P0... */ - perf->state = 0; - return perf->states[0].core_frequency * 1000; - } -} - -static void free_acpi_perf_data(void) -{ - unsigned int i; - - /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ - for_each_possible_cpu(i) - free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) - ->shared_cpu_map); - free_percpu(acpi_perf_data); -} - -/* - * acpi_cpufreq_early_init - initialize ACPI P-States library - * - * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) - * in order to determine correct frequency and voltage pairings. We can - * do _PDC and _PSD and find out the processor dependency for the - * actual init that will happen later... - */ -static int __init acpi_cpufreq_early_init(void) -{ - unsigned int i; - dprintk("acpi_cpufreq_early_init\n"); - - acpi_perf_data = alloc_percpu(struct acpi_processor_performance); - if (!acpi_perf_data) { - dprintk("Memory allocation error for acpi_perf_data.\n"); - return -ENOMEM; - } - for_each_possible_cpu(i) { - if (!zalloc_cpumask_var_node( - &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, - GFP_KERNEL, cpu_to_node(i))) { - - /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ - free_acpi_perf_data(); - return -ENOMEM; - } - } - - /* Do initialization in ACPI core */ - acpi_processor_preregister_performance(acpi_perf_data); - return 0; -} - -#ifdef CONFIG_SMP -/* - * Some BIOSes do SW_ANY coordination internally, either set it up in hw - * or do it in BIOS firmware and won't inform about it to OS. If not - * detected, this has a side effect of making CPU run at a different speed - * than OS intended it to run at. Detect it and handle it cleanly. - */ -static int bios_with_sw_any_bug; - -static int sw_any_bug_found(const struct dmi_system_id *d) -{ - bios_with_sw_any_bug = 1; - return 0; -} - -static const struct dmi_system_id sw_any_bug_dmi_table[] = { - { - .callback = sw_any_bug_found, - .ident = "Supermicro Server X6DLP", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), - DMI_MATCH(DMI_BIOS_VERSION, "080010"), - DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), - }, - }, - { } -}; - -static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) -{ - /* Intel Xeon Processor 7100 Series Specification Update - * http://www.intel.com/Assets/PDF/specupdate/314554.pdf - * AL30: A Machine Check Exception (MCE) Occurring during an - * Enhanced Intel SpeedStep Technology Ratio Change May Cause - * Both Processor Cores to Lock Up. */ - if (c->x86_vendor == X86_VENDOR_INTEL) { - if ((c->x86 == 15) && - (c->x86_model == 6) && - (c->x86_mask == 8)) { - printk(KERN_INFO "acpi-cpufreq: Intel(R) " - "Xeon(R) 7100 Errata AL30, processors may " - "lock up on frequency changes: disabling " - "acpi-cpufreq.\n"); - return -ENODEV; - } - } - return 0; -} -#endif - -static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - unsigned int valid_states = 0; - unsigned int cpu = policy->cpu; - struct acpi_cpufreq_data *data; - unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data(policy->cpu); - struct acpi_processor_performance *perf; -#ifdef CONFIG_SMP - static int blacklisted; -#endif - - dprintk("acpi_cpufreq_cpu_init\n"); - -#ifdef CONFIG_SMP - if (blacklisted) - return blacklisted; - blacklisted = acpi_cpufreq_blacklist(c); - if (blacklisted) - return blacklisted; -#endif - - data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); - per_cpu(acfreq_data, cpu) = data; - - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) - acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; - - result = acpi_processor_register_performance(data->acpi_data, cpu); - if (result) - goto err_free; - - perf = data->acpi_data; - policy->shared_type = perf->shared_type; - - /* - * Will let policy->cpus know about dependency only when software - * coordination is required. - */ - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - cpumask_copy(policy->cpus, perf->shared_cpu_map); - } - cpumask_copy(policy->related_cpus, perf->shared_cpu_map); - -#ifdef CONFIG_SMP - dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) { - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - cpumask_copy(policy->cpus, cpu_core_mask(cpu)); - } -#endif - - /* capability check */ - if (perf->state_count <= 1) { - dprintk("No P-States\n"); - result = -ENODEV; - goto err_unreg; - } - - if (perf->control_register.space_id != perf->status_register.space_id) { - result = -ENODEV; - goto err_unreg; - } - - switch (perf->control_register.space_id) { - case ACPI_ADR_SPACE_SYSTEM_IO: - dprintk("SYSTEM IO addr space\n"); - data->cpu_feature = SYSTEM_IO_CAPABLE; - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - dprintk("HARDWARE addr space\n"); - if (!check_est_cpu(cpu)) { - result = -ENODEV; - goto err_unreg; - } - data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; - break; - default: - dprintk("Unknown addr space %d\n", - (u32) (perf->control_register.space_id)); - result = -ENODEV; - goto err_unreg; - } - - data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * - (perf->state_count+1), GFP_KERNEL); - if (!data->freq_table) { - result = -ENOMEM; - goto err_unreg; - } - - /* detect transition latency */ - policy->cpuinfo.transition_latency = 0; - for (i = 0; i < perf->state_count; i++) { - if ((perf->states[i].transition_latency * 1000) > - policy->cpuinfo.transition_latency) - policy->cpuinfo.transition_latency = - perf->states[i].transition_latency * 1000; - } - - /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ - if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && - policy->cpuinfo.transition_latency > 20 * 1000) { - policy->cpuinfo.transition_latency = 20 * 1000; - printk_once(KERN_INFO - "P-state transition latency capped at 20 uS\n"); - } - - /* table init */ - for (i = 0; i < perf->state_count; i++) { - if (i > 0 && perf->states[i].core_frequency >= - data->freq_table[valid_states-1].frequency / 1000) - continue; - - data->freq_table[valid_states].index = i; - data->freq_table[valid_states].frequency = - perf->states[i].core_frequency * 1000; - valid_states++; - } - data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; - perf->state = 0; - - result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); - if (result) - goto err_freqfree; - - if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) - printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); - - switch (perf->control_register.space_id) { - case ACPI_ADR_SPACE_SYSTEM_IO: - /* Current speed is unknown and not detectable by IO port */ - policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - acpi_cpufreq_driver.get = get_cur_freq_on_cpu; - policy->cur = get_cur_freq_on_cpu(cpu); - break; - default: - break; - } - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - /* Check for APERF/MPERF support in hardware */ - if (cpu_has(c, X86_FEATURE_APERFMPERF)) - acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; - - dprintk("CPU%u - ACPI performance management activated.\n", cpu); - for (i = 0; i < perf->state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", - (i == perf->state ? '*' : ' '), i, - (u32) perf->states[i].core_frequency, - (u32) perf->states[i].power, - (u32) perf->states[i].transition_latency); - - cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); - - /* - * the first call to ->target() should result in us actually - * writing something to the appropriate registers. - */ - data->resume = 1; - - return result; - -err_freqfree: - kfree(data->freq_table); -err_unreg: - acpi_processor_unregister_performance(perf, cpu); -err_free: - kfree(data); - per_cpu(acfreq_data, cpu) = NULL; - - return result; -} - -static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - - dprintk("acpi_cpufreq_cpu_exit\n"); - - if (data) { - cpufreq_frequency_table_put_attr(policy->cpu); - per_cpu(acfreq_data, policy->cpu) = NULL; - acpi_processor_unregister_performance(data->acpi_data, - policy->cpu); - kfree(data->freq_table); - kfree(data); - } - - return 0; -} - -static int acpi_cpufreq_resume(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - - dprintk("acpi_cpufreq_resume\n"); - - data->resume = 1; - - return 0; -} - -static struct freq_attr *acpi_cpufreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .bios_limit = acpi_processor_get_bios_limit, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, -}; - -static int __init acpi_cpufreq_init(void) -{ - int ret; - - if (acpi_disabled) - return 0; - - dprintk("acpi_cpufreq_init\n"); - - ret = acpi_cpufreq_early_init(); - if (ret) - return ret; - - ret = cpufreq_register_driver(&acpi_cpufreq_driver); - if (ret) - free_acpi_perf_data(); - - return ret; -} - -static void __exit acpi_cpufreq_exit(void) -{ - dprintk("acpi_cpufreq_exit\n"); - - cpufreq_unregister_driver(&acpi_cpufreq_driver); - - free_percpu(acpi_perf_data); -} - -module_param(acpi_pstate_strict, uint, 0644); -MODULE_PARM_DESC(acpi_pstate_strict, - "value 0 or non-zero. non-zero -> strict ACPI checks are " - "performed during frequency changes."); - -late_initcall(acpi_cpufreq_init); -module_exit(acpi_cpufreq_exit); - -MODULE_ALIAS("acpi"); diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c deleted file mode 100644 index 141abebc4516..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net> - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon reverse engineered information - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/pci.h> -#include <linux/delay.h> - -#define NFORCE2_XTAL 25 -#define NFORCE2_BOOTFSB 0x48 -#define NFORCE2_PLLENABLE 0xa8 -#define NFORCE2_PLLREG 0xa4 -#define NFORCE2_PLLADR 0xa0 -#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) - -#define NFORCE2_MIN_FSB 50 -#define NFORCE2_SAFE_DISTANCE 50 - -/* Delay in ms between FSB changes */ -/* #define NFORCE2_DELAY 10 */ - -/* - * nforce2_chipset: - * FSB is changed using the chipset - */ -static struct pci_dev *nforce2_dev; - -/* fid: - * multiplier * 10 - */ -static int fid; - -/* min_fsb, max_fsb: - * minimum and maximum FSB (= FSB at boot time) - */ -static int min_fsb; -static int max_fsb; - -MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); -MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); -MODULE_LICENSE("GPL"); - -module_param(fid, int, 0444); -module_param(min_fsb, int, 0444); - -MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); -MODULE_PARM_DESC(min_fsb, - "Minimum FSB to use, if not defined: current FSB - 50"); - -#define PFX "cpufreq-nforce2: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "cpufreq-nforce2", msg) - -/** - * nforce2_calc_fsb - calculate FSB - * @pll: PLL value - * - * Calculates FSB from PLL value - */ -static int nforce2_calc_fsb(int pll) -{ - unsigned char mul, div; - - mul = (pll >> 8) & 0xff; - div = pll & 0xff; - - if (div > 0) - return NFORCE2_XTAL * mul / div; - - return 0; -} - -/** - * nforce2_calc_pll - calculate PLL value - * @fsb: FSB - * - * Calculate PLL value for given FSB - */ -static int nforce2_calc_pll(unsigned int fsb) -{ - unsigned char xmul, xdiv; - unsigned char mul = 0, div = 0; - int tried = 0; - - /* Try to calculate multiplier and divider up to 4 times */ - while (((mul == 0) || (div == 0)) && (tried <= 3)) { - for (xdiv = 2; xdiv <= 0x80; xdiv++) - for (xmul = 1; xmul <= 0xfe; xmul++) - if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == - fsb + tried) { - mul = xmul; - div = xdiv; - } - tried++; - } - - if ((mul == 0) || (div == 0)) - return -1; - - return NFORCE2_PLL(mul, div); -} - -/** - * nforce2_write_pll - write PLL value to chipset - * @pll: PLL value - * - * Writes new FSB PLL value to chipset - */ -static void nforce2_write_pll(int pll) -{ - int temp; - - /* Set the pll addr. to 0x00 */ - pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0); - - /* Now write the value in all 64 registers */ - for (temp = 0; temp <= 0x3f; temp++) - pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll); - - return; -} - -/** - * nforce2_fsb_read - Read FSB - * - * Read FSB from chipset - * If bootfsb != 0, return FSB at boot-time - */ -static unsigned int nforce2_fsb_read(int bootfsb) -{ - struct pci_dev *nforce2_sub5; - u32 fsb, temp = 0; - - /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ - nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF, - PCI_ANY_ID, PCI_ANY_ID, NULL); - if (!nforce2_sub5) - return 0; - - pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); - fsb /= 1000000; - - /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); - - if (bootfsb || !temp) - return fsb; - - /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp); - fsb = nforce2_calc_fsb(temp); - - return fsb; -} - -/** - * nforce2_set_fsb - set new FSB - * @fsb: New FSB - * - * Sets new FSB - */ -static int nforce2_set_fsb(unsigned int fsb) -{ - u32 temp = 0; - unsigned int tfsb; - int diff; - int pll = 0; - - if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { - printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb); - return -EINVAL; - } - - tfsb = nforce2_fsb_read(0); - if (!tfsb) { - printk(KERN_ERR PFX "Error while reading the FSB\n"); - return -EINVAL; - } - - /* First write? Then set actual value */ - pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); - if (!temp) { - pll = nforce2_calc_pll(tfsb); - - if (pll < 0) - return -EINVAL; - - nforce2_write_pll(pll); - } - - /* Enable write access */ - temp = 0x01; - pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp); - - diff = tfsb - fsb; - - if (!diff) - return 0; - - while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { - if (diff < 0) - tfsb++; - else - tfsb--; - - /* Calculate the PLL reg. value */ - pll = nforce2_calc_pll(tfsb); - if (pll == -1) - return -EINVAL; - - nforce2_write_pll(pll); -#ifdef NFORCE2_DELAY - mdelay(NFORCE2_DELAY); -#endif - } - - temp = 0x40; - pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp); - - return 0; -} - -/** - * nforce2_get - get the CPU frequency - * @cpu: CPU number - * - * Returns the CPU frequency - */ -static unsigned int nforce2_get(unsigned int cpu) -{ - if (cpu) - return 0; - return nforce2_fsb_read(0) * fid * 100; -} - -/** - * nforce2_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency - * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. - */ -static int nforce2_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ -/* unsigned long flags; */ - struct cpufreq_freqs freqs; - unsigned int target_fsb; - - if ((target_freq > policy->max) || (target_freq < policy->min)) - return -EINVAL; - - target_fsb = target_freq / (fid * 100); - - freqs.old = nforce2_get(policy->cpu); - freqs.new = target_fsb * fid * 100; - freqs.cpu = 0; /* Only one CPU on nForce2 platforms */ - - if (freqs.old == freqs.new) - return 0; - - dprintk("Old CPU frequency %d kHz, new %d kHz\n", - freqs.old, freqs.new); - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Disable IRQs */ - /* local_irq_save(flags); */ - - if (nforce2_set_fsb(target_fsb) < 0) - printk(KERN_ERR PFX "Changing FSB to %d failed\n", - target_fsb); - else - dprintk("Changed FSB successfully to %d\n", - target_fsb); - - /* Enable IRQs */ - /* local_irq_restore(flags); */ - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return 0; -} - -/** - * nforce2_verify - verifies a new CPUFreq policy - * @policy: new policy - */ -static int nforce2_verify(struct cpufreq_policy *policy) -{ - unsigned int fsb_pol_max; - - fsb_pol_max = policy->max / (fid * 100); - - if (policy->min < (fsb_pol_max * fid * 100)) - policy->max = (fsb_pol_max + 1) * fid * 100; - - cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - return 0; -} - -static int nforce2_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int fsb; - unsigned int rfid; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - /* Get current FSB */ - fsb = nforce2_fsb_read(0); - - if (!fsb) - return -EIO; - - /* FIX: Get FID from CPU */ - if (!fid) { - if (!cpu_khz) { - printk(KERN_WARNING PFX - "cpu_khz not set, can't calculate multiplier!\n"); - return -ENODEV; - } - - fid = cpu_khz / (fsb * 100); - rfid = fid % 5; - - if (rfid) { - if (rfid > 2) - fid += 5 - rfid; - else - fid -= rfid; - } - } - - printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb, - fid / 10, fid % 10); - - /* Set maximum FSB to FSB at boot time */ - max_fsb = nforce2_fsb_read(1); - - if (!max_fsb) - return -EIO; - - if (!min_fsb) - min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; - - if (min_fsb < NFORCE2_MIN_FSB) - min_fsb = NFORCE2_MIN_FSB; - - /* cpuinfo and default policy values */ - policy->cpuinfo.min_freq = min_fsb * fid * 100; - policy->cpuinfo.max_freq = max_fsb * fid * 100; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = nforce2_get(policy->cpu); - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; - - return 0; -} - -static int nforce2_cpu_exit(struct cpufreq_policy *policy) -{ - return 0; -} - -static struct cpufreq_driver nforce2_driver = { - .name = "nforce2", - .verify = nforce2_verify, - .target = nforce2_target, - .get = nforce2_get, - .init = nforce2_cpu_init, - .exit = nforce2_cpu_exit, - .owner = THIS_MODULE, -}; - -/** - * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic - * - * Detects nForce2 A2 and C1 stepping - * - */ -static int nforce2_detect_chipset(void) -{ - nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - PCI_DEVICE_ID_NVIDIA_NFORCE2, - PCI_ANY_ID, PCI_ANY_ID, NULL); - - if (nforce2_dev == NULL) - return -ENODEV; - - printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n", - nforce2_dev->revision); - printk(KERN_INFO PFX - "FSB changing is maybe unstable and can lead to " - "crashes and data loss.\n"); - - return 0; -} - -/** - * nforce2_init - initializes the nForce2 CPUFreq driver - * - * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported - * devices, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init nforce2_init(void) -{ - /* TODO: do we need to detect the processor? */ - - /* detect chipset */ - if (nforce2_detect_chipset()) { - printk(KERN_INFO PFX "No nForce2 chipset.\n"); - return -ENODEV; - } - - return cpufreq_register_driver(&nforce2_driver); -} - -/** - * nforce2_exit - unregisters cpufreq module - * - * Unregisters nForce2 FSB change support. - */ -static void __exit nforce2_exit(void) -{ - cpufreq_unregister_driver(&nforce2_driver); -} - -module_init(nforce2_init); -module_exit(nforce2_exit); - diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c deleted file mode 100644 index 35a257dd4bb7..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * Based on documentation provided by Dave Jones. Thanks! - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/ioport.h> -#include <linux/slab.h> -#include <linux/timex.h> -#include <linux/io.h> -#include <linux/delay.h> - -#include <asm/msr.h> -#include <asm/tsc.h> - -#define EPS_BRAND_C7M 0 -#define EPS_BRAND_C7 1 -#define EPS_BRAND_EDEN 2 -#define EPS_BRAND_C3 3 -#define EPS_BRAND_C7D 4 - -struct eps_cpu_data { - u32 fsb; - struct cpufreq_frequency_table freq_table[]; -}; - -static struct eps_cpu_data *eps_cpu[NR_CPUS]; - - -static unsigned int eps_get(unsigned int cpu) -{ - struct eps_cpu_data *centaur; - u32 lo, hi; - - if (cpu) - return 0; - centaur = eps_cpu[cpu]; - if (centaur == NULL) - return 0; - - /* Return current frequency */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - return centaur->fsb * ((lo >> 8) & 0xff); -} - -static int eps_set_state(struct eps_cpu_data *centaur, - unsigned int cpu, - u32 dest_state) -{ - struct cpufreq_freqs freqs; - u32 lo, hi; - int err = 0; - int i; - - freqs.old = eps_get(cpu); - freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Wait while CPU is busy */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i = 0; - while (lo & ((1 << 16) | (1 << 17))) { - udelay(16); - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i++; - if (unlikely(i > 64)) { - err = -ENODEV; - goto postchange; - } - } - /* Set new multiplier and voltage */ - wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); - /* Wait until transition end */ - i = 0; - do { - udelay(16); - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i++; - if (unlikely(i > 64)) { - err = -ENODEV; - goto postchange; - } - } while (lo & ((1 << 16) | (1 << 17))); - - /* Return current frequency */ -postchange: - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - freqs.new = centaur->fsb * ((lo >> 8) & 0xff); - -#ifdef DEBUG - { - u8 current_multiplier, current_voltage; - - /* Print voltage and multiplier */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - current_voltage = lo & 0xff; - printk(KERN_INFO "eps: Current voltage = %dmV\n", - current_voltage * 16 + 700); - current_multiplier = (lo >> 8) & 0xff; - printk(KERN_INFO "eps: Current multiplier = %d\n", - current_multiplier); - } -#endif - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - return err; -} - -static int eps_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - struct eps_cpu_data *centaur; - unsigned int newstate = 0; - unsigned int cpu = policy->cpu; - unsigned int dest_state; - int ret; - - if (unlikely(eps_cpu[cpu] == NULL)) - return -ENODEV; - centaur = eps_cpu[cpu]; - - if (unlikely(cpufreq_frequency_table_target(policy, - &eps_cpu[cpu]->freq_table[0], - target_freq, - relation, - &newstate))) { - return -EINVAL; - } - - /* Make frequency transition */ - dest_state = centaur->freq_table[newstate].index & 0xffff; - ret = eps_set_state(centaur, cpu, dest_state); - if (ret) - printk(KERN_ERR "eps: Timeout!\n"); - return ret; -} - -static int eps_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, - &eps_cpu[policy->cpu]->freq_table[0]); -} - -static int eps_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - u32 lo, hi; - u64 val; - u8 current_multiplier, current_voltage; - u8 max_multiplier, max_voltage; - u8 min_multiplier, min_voltage; - u8 brand = 0; - u32 fsb; - struct eps_cpu_data *centaur; - struct cpuinfo_x86 *c = &cpu_data(0); - struct cpufreq_frequency_table *f_table; - int k, step, voltage; - int ret; - int states; - - if (policy->cpu != 0) - return -ENODEV; - - /* Check brand */ - printk(KERN_INFO "eps: Detected VIA "); - - switch (c->x86_model) { - case 10: - rdmsr(0x1153, lo, hi); - brand = (((lo >> 2) ^ lo) >> 18) & 3; - printk(KERN_CONT "Model A "); - break; - case 13: - rdmsr(0x1154, lo, hi); - brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff; - printk(KERN_CONT "Model D "); - break; - } - - switch (brand) { - case EPS_BRAND_C7M: - printk(KERN_CONT "C7-M\n"); - break; - case EPS_BRAND_C7: - printk(KERN_CONT "C7\n"); - break; - case EPS_BRAND_EDEN: - printk(KERN_CONT "Eden\n"); - break; - case EPS_BRAND_C7D: - printk(KERN_CONT "C7-D\n"); - break; - case EPS_BRAND_C3: - printk(KERN_CONT "C3\n"); - return -ENODEV; - break; - } - /* Enable Enhanced PowerSaver */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { - val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; - wrmsrl(MSR_IA32_MISC_ENABLE, val); - /* Can be locked at 0 */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { - printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); - return -ENODEV; - } - } - - /* Print voltage and multiplier */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - current_voltage = lo & 0xff; - printk(KERN_INFO "eps: Current voltage = %dmV\n", - current_voltage * 16 + 700); - current_multiplier = (lo >> 8) & 0xff; - printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); - - /* Print limits */ - max_voltage = hi & 0xff; - printk(KERN_INFO "eps: Highest voltage = %dmV\n", - max_voltage * 16 + 700); - max_multiplier = (hi >> 8) & 0xff; - printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier); - min_voltage = (hi >> 16) & 0xff; - printk(KERN_INFO "eps: Lowest voltage = %dmV\n", - min_voltage * 16 + 700); - min_multiplier = (hi >> 24) & 0xff; - printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); - - /* Sanity checks */ - if (current_multiplier == 0 || max_multiplier == 0 - || min_multiplier == 0) - return -EINVAL; - if (current_multiplier > max_multiplier - || max_multiplier <= min_multiplier) - return -EINVAL; - if (current_voltage > 0x1f || max_voltage > 0x1f) - return -EINVAL; - if (max_voltage < min_voltage) - return -EINVAL; - - /* Calc FSB speed */ - fsb = cpu_khz / current_multiplier; - /* Calc number of p-states supported */ - if (brand == EPS_BRAND_C7M) - states = max_multiplier - min_multiplier + 1; - else - states = 2; - - /* Allocate private data and frequency table for current cpu */ - centaur = kzalloc(sizeof(struct eps_cpu_data) - + (states + 1) * sizeof(struct cpufreq_frequency_table), - GFP_KERNEL); - if (!centaur) - return -ENOMEM; - eps_cpu[0] = centaur; - - /* Copy basic values */ - centaur->fsb = fsb; - - /* Fill frequency and MSR value table */ - f_table = ¢aur->freq_table[0]; - if (brand != EPS_BRAND_C7M) { - f_table[0].frequency = fsb * min_multiplier; - f_table[0].index = (min_multiplier << 8) | min_voltage; - f_table[1].frequency = fsb * max_multiplier; - f_table[1].index = (max_multiplier << 8) | max_voltage; - f_table[2].frequency = CPUFREQ_TABLE_END; - } else { - k = 0; - step = ((max_voltage - min_voltage) * 256) - / (max_multiplier - min_multiplier); - for (i = min_multiplier; i <= max_multiplier; i++) { - voltage = (k * step) / 256 + min_voltage; - f_table[k].frequency = fsb * i; - f_table[k].index = (i << 8) | voltage; - k++; - } - f_table[k].frequency = CPUFREQ_TABLE_END; - } - - policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ - policy->cur = fsb * current_multiplier; - - ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); - if (ret) { - kfree(centaur); - return ret; - } - - cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); - return 0; -} - -static int eps_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int cpu = policy->cpu; - struct eps_cpu_data *centaur; - u32 lo, hi; - - if (eps_cpu[cpu] == NULL) - return -ENODEV; - centaur = eps_cpu[cpu]; - - /* Get max frequency */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - /* Set max frequency */ - eps_set_state(centaur, cpu, hi & 0xffff); - /* Bye */ - cpufreq_frequency_table_put_attr(policy->cpu); - kfree(eps_cpu[cpu]); - eps_cpu[cpu] = NULL; - return 0; -} - -static struct freq_attr *eps_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver eps_driver = { - .verify = eps_verify, - .target = eps_target, - .init = eps_cpu_init, - .exit = eps_cpu_exit, - .get = eps_get, - .name = "e_powersaver", - .owner = THIS_MODULE, - .attr = eps_attr, -}; - -static int __init eps_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - /* This driver will work only on Centaur C7 processors with - * Enhanced SpeedStep/PowerSaver registers */ - if (c->x86_vendor != X86_VENDOR_CENTAUR - || c->x86 != 6 || c->x86_model < 10) - return -ENODEV; - if (!cpu_has(c, X86_FEATURE_EST)) - return -ENODEV; - - if (cpufreq_register_driver(&eps_driver)) - return -EINVAL; - return 0; -} - -static void __exit eps_exit(void) -{ - cpufreq_unregister_driver(&eps_driver); -} - -MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>"); -MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); -MODULE_LICENSE("GPL"); - -module_init(eps_init); -module_exit(eps_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c deleted file mode 100644 index c587db472a75..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * elanfreq: cpufreq driver for the AMD ELAN family - * - * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de> - * - * Parts of this code are (c) Sven Geggus <sven@geggus.net> - * - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> - -#include <linux/delay.h> -#include <linux/cpufreq.h> - -#include <asm/msr.h> -#include <linux/timex.h> -#include <linux/io.h> - -#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ -#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ - -/* Module parameter */ -static int max_freq; - -struct s_elan_multiplier { - int clock; /* frequency in kHz */ - int val40h; /* PMU Force Mode register */ - int val80h; /* CPU Clock Speed Register */ -}; - -/* - * It is important that the frequencies - * are listed in ascending order here! - */ -static struct s_elan_multiplier elan_multiplier[] = { - {1000, 0x02, 0x18}, - {2000, 0x02, 0x10}, - {4000, 0x02, 0x08}, - {8000, 0x00, 0x00}, - {16000, 0x00, 0x02}, - {33000, 0x00, 0x04}, - {66000, 0x01, 0x04}, - {99000, 0x01, 0x05} -}; - -static struct cpufreq_frequency_table elanfreq_table[] = { - {0, 1000}, - {1, 2000}, - {2, 4000}, - {3, 8000}, - {4, 16000}, - {5, 33000}, - {6, 66000}, - {7, 99000}, - {0, CPUFREQ_TABLE_END}, -}; - - -/** - * elanfreq_get_cpu_frequency: determine current cpu speed - * - * Finds out at which frequency the CPU of the Elan SOC runs - * at the moment. Frequencies from 1 to 33 MHz are generated - * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" - * and have the rest of the chip running with 33 MHz. - */ - -static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) -{ - u8 clockspeed_reg; /* Clock Speed Register */ - - local_irq_disable(); - outb_p(0x80, REG_CSCIR); - clockspeed_reg = inb_p(REG_CSCDR); - local_irq_enable(); - - if ((clockspeed_reg & 0xE0) == 0xE0) - return 0; - - /* Are we in CPU clock multiplied mode (66/99 MHz)? */ - if ((clockspeed_reg & 0xE0) == 0xC0) { - if ((clockspeed_reg & 0x01) == 0) - return 66000; - else - return 99000; - } - - /* 33 MHz is not 32 MHz... */ - if ((clockspeed_reg & 0xE0) == 0xA0) - return 33000; - - return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; -} - - -/** - * elanfreq_set_cpu_frequency: Change the CPU core frequency - * @cpu: cpu number - * @freq: frequency in kHz - * - * This function takes a frequency value and changes the CPU frequency - * according to this. Note that the frequency has to be checked by - * elanfreq_validatespeed() for correctness! - * - * There is no return value. - */ - -static void elanfreq_set_cpu_state(unsigned int state) -{ - struct cpufreq_freqs freqs; - - freqs.old = elanfreq_get_cpu_frequency(0); - freqs.new = elan_multiplier[state].clock; - freqs.cpu = 0; /* elanfreq.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", - elan_multiplier[state].clock); - - - /* - * Access to the Elan's internal registers is indexed via - * 0x22: Chip Setup & Control Register Index Register (CSCI) - * 0x23: Chip Setup & Control Register Data Register (CSCD) - * - */ - - /* - * 0x40 is the Power Management Unit's Force Mode Register. - * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) - */ - - local_irq_disable(); - outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ - outb_p(0x00, REG_CSCDR); - local_irq_enable(); /* wait till internal pipelines and */ - udelay(1000); /* buffers have cleaned up */ - - local_irq_disable(); - - /* now, set the CPU clock speed register (0x80) */ - outb_p(0x80, REG_CSCIR); - outb_p(elan_multiplier[state].val80h, REG_CSCDR); - - /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ - outb_p(0x40, REG_CSCIR); - outb_p(elan_multiplier[state].val40h, REG_CSCDR); - udelay(10000); - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -}; - - -/** - * elanfreq_validatespeed: test if frequency range is valid - * @policy: the policy to validate - * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. - */ - -static int elanfreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); -} - -static int elanfreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], - target_freq, relation, &newstate)) - return -EINVAL; - - elanfreq_set_cpu_state(newstate); - - return 0; -} - - -/* - * Module init and exit code - */ - -static int elanfreq_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - unsigned int i; - int result; - - /* capability check */ - if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model != 10)) - return -ENODEV; - - /* max freq */ - if (!max_freq) - max_freq = elanfreq_get_cpu_frequency(0); - - /* table init */ - for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if (elanfreq_table[i].frequency > max_freq) - elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; - } - - /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = elanfreq_get_cpu_frequency(0); - - result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); - if (result) - return result; - - cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); - return 0; -} - - -static int elanfreq_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - - -#ifndef MODULE -/** - * elanfreq_setup - elanfreq command line parameter parsing - * - * elanfreq command line parameter. Use: - * elanfreq=66000 - * to set the maximum CPU frequency to 66 MHz. Note that in - * case you do not give this boot parameter, the maximum - * frequency will fall back to _current_ CPU frequency which - * might be lower. If you build this as a module, use the - * max_freq module parameter instead. - */ -static int __init elanfreq_setup(char *str) -{ - max_freq = simple_strtoul(str, &str, 0); - printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); - return 1; -} -__setup("elanfreq=", elanfreq_setup); -#endif - - -static struct freq_attr *elanfreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver elanfreq_driver = { - .get = elanfreq_get_cpu_frequency, - .verify = elanfreq_verify, - .target = elanfreq_target, - .init = elanfreq_cpu_init, - .exit = elanfreq_cpu_exit, - .name = "elanfreq", - .owner = THIS_MODULE, - .attr = elanfreq_attr, -}; - - -static int __init elanfreq_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - /* Test if we have the right hardware */ - if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model != 10)) { - printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); - return -ENODEV; - } - return cpufreq_register_driver(&elanfreq_driver); -} - - -static void __exit elanfreq_exit(void) -{ - cpufreq_unregister_driver(&elanfreq_driver); -} - - -module_param(max_freq, int, 0444); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, " - "Sven Geggus <sven@geggus.net>"); -MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); - -module_init(elanfreq_init); -module_exit(elanfreq_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c deleted file mode 100644 index 32974cf84232..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - * Cyrix MediaGX and NatSemi Geode Suspend Modulation - * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> - * (C) 2002 Hiroshi Miura <miura@da-cha.org> - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation - * - * The author(s) of this software shall not be held liable for damages - * of any nature resulting due to the use of this software. This - * software is provided AS-IS with no warranties. - * - * Theoretical note: - * - * (see Geode(tm) CS5530 manual (rev.4.1) page.56) - * - * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 - * are based on Suspend Modulation. - * - * Suspend Modulation works by asserting and de-asserting the SUSP# pin - * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# - * the CPU enters an idle state. GX1 stops its core clock when SUSP# is - * asserted then power consumption is reduced. - * - * Suspend Modulation's OFF/ON duration are configurable - * with 'Suspend Modulation OFF Count Register' - * and 'Suspend Modulation ON Count Register'. - * These registers are 8bit counters that represent the number of - * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) - * to the processor. - * - * These counters define a ratio which is the effective frequency - * of operation of the system. - * - * OFF Count - * F_eff = Fgx * ---------------------- - * OFF Count + ON Count - * - * 0 <= On Count, Off Count <= 255 - * - * From these limits, we can get register values - * - * off_duration + on_duration <= MAX_DURATION - * on_duration = off_duration * (stock_freq - freq) / freq - * - * off_duration = (freq * DURATION) / stock_freq - * on_duration = DURATION - off_duration - * - * - *--------------------------------------------------------------------------- - * - * ChangeLog: - * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org> - * - fix on/off register mistake - * - fix cpu_khz calc when it stops cpu modulation. - * - * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org> - * - rewrite for Cyrix MediaGX Cx5510/5520 and - * NatSemi Geode Cs5530(A). - * - * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com> - * - cs5530_mod patch for 2.4.19-rc1. - * - *--------------------------------------------------------------------------- - * - * Todo - * Test on machines with 5510, 5530, 5530A - */ - -/************************************************************************ - * Suspend Modulation - Definitions * - ************************************************************************/ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/cpufreq.h> -#include <linux/pci.h> -#include <linux/errno.h> -#include <linux/slab.h> - -#include <asm/processor-cyrix.h> - -/* PCI config registers, all at F0 */ -#define PCI_PMER1 0x80 /* power management enable register 1 */ -#define PCI_PMER2 0x81 /* power management enable register 2 */ -#define PCI_PMER3 0x82 /* power management enable register 3 */ -#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ -#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ -#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ -#define PCI_MODON 0x95 /* suspend modulation ON counter register */ -#define PCI_SUSCFG 0x96 /* suspend configuration register */ - -/* PMER1 bits */ -#define GPM (1<<0) /* global power management */ -#define GIT (1<<1) /* globally enable PM device idle timers */ -#define GTR (1<<2) /* globally enable IO traps */ -#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ -#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ - -/* SUSCFG bits */ -#define SUSMOD (1<<0) /* enable/disable suspend modulation */ -/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */ -#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ - /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ -#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ -/* the below is supported only with cs5530A */ -#define PWRSVE_ISA (1<<3) /* stop ISA clock */ -#define PWRSVE (1<<4) /* active idle */ - -struct gxfreq_params { - u8 on_duration; - u8 off_duration; - u8 pci_suscfg; - u8 pci_pmer1; - u8 pci_pmer2; - struct pci_dev *cs55x0; -}; - -static struct gxfreq_params *gx_params; -static int stock_freq; - -/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ -static int pci_busclk; -module_param(pci_busclk, int, 0444); - -/* maximum duration for which the cpu may be suspended - * (32us * MAX_DURATION). If no parameter is given, this defaults - * to 255. - * Note that this leads to a maximum of 8 ms(!) where the CPU clock - * is suspended -- processing power is just 0.39% of what it used to be, - * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ -static int max_duration = 255; -module_param(max_duration, int, 0444); - -/* For the default policy, we want at least some processing power - * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) - */ -#define POLICY_MIN_DIV 20 - - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "gx-suspmod", msg) - -/** - * we can detect a core multipiler from dir0_lsb - * from GX1 datasheet p.56, - * MULT[3:0]: - * 0000 = SYSCLK multiplied by 4 (test only) - * 0001 = SYSCLK multiplied by 10 - * 0010 = SYSCLK multiplied by 4 - * 0011 = SYSCLK multiplied by 6 - * 0100 = SYSCLK multiplied by 9 - * 0101 = SYSCLK multiplied by 5 - * 0110 = SYSCLK multiplied by 7 - * 0111 = SYSCLK multiplied by 8 - * of 33.3MHz - **/ -static int gx_freq_mult[16] = { - 4, 10, 4, 6, 9, 5, 7, 8, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - - -/**************************************************************** - * Low Level chipset interface * - ****************************************************************/ -static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, - { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, - { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, - { 0, }, -}; - -static void gx_write_byte(int reg, int value) -{ - pci_write_config_byte(gx_params->cs55x0, reg, value); -} - -/** - * gx_detect_chipset: - * - **/ -static __init struct pci_dev *gx_detect_chipset(void) -{ - struct pci_dev *gx_pci = NULL; - - /* check if CPU is a MediaGX or a Geode. */ - if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) && - (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { - dprintk("error: no MediaGX/Geode processor found!\n"); - return NULL; - } - - /* detect which companion chip is used */ - for_each_pci_dev(gx_pci) { - if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) - return gx_pci; - } - - dprintk("error: no supported chipset found!\n"); - return NULL; -} - -/** - * gx_get_cpuspeed: - * - * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi - * Geode CPU runs. - */ -static unsigned int gx_get_cpuspeed(unsigned int cpu) -{ - if ((gx_params->pci_suscfg & SUSMOD) == 0) - return stock_freq; - - return (stock_freq * gx_params->off_duration) - / (gx_params->on_duration + gx_params->off_duration); -} - -/** - * gx_validate_speed: - * determine current cpu speed - * - **/ - -static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, - u8 *off_duration) -{ - unsigned int i; - u8 tmp_on, tmp_off; - int old_tmp_freq = stock_freq; - int tmp_freq; - - *off_duration = 1; - *on_duration = 0; - - for (i = max_duration; i > 0; i--) { - tmp_off = ((khz * i) / stock_freq) & 0xff; - tmp_on = i - tmp_off; - tmp_freq = (stock_freq * tmp_off) / i; - /* if this relation is closer to khz, use this. If it's equal, - * prefer it, too - lower latency */ - if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { - *on_duration = tmp_on; - *off_duration = tmp_off; - old_tmp_freq = tmp_freq; - } - } - - return old_tmp_freq; -} - - -/** - * gx_set_cpuspeed: - * set cpu speed in khz. - **/ - -static void gx_set_cpuspeed(unsigned int khz) -{ - u8 suscfg, pmer1; - unsigned int new_khz; - unsigned long flags; - struct cpufreq_freqs freqs; - - freqs.cpu = 0; - freqs.old = gx_get_cpuspeed(0); - - new_khz = gx_validate_speed(khz, &gx_params->on_duration, - &gx_params->off_duration); - - freqs.new = new_khz; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - local_irq_save(flags); - - - - if (new_khz != stock_freq) { - /* if new khz == 100% of CPU speed, it is special case */ - switch (gx_params->cs55x0->device) { - case PCI_DEVICE_ID_CYRIX_5530_LEGACY: - pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; - /* FIXME: need to test other values -- Zwane,Miura */ - /* typical 2 to 4ms */ - gx_write_byte(PCI_IRQTC, 4); - /* typical 50 to 100ms */ - gx_write_byte(PCI_VIDTC, 100); - gx_write_byte(PCI_PMER1, pmer1); - - if (gx_params->cs55x0->revision < 0x10) { - /* CS5530(rev 1.2, 1.3) */ - suscfg = gx_params->pci_suscfg|SUSMOD; - } else { - /* CS5530A,B.. */ - suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE; - } - break; - case PCI_DEVICE_ID_CYRIX_5520: - case PCI_DEVICE_ID_CYRIX_5510: - suscfg = gx_params->pci_suscfg | SUSMOD; - break; - default: - local_irq_restore(flags); - dprintk("fatal: try to set unknown chipset.\n"); - return; - } - } else { - suscfg = gx_params->pci_suscfg & ~(SUSMOD); - gx_params->off_duration = 0; - gx_params->on_duration = 0; - dprintk("suspend modulation disabled: cpu runs 100%% speed.\n"); - } - - gx_write_byte(PCI_MODOFF, gx_params->off_duration); - gx_write_byte(PCI_MODON, gx_params->on_duration); - - gx_write_byte(PCI_SUSCFG, suscfg); - pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); - - local_irq_restore(flags); - - gx_params->pci_suscfg = suscfg; - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", - gx_params->on_duration * 32, gx_params->off_duration * 32); - dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); -} - -/**************************************************************** - * High level functions * - ****************************************************************/ - -/* - * cpufreq_gx_verify: test if frequency range is valid - * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. - */ - -static int cpufreq_gx_verify(struct cpufreq_policy *policy) -{ - unsigned int tmp_freq = 0; - u8 tmp1, tmp2; - - if (!stock_freq || !policy) - return -EINVAL; - - policy->cpu = 0; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), - stock_freq); - - /* it needs to be assured that at least one supported frequency is - * within policy->min and policy->max. If it is not, policy->max - * needs to be increased until one freuqency is supported. - * policy->min may not be decreased, though. This way we guarantee a - * specific processing capacity. - */ - tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); - if (tmp_freq < policy->min) - tmp_freq += stock_freq / max_duration; - policy->min = tmp_freq; - if (policy->min > policy->max) - policy->max = tmp_freq; - tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); - if (tmp_freq > policy->max) - tmp_freq -= stock_freq / max_duration; - policy->max = tmp_freq; - if (policy->max < policy->min) - policy->max = policy->min; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), - stock_freq); - - return 0; -} - -/* - * cpufreq_gx_target: - * - */ -static int cpufreq_gx_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - u8 tmp1, tmp2; - unsigned int tmp_freq; - - if (!stock_freq || !policy) - return -EINVAL; - - policy->cpu = 0; - - tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); - while (tmp_freq < policy->min) { - tmp_freq += stock_freq / max_duration; - tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); - } - while (tmp_freq > policy->max) { - tmp_freq -= stock_freq / max_duration; - tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); - } - - gx_set_cpuspeed(tmp_freq); - - return 0; -} - -static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int maxfreq, curfreq; - - if (!policy || policy->cpu != 0) - return -ENODEV; - - /* determine maximum frequency */ - if (pci_busclk) - maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - else if (cpu_khz) - maxfreq = cpu_khz; - else - maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - - stock_freq = maxfreq; - curfreq = gx_get_cpuspeed(0); - - dprintk("cpu max frequency is %d.\n", maxfreq); - dprintk("cpu current frequency is %dkHz.\n", curfreq); - - /* setup basic struct for cpufreq API */ - policy->cpu = 0; - - if (max_duration < POLICY_MIN_DIV) - policy->min = maxfreq / max_duration; - else - policy->min = maxfreq / POLICY_MIN_DIV; - policy->max = maxfreq; - policy->cur = curfreq; - policy->cpuinfo.min_freq = maxfreq / max_duration; - policy->cpuinfo.max_freq = maxfreq; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - - return 0; -} - -/* - * cpufreq_gx_init: - * MediaGX/Geode GX initialize cpufreq driver - */ -static struct cpufreq_driver gx_suspmod_driver = { - .get = gx_get_cpuspeed, - .verify = cpufreq_gx_verify, - .target = cpufreq_gx_target, - .init = cpufreq_gx_cpu_init, - .name = "gx-suspmod", - .owner = THIS_MODULE, -}; - -static int __init cpufreq_gx_init(void) -{ - int ret; - struct gxfreq_params *params; - struct pci_dev *gx_pci; - - /* Test if we have the right hardware */ - gx_pci = gx_detect_chipset(); - if (gx_pci == NULL) - return -ENODEV; - - /* check whether module parameters are sane */ - if (max_duration > 0xff) - max_duration = 0xff; - - dprintk("geode suspend modulation available.\n"); - - params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); - if (params == NULL) - return -ENOMEM; - - params->cs55x0 = gx_pci; - gx_params = params; - - /* keep cs55x0 configurations */ - pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); - pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); - pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); - pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); - pci_read_config_byte(params->cs55x0, PCI_MODOFF, - &(params->off_duration)); - - ret = cpufreq_register_driver(&gx_suspmod_driver); - if (ret) { - kfree(params); - return ret; /* register error! */ - } - - return 0; -} - -static void __exit cpufreq_gx_exit(void) -{ - cpufreq_unregister_driver(&gx_suspmod_driver); - pci_dev_put(gx_params->cs55x0); - kfree(gx_params); -} - -MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>"); -MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); -MODULE_LICENSE("GPL"); - -module_init(cpufreq_gx_init); -module_exit(cpufreq_gx_exit); - diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c deleted file mode 100644 index cf48cdd6907d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ /dev/null @@ -1,1029 +0,0 @@ -/* - * (C) 2001-2004 Dave Jones. <davej@redhat.com> - * (C) 2002 Padraig Brady. <padraig@antefacto.com> - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by VIA. - * - * VIA have currently 3 different versions of Longhaul. - * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. - * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. - * Version 2 of longhaul is backward compatible with v1, but adds - * LONGHAUL MSR for purpose of both frequency and voltage scaling. - * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). - * Version 3 of longhaul got renamed to Powersaver and redesigned - * to use only the POWERSAVER MSR at 0x110a. - * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. - * It's pretty much the same feature wise to longhaul v2, though - * there is provision for scaling FSB too, but this doesn't work - * too well in practice so we don't even try to use this. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/pci.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/delay.h> -#include <linux/timex.h> -#include <linux/io.h> -#include <linux/acpi.h> - -#include <asm/msr.h> -#include <acpi/processor.h> - -#include "longhaul.h" - -#define PFX "longhaul: " - -#define TYPE_LONGHAUL_V1 1 -#define TYPE_LONGHAUL_V2 2 -#define TYPE_POWERSAVER 3 - -#define CPU_SAMUEL 1 -#define CPU_SAMUEL2 2 -#define CPU_EZRA 3 -#define CPU_EZRA_T 4 -#define CPU_NEHEMIAH 5 -#define CPU_NEHEMIAH_C 6 - -/* Flags */ -#define USE_ACPI_C3 (1 << 1) -#define USE_NORTHBRIDGE (1 << 2) - -static int cpu_model; -static unsigned int numscales = 16; -static unsigned int fsb; - -static const struct mV_pos *vrm_mV_table; -static const unsigned char *mV_vrm_table; - -static unsigned int highest_speed, lowest_speed; /* kHz */ -static unsigned int minmult, maxmult; -static int can_scale_voltage; -static struct acpi_processor *pr; -static struct acpi_processor_cx *cx; -static u32 acpi_regs_addr; -static u8 longhaul_flags; -static unsigned int longhaul_index; - -/* Module parameters */ -static int scale_voltage; -static int disable_acpi_c3; -static int revid_errata; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "longhaul", msg) - - -/* Clock ratios multiplied by 10 */ -static int mults[32]; -static int eblcr[32]; -static int longhaul_version; -static struct cpufreq_frequency_table *longhaul_table; - -#ifdef CONFIG_CPU_FREQ_DEBUG -static char speedbuffer[8]; - -static char *print_speed(int speed) -{ - if (speed < 1000) { - snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed); - return speedbuffer; - } - - if (speed%1000 == 0) - snprintf(speedbuffer, sizeof(speedbuffer), - "%dGHz", speed/1000); - else - snprintf(speedbuffer, sizeof(speedbuffer), - "%d.%dGHz", speed/1000, (speed%1000)/100); - - return speedbuffer; -} -#endif - - -static unsigned int calc_speed(int mult) -{ - int khz; - khz = (mult/10)*fsb; - if (mult%10) - khz += fsb/2; - khz *= 1000; - return khz; -} - - -static int longhaul_get_cpu_mult(void) -{ - unsigned long invalue = 0, lo, hi; - - rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22; - if (longhaul_version == TYPE_LONGHAUL_V2 || - longhaul_version == TYPE_POWERSAVER) { - if (lo & (1<<27)) - invalue += 16; - } - return eblcr[invalue]; -} - -/* For processor with BCR2 MSR */ - -static void do_longhaul1(unsigned int mults_index) -{ - union msr_bcr2 bcr2; - - rdmsrl(MSR_VIA_BCR2, bcr2.val); - /* Enable software clock multiplier */ - bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = mults_index & 0xff; - - /* Sync to timer tick */ - safe_halt(); - /* Change frequency on next halt or sleep */ - wrmsrl(MSR_VIA_BCR2, bcr2.val); - /* Invoke transition */ - ACPI_FLUSH_CPU_CACHE(); - halt(); - - /* Disable software clock multiplier */ - local_irq_disable(); - rdmsrl(MSR_VIA_BCR2, bcr2.val); - bcr2.bits.ESOFTBF = 0; - wrmsrl(MSR_VIA_BCR2, bcr2.val); -} - -/* For processor with Longhaul MSR */ - -static void do_powersaver(int cx_address, unsigned int mults_index, - unsigned int dir) -{ - union msr_longhaul longhaul; - u32 t; - - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Setup new frequency */ - if (!revid_errata) - longhaul.bits.RevisionKey = longhaul.bits.RevisionID; - else - longhaul.bits.RevisionKey = 0; - longhaul.bits.SoftBusRatio = mults_index & 0xf; - longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4; - /* Setup new voltage */ - if (can_scale_voltage) - longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f; - /* Sync to timer tick */ - safe_halt(); - /* Raise voltage if necessary */ - if (can_scale_voltage && dir) { - longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Change voltage */ - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 - * read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - } - - /* Change frequency on next halt or sleep */ - longhaul.bits.EnableSoftBusRatio = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - /* Disable bus ratio bit */ - longhaul.bits.EnableSoftBusRatio = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - - /* Reduce voltage if necessary */ - if (can_scale_voltage && !dir) { - longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Change voltage */ - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 - * read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - } -} - -/** - * longhaul_set_cpu_frequency() - * @mults_index : bitpattern of the new multiplier. - * - * Sets a new clock ratio. - */ - -static void longhaul_setstate(unsigned int table_index) -{ - unsigned int mults_index; - int speed, mult; - struct cpufreq_freqs freqs; - unsigned long flags; - unsigned int pic1_mask, pic2_mask; - u16 bm_status = 0; - u32 bm_timeout = 1000; - unsigned int dir = 0; - - mults_index = longhaul_table[table_index].index; - /* Safety precautions */ - mult = mults[mults_index & 0x1f]; - if (mult == -1) - return; - speed = calc_speed(mult); - if ((speed > highest_speed) || (speed < lowest_speed)) - return; - /* Voltage transition before frequency transition? */ - if (can_scale_voltage && longhaul_index < table_index) - dir = 1; - - freqs.old = calc_speed(longhaul_get_cpu_mult()); - freqs.new = speed; - freqs.cpu = 0; /* longhaul.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", - fsb, mult/10, mult%10, print_speed(speed/1000)); -retry_loop: - preempt_disable(); - local_irq_save(flags); - - pic2_mask = inb(0xA1); - pic1_mask = inb(0x21); /* works on C3. save mask. */ - outb(0xFF, 0xA1); /* Overkill */ - outb(0xFE, 0x21); /* TMR0 only */ - - /* Wait while PCI bus is busy. */ - if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE - || ((pr != NULL) && pr->flags.bm_control))) { - bm_status = inw(acpi_regs_addr); - bm_status &= 1 << 4; - while (bm_status && bm_timeout) { - outw(1 << 4, acpi_regs_addr); - bm_timeout--; - bm_status = inw(acpi_regs_addr); - bm_status &= 1 << 4; - } - } - - if (longhaul_flags & USE_NORTHBRIDGE) { - /* Disable AGP and PCI arbiters */ - outb(3, 0x22); - } else if ((pr != NULL) && pr->flags.bm_control) { - /* Disable bus master arbitration */ - acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1); - } - switch (longhaul_version) { - - /* - * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) - * Software controlled multipliers only. - */ - case TYPE_LONGHAUL_V1: - do_longhaul1(mults_index); - break; - - /* - * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] - * - * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) - * Nehemiah can do FSB scaling too, but this has never been proven - * to work in practice. - */ - case TYPE_LONGHAUL_V2: - case TYPE_POWERSAVER: - if (longhaul_flags & USE_ACPI_C3) { - /* Don't allow wakeup */ - acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - do_powersaver(cx->address, mults_index, dir); - } else { - do_powersaver(0, mults_index, dir); - } - break; - } - - if (longhaul_flags & USE_NORTHBRIDGE) { - /* Enable arbiters */ - outb(0, 0x22); - } else if ((pr != NULL) && pr->flags.bm_control) { - /* Enable bus master arbitration */ - acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0); - } - outb(pic2_mask, 0xA1); /* restore mask */ - outb(pic1_mask, 0x21); - - local_irq_restore(flags); - preempt_enable(); - - freqs.new = calc_speed(longhaul_get_cpu_mult()); - /* Check if requested frequency is set. */ - if (unlikely(freqs.new != speed)) { - printk(KERN_INFO PFX "Failed to set requested frequency!\n"); - /* Revision ID = 1 but processor is expecting revision key - * equal to 0. Jumpers at the bottom of processor will change - * multiplier and FSB, but will not change bits in Longhaul - * MSR nor enable voltage scaling. */ - if (!revid_errata) { - printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " - "option.\n"); - revid_errata = 1; - msleep(200); - goto retry_loop; - } - /* Why ACPI C3 sometimes doesn't work is a mystery for me. - * But it does happen. Processor is entering ACPI C3 state, - * but it doesn't change frequency. I tried poking various - * bits in northbridge registers, but without success. */ - if (longhaul_flags & USE_ACPI_C3) { - printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); - longhaul_flags &= ~USE_ACPI_C3; - if (revid_errata) { - printk(KERN_INFO PFX "Disabling \"Ignore " - "Revision ID\" option.\n"); - revid_errata = 0; - } - msleep(200); - goto retry_loop; - } - /* This shouldn't happen. Longhaul ver. 2 was reported not - * working on processors without voltage scaling, but with - * RevID = 1. RevID errata will make things right. Just - * to be 100% sure. */ - if (longhaul_version == TYPE_LONGHAUL_V2) { - printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); - longhaul_version = TYPE_LONGHAUL_V1; - msleep(200); - goto retry_loop; - } - } - /* Report true CPU frequency */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - if (!bm_timeout) - printk(KERN_INFO PFX "Warning: Timeout while waiting for " - "idle PCI bus.\n"); -} - -/* - * Centaur decided to make life a little more tricky. - * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. - * Samuel2 and above have to try and guess what the FSB is. - * We do this by assuming we booted at maximum multiplier, and interpolate - * between that value multiplied by possible FSBs and cpu_mhz which - * was calculated at boot time. Really ugly, but no other way to do this. - */ - -#define ROUNDING 0xf - -static int guess_fsb(int mult) -{ - int speed = cpu_khz / 1000; - int i; - int speeds[] = { 666, 1000, 1333, 2000 }; - int f_max, f_min; - - for (i = 0; i < 4; i++) { - f_max = ((speeds[i] * mult) + 50) / 100; - f_max += (ROUNDING / 2); - f_min = f_max - ROUNDING; - if ((speed <= f_max) && (speed >= f_min)) - return speeds[i] / 10; - } - return 0; -} - - -static int __cpuinit longhaul_get_ranges(void) -{ - unsigned int i, j, k = 0; - unsigned int ratio; - int mult; - - /* Get current frequency */ - mult = longhaul_get_cpu_mult(); - if (mult == -1) { - printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); - return -EINVAL; - } - fsb = guess_fsb(mult); - if (fsb == 0) { - printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); - return -EINVAL; - } - /* Get max multiplier - as we always did. - * Longhaul MSR is useful only when voltage scaling is enabled. - * C3 is booting at max anyway. */ - maxmult = mult; - /* Get min multiplier */ - switch (cpu_model) { - case CPU_NEHEMIAH: - minmult = 50; - break; - case CPU_NEHEMIAH_C: - minmult = 40; - break; - default: - minmult = 30; - break; - } - - dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n", - minmult/10, minmult%10, maxmult/10, maxmult%10); - - highest_speed = calc_speed(maxmult); - lowest_speed = calc_speed(minmult); - dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, - print_speed(lowest_speed/1000), - print_speed(highest_speed/1000)); - - if (lowest_speed == highest_speed) { - printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n"); - return -EINVAL; - } - if (lowest_speed > highest_speed) { - printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", - lowest_speed, highest_speed); - return -EINVAL; - } - - longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table), - GFP_KERNEL); - if (!longhaul_table) - return -ENOMEM; - - for (j = 0; j < numscales; j++) { - ratio = mults[j]; - if (ratio == -1) - continue; - if (ratio > maxmult || ratio < minmult) - continue; - longhaul_table[k].frequency = calc_speed(ratio); - longhaul_table[k].index = j; - k++; - } - if (k <= 1) { - kfree(longhaul_table); - return -ENODEV; - } - /* Sort */ - for (j = 0; j < k - 1; j++) { - unsigned int min_f, min_i; - min_f = longhaul_table[j].frequency; - min_i = j; - for (i = j + 1; i < k; i++) { - if (longhaul_table[i].frequency < min_f) { - min_f = longhaul_table[i].frequency; - min_i = i; - } - } - if (min_i != j) { - swap(longhaul_table[j].frequency, - longhaul_table[min_i].frequency); - swap(longhaul_table[j].index, - longhaul_table[min_i].index); - } - } - - longhaul_table[k].frequency = CPUFREQ_TABLE_END; - - /* Find index we are running on */ - for (j = 0; j < k; j++) { - if (mults[longhaul_table[j].index & 0x1f] == mult) { - longhaul_index = j; - break; - } - } - return 0; -} - - -static void __cpuinit longhaul_setup_voltagescaling(void) -{ - union msr_longhaul longhaul; - struct mV_pos minvid, maxvid, vid; - unsigned int j, speed, pos, kHz_step, numvscales; - int min_vid_speed; - - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (!(longhaul.bits.RevisionID & 1)) { - printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); - return; - } - - if (!longhaul.bits.VRMRev) { - printk(KERN_INFO PFX "VRM 8.5\n"); - vrm_mV_table = &vrm85_mV[0]; - mV_vrm_table = &mV_vrm85[0]; - } else { - printk(KERN_INFO PFX "Mobile VRM\n"); - if (cpu_model < CPU_NEHEMIAH) - return; - vrm_mV_table = &mobilevrm_mV[0]; - mV_vrm_table = &mV_mobilevrm[0]; - } - - minvid = vrm_mV_table[longhaul.bits.MinimumVID]; - maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; - - if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { - printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " - "Voltage scaling disabled.\n", - minvid.mV/1000, minvid.mV%1000, - maxvid.mV/1000, maxvid.mV%1000); - return; - } - - if (minvid.mV == maxvid.mV) { - printk(KERN_INFO PFX "Claims to support voltage scaling but " - "min & max are both %d.%03d. " - "Voltage scaling disabled\n", - maxvid.mV/1000, maxvid.mV%1000); - return; - } - - /* How many voltage steps*/ - numvscales = maxvid.pos - minvid.pos + 1; - printk(KERN_INFO PFX - "Max VID=%d.%03d " - "Min VID=%d.%03d, " - "%d possible voltage scales\n", - maxvid.mV/1000, maxvid.mV%1000, - minvid.mV/1000, minvid.mV%1000, - numvscales); - - /* Calculate max frequency at min voltage */ - j = longhaul.bits.MinMHzBR; - if (longhaul.bits.MinMHzBR4) - j += 16; - min_vid_speed = eblcr[j]; - if (min_vid_speed == -1) - return; - switch (longhaul.bits.MinMHzFSB) { - case 0: - min_vid_speed *= 13333; - break; - case 1: - min_vid_speed *= 10000; - break; - case 3: - min_vid_speed *= 6666; - break; - default: - return; - break; - } - if (min_vid_speed >= highest_speed) - return; - /* Calculate kHz for one voltage step */ - kHz_step = (highest_speed - min_vid_speed) / numvscales; - - j = 0; - while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { - speed = longhaul_table[j].frequency; - if (speed > min_vid_speed) - pos = (speed - min_vid_speed) / kHz_step + minvid.pos; - else - pos = minvid.pos; - longhaul_table[j].index |= mV_vrm_table[pos] << 8; - vid = vrm_mV_table[mV_vrm_table[pos]]; - printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", - speed, j, vid.mV); - j++; - } - - can_scale_voltage = 1; - printk(KERN_INFO PFX "Voltage scaling enabled.\n"); -} - - -static int longhaul_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, longhaul_table); -} - - -static int longhaul_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int table_index = 0; - unsigned int i; - unsigned int dir = 0; - u8 vid, current_vid; - - if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, - relation, &table_index)) - return -EINVAL; - - /* Don't set same frequency again */ - if (longhaul_index == table_index) - return 0; - - if (!can_scale_voltage) - longhaul_setstate(table_index); - else { - /* On test system voltage transitions exceeding single - * step up or down were turning motherboard off. Both - * "ondemand" and "userspace" are unsafe. C7 is doing - * this in hardware, C3 is old and we need to do this - * in software. */ - i = longhaul_index; - current_vid = (longhaul_table[longhaul_index].index >> 8); - current_vid &= 0x1f; - if (table_index > longhaul_index) - dir = 1; - while (i != table_index) { - vid = (longhaul_table[i].index >> 8) & 0x1f; - if (vid != current_vid) { - longhaul_setstate(i); - current_vid = vid; - msleep(200); - } - if (dir) - i++; - else - i--; - } - longhaul_setstate(table_index); - } - longhaul_index = table_index; - return 0; -} - - -static unsigned int longhaul_get(unsigned int cpu) -{ - if (cpu) - return 0; - return calc_speed(longhaul_get_cpu_mult()); -} - -static acpi_status longhaul_walk_callback(acpi_handle obj_handle, - u32 nesting_level, - void *context, void **return_value) -{ - struct acpi_device *d; - - if (acpi_bus_get_device(obj_handle, &d)) - return 0; - - *return_value = acpi_driver_data(d); - return 1; -} - -/* VIA don't support PM2 reg, but have something similar */ -static int enable_arbiter_disable(void) -{ - struct pci_dev *dev; - int status = 1; - int reg; - u8 pci_cmd; - - /* Find PLE133 host bridge */ - reg = 0x78; - dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, - NULL); - /* Find PM133/VT8605 host bridge */ - if (dev == NULL) - dev = pci_get_device(PCI_VENDOR_ID_VIA, - PCI_DEVICE_ID_VIA_8605_0, NULL); - /* Find CLE266 host bridge */ - if (dev == NULL) { - reg = 0x76; - dev = pci_get_device(PCI_VENDOR_ID_VIA, - PCI_DEVICE_ID_VIA_862X_0, NULL); - /* Find CN400 V-Link host bridge */ - if (dev == NULL) - dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); - } - if (dev != NULL) { - /* Enable access to port 0x22 */ - pci_read_config_byte(dev, reg, &pci_cmd); - if (!(pci_cmd & 1<<7)) { - pci_cmd |= 1<<7; - pci_write_config_byte(dev, reg, pci_cmd); - pci_read_config_byte(dev, reg, &pci_cmd); - if (!(pci_cmd & 1<<7)) { - printk(KERN_ERR PFX - "Can't enable access to port 0x22.\n"); - status = 0; - } - } - pci_dev_put(dev); - return status; - } - return 0; -} - -static int longhaul_setup_southbridge(void) -{ - struct pci_dev *dev; - u8 pci_cmd; - - /* Find VT8235 southbridge */ - dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); - if (dev == NULL) - /* Find VT8237 southbridge */ - dev = pci_get_device(PCI_VENDOR_ID_VIA, - PCI_DEVICE_ID_VIA_8237, NULL); - if (dev != NULL) { - /* Set transition time to max */ - pci_read_config_byte(dev, 0xec, &pci_cmd); - pci_cmd &= ~(1 << 2); - pci_write_config_byte(dev, 0xec, pci_cmd); - pci_read_config_byte(dev, 0xe4, &pci_cmd); - pci_cmd &= ~(1 << 7); - pci_write_config_byte(dev, 0xe4, pci_cmd); - pci_read_config_byte(dev, 0xe5, &pci_cmd); - pci_cmd |= 1 << 7; - pci_write_config_byte(dev, 0xe5, pci_cmd); - /* Get address of ACPI registers block*/ - pci_read_config_byte(dev, 0x81, &pci_cmd); - if (pci_cmd & 1 << 7) { - pci_read_config_dword(dev, 0x88, &acpi_regs_addr); - acpi_regs_addr &= 0xff00; - printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", - acpi_regs_addr); - } - - pci_dev_put(dev); - return 1; - } - return 0; -} - -static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - char *cpuname = NULL; - int ret; - u32 lo, hi; - - /* Check what we have on this motherboard */ - switch (c->x86_model) { - case 6: - cpu_model = CPU_SAMUEL; - cpuname = "C3 'Samuel' [C5A]"; - longhaul_version = TYPE_LONGHAUL_V1; - memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); - memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr)); - break; - - case 7: - switch (c->x86_mask) { - case 0: - longhaul_version = TYPE_LONGHAUL_V1; - cpu_model = CPU_SAMUEL2; - cpuname = "C3 'Samuel 2' [C5B]"; - /* Note, this is not a typo, early Samuel2's had - * Samuel1 ratios. */ - memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); - memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); - break; - case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V2; - if (c->x86_mask < 8) { - cpu_model = CPU_SAMUEL2; - cpuname = "C3 'Samuel 2' [C5B]"; - } else { - cpu_model = CPU_EZRA; - cpuname = "C3 'Ezra' [C5C]"; - } - memcpy(mults, ezra_mults, sizeof(ezra_mults)); - memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr)); - break; - } - break; - - case 8: - cpu_model = CPU_EZRA_T; - cpuname = "C3 'Ezra-T' [C5M]"; - longhaul_version = TYPE_POWERSAVER; - numscales = 32; - memcpy(mults, ezrat_mults, sizeof(ezrat_mults)); - memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr)); - break; - - case 9: - longhaul_version = TYPE_POWERSAVER; - numscales = 32; - memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); - memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); - switch (c->x86_mask) { - case 0 ... 1: - cpu_model = CPU_NEHEMIAH; - cpuname = "C3 'Nehemiah A' [C5XLOE]"; - break; - case 2 ... 4: - cpu_model = CPU_NEHEMIAH; - cpuname = "C3 'Nehemiah B' [C5XLOH]"; - break; - case 5 ... 15: - cpu_model = CPU_NEHEMIAH_C; - cpuname = "C3 'Nehemiah C' [C5P]"; - break; - } - break; - - default: - cpuname = "Unknown"; - break; - } - /* Check Longhaul ver. 2 */ - if (longhaul_version == TYPE_LONGHAUL_V2) { - rdmsr(MSR_VIA_LONGHAUL, lo, hi); - if (lo == 0 && hi == 0) - /* Looks like MSR isn't present */ - longhaul_version = TYPE_LONGHAUL_V1; - } - - printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname); - switch (longhaul_version) { - case TYPE_LONGHAUL_V1: - case TYPE_LONGHAUL_V2: - printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version); - break; - case TYPE_POWERSAVER: - printk(KERN_CONT "Powersaver supported.\n"); - break; - }; - - /* Doesn't hurt */ - longhaul_setup_southbridge(); - - /* Find ACPI data for processor */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, - NULL, (void *)&pr); - - /* Check ACPI support for C3 state */ - if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { - cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address > 0 && cx->latency <= 1000) - longhaul_flags |= USE_ACPI_C3; - } - /* Disable if it isn't working */ - if (disable_acpi_c3) - longhaul_flags &= ~USE_ACPI_C3; - /* Check if northbridge is friendly */ - if (enable_arbiter_disable()) - longhaul_flags |= USE_NORTHBRIDGE; - - /* Check ACPI support for bus master arbiter disable */ - if (!(longhaul_flags & USE_ACPI_C3 - || longhaul_flags & USE_NORTHBRIDGE) - && ((pr == NULL) || !(pr->flags.bm_control))) { - printk(KERN_ERR PFX - "No ACPI support. Unsupported northbridge.\n"); - return -ENODEV; - } - - if (longhaul_flags & USE_NORTHBRIDGE) - printk(KERN_INFO PFX "Using northbridge support.\n"); - if (longhaul_flags & USE_ACPI_C3) - printk(KERN_INFO PFX "Using ACPI support.\n"); - - ret = longhaul_get_ranges(); - if (ret != 0) - return ret; - - if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) - longhaul_setup_voltagescaling(); - - policy->cpuinfo.transition_latency = 200000; /* nsec */ - policy->cur = calc_speed(longhaul_get_cpu_mult()); - - ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); - if (ret) - return ret; - - cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); - - return 0; -} - -static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static struct freq_attr *longhaul_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver longhaul_driver = { - .verify = longhaul_verify, - .target = longhaul_target, - .get = longhaul_get, - .init = longhaul_cpu_init, - .exit = __devexit_p(longhaul_cpu_exit), - .name = "longhaul", - .owner = THIS_MODULE, - .attr = longhaul_attr, -}; - - -static int __init longhaul_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) - return -ENODEV; - -#ifdef CONFIG_SMP - if (num_online_cpus() > 1) { - printk(KERN_ERR PFX "More than 1 CPU detected, " - "longhaul disabled.\n"); - return -ENODEV; - } -#endif -#ifdef CONFIG_X86_IO_APIC - if (cpu_has_apic) { - printk(KERN_ERR PFX "APIC detected. Longhaul is currently " - "broken in this configuration.\n"); - return -ENODEV; - } -#endif - switch (c->x86_model) { - case 6 ... 9: - return cpufreq_register_driver(&longhaul_driver); - case 10: - printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); - default: - ; - } - - return -ENODEV; -} - - -static void __exit longhaul_exit(void) -{ - int i; - - for (i = 0; i < numscales; i++) { - if (mults[i] == maxmult) { - longhaul_setstate(i); - break; - } - } - - cpufreq_unregister_driver(&longhaul_driver); - kfree(longhaul_table); -} - -/* Even if BIOS is exporting ACPI C3 state, and it is used - * with success when CPU is idle, this state doesn't - * trigger frequency transition in some cases. */ -module_param(disable_acpi_c3, int, 0644); -MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); -/* Change CPU voltage with frequency. Very useful to save - * power, but most VIA C3 processors aren't supporting it. */ -module_param(scale_voltage, int, 0644); -MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); -/* Force revision key to 0 for processors which doesn't - * support voltage scaling, but are introducing itself as - * such. */ -module_param(revid_errata, int, 0644); -MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); - -MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); -MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors."); -MODULE_LICENSE("GPL"); - -late_initcall(longhaul_init); -module_exit(longhaul_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h deleted file mode 100644 index cbf48fbca881..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ /dev/null @@ -1,353 +0,0 @@ -/* - * longhaul.h - * (C) 2003 Dave Jones. - * - * Licensed under the terms of the GNU GPL License version 2. - * - * VIA-specific information - */ - -union msr_bcr2 { - struct { - unsigned Reseved:19, // 18:0 - ESOFTBF:1, // 19 - Reserved2:3, // 22:20 - CLOCKMUL:4, // 26:23 - Reserved3:5; // 31:27 - } bits; - unsigned long val; -}; - -union msr_longhaul { - struct { - unsigned RevisionID:4, // 3:0 - RevisionKey:4, // 7:4 - EnableSoftBusRatio:1, // 8 - EnableSoftVID:1, // 9 - EnableSoftBSEL:1, // 10 - Reserved:3, // 11:13 - SoftBusRatio4:1, // 14 - VRMRev:1, // 15 - SoftBusRatio:4, // 19:16 - SoftVID:5, // 24:20 - Reserved2:3, // 27:25 - SoftBSEL:2, // 29:28 - Reserved3:2, // 31:30 - MaxMHzBR:4, // 35:32 - MaximumVID:5, // 40:36 - MaxMHzFSB:2, // 42:41 - MaxMHzBR4:1, // 43 - Reserved4:4, // 47:44 - MinMHzBR:4, // 51:48 - MinimumVID:5, // 56:52 - MinMHzFSB:2, // 58:57 - MinMHzBR4:1, // 59 - Reserved5:4; // 63:60 - } bits; - unsigned long long val; -}; - -/* - * Clock ratio tables. Div/Mod by 10 to get ratio. - * The eblcr values specify the ratio read from the CPU. - * The mults values specify what to write to the CPU. - */ - -/* - * VIA C3 Samuel 1 & Samuel 2 (stepping 0) - */ -static const int __cpuinitdata samuel1_mults[16] = { - -1, /* 0000 -> RESERVED */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - -1, /* 0011 -> RESERVED */ - -1, /* 0100 -> RESERVED */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - -1, /* 1110 -> RESERVED */ - -1, /* 1111 -> RESERVED */ -}; - -static const int __cpuinitdata samuel1_eblcr[16] = { - 50, /* 0000 -> RESERVED */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - -1, /* 0011 -> RESERVED */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - -1, /* 0111 -> RESERVED */ - -1, /* 1000 -> RESERVED */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - -1, /* 1100 -> RESERVED */ - 75, /* 1101 -> 7.5x */ - -1, /* 1110 -> RESERVED */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 Samuel2 Stepping 1->15 - */ -static const int __cpuinitdata samuel2_eblcr[16] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 110, /* 0111 -> 11.0x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 130, /* 1110 -> 13.0x */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 Ezra - */ -static const int __cpuinitdata ezra_mults[16] = { - 100, /* 0000 -> 10.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ -}; - -static const int __cpuinitdata ezra_eblcr[16] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 (Ezra-T) [C5M]. - */ -static const int __cpuinitdata ezrat_mults[32] = { - 100, /* 0000 -> 10.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ - - -1, /* 0000 -> RESERVED (10.0x) */ - 110, /* 0001 -> 11.0x */ - -1, /* 0010 -> 12.0x */ - -1, /* 0011 -> RESERVED (9.0x)*/ - 105, /* 0100 -> 10.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 135, /* 0111 -> 13.5x */ - 140, /* 1000 -> 14.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 130, /* 1011 -> 13.0x */ - 145, /* 1100 -> 14.5x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - -1, /* 1111 -> RESERVED (12.0x) */ -}; - -static const int __cpuinitdata ezrat_eblcr[32] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ - - -1, /* 0000 -> RESERVED (9.0x) */ - 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - -1, /* 0011 -> RESERVED (10.0x)*/ - 135, /* 0100 -> 13.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 105, /* 0111 -> 10.5x */ - 130, /* 1000 -> 13.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 140, /* 1011 -> 14.0x */ - -1, /* 1100 -> RESERVED (12.0x) */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - 145, /* 1111 -> 14.5x */ -}; - -/* - * VIA C3 Nehemiah */ - -static const int __cpuinitdata nehemiah_mults[32] = { - 100, /* 0000 -> 10.0x */ - -1, /* 0001 -> 16.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - -1, /* 0101 -> RESERVED */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ - -1, /* 0000 -> 10.0x */ - 110, /* 0001 -> 11.0x */ - -1, /* 0010 -> 12.0x */ - -1, /* 0011 -> 9.0x */ - 105, /* 0100 -> 10.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 135, /* 0111 -> 13.5x */ - 140, /* 1000 -> 14.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 130, /* 1011 -> 13.0x */ - 145, /* 1100 -> 14.5x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - -1, /* 1111 -> 12.0x */ -}; - -static const int __cpuinitdata nehemiah_eblcr[32] = { - 50, /* 0000 -> 5.0x */ - 160, /* 0001 -> 16.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - -1, /* 0101 -> RESERVED */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ - 90, /* 0000 -> 9.0x */ - 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - 100, /* 0011 -> 10.0x */ - 135, /* 0100 -> 13.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 105, /* 0111 -> 10.5x */ - 130, /* 1000 -> 13.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 140, /* 1011 -> 14.0x */ - 120, /* 1100 -> 12.0x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - 145 /* 1111 -> 14.5x */ -}; - -/* - * Voltage scales. Div/Mod by 1000 to get actual voltage. - * Which scale to use depends on the VRM type in use. - */ - -struct mV_pos { - unsigned short mV; - unsigned short pos; -}; - -static const struct mV_pos __cpuinitdata vrm85_mV[32] = { - {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, - {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, - {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, - {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, - {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, - {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, - {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, - {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} -}; - -static const unsigned char __cpuinitdata mV_vrm85[32] = { - 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, - 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, - 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, - 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 -}; - -static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { - {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, - {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, - {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, - {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, - {975, 15}, {950, 14}, {925, 13}, {900, 12}, - {875, 11}, {850, 10}, {825, 9}, {800, 8}, - {775, 7}, {750, 6}, {725, 5}, {700, 4}, - {675, 3}, {650, 2}, {625, 1}, {600, 0} -}; - -static const unsigned char __cpuinitdata mV_mobilevrm[32] = { - 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, - 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, - 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, - 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 -}; - diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c deleted file mode 100644 index d9f51367666b..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/timex.h> - -#include <asm/msr.h> -#include <asm/processor.h> - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "longrun", msg) - -static struct cpufreq_driver longrun_driver; - -/** - * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz - * values into per cent values. In TMTA microcode, the following is valid: - * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) - */ -static unsigned int longrun_low_freq, longrun_high_freq; - - -/** - * longrun_get_policy - get the current LongRun policy - * @policy: struct cpufreq_policy where current policy is written into - * - * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS - * and MSR_TMTA_LONGRUN_CTRL - */ -static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy) -{ - u32 msr_lo, msr_hi; - - rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); - if (msr_lo & 0x01) - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else - policy->policy = CPUFREQ_POLICY_POWERSAVE; - - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); - msr_lo &= 0x0000007F; - msr_hi &= 0x0000007F; - - if (longrun_high_freq <= longrun_low_freq) { - /* Assume degenerate Longrun table */ - policy->min = policy->max = longrun_high_freq; - } else { - policy->min = longrun_low_freq + msr_lo * - ((longrun_high_freq - longrun_low_freq) / 100); - policy->max = longrun_low_freq + msr_hi * - ((longrun_high_freq - longrun_low_freq) / 100); - } - policy->cpu = 0; -} - - -/** - * longrun_set_policy - sets a new CPUFreq policy - * @policy: new policy - * - * Sets a new CPUFreq policy on LongRun-capable processors. This function - * has to be called with cpufreq_driver locked. - */ -static int longrun_set_policy(struct cpufreq_policy *policy) -{ - u32 msr_lo, msr_hi; - u32 pctg_lo, pctg_hi; - - if (!policy) - return -EINVAL; - - if (longrun_high_freq <= longrun_low_freq) { - /* Assume degenerate Longrun table */ - pctg_lo = pctg_hi = 100; - } else { - pctg_lo = (policy->min - longrun_low_freq) / - ((longrun_high_freq - longrun_low_freq) / 100); - pctg_hi = (policy->max - longrun_low_freq) / - ((longrun_high_freq - longrun_low_freq) / 100); - } - - if (pctg_hi > 100) - pctg_hi = 100; - if (pctg_lo > pctg_hi) - pctg_lo = pctg_hi; - - /* performance or economy mode */ - rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - msr_lo &= 0xFFFFFFFE; - switch (policy->policy) { - case CPUFREQ_POLICY_PERFORMANCE: - msr_lo |= 0x00000001; - break; - case CPUFREQ_POLICY_POWERSAVE: - break; - } - wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - - /* lower and upper boundary */ - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - msr_lo &= 0xFFFFFF80; - msr_hi &= 0xFFFFFF80; - msr_lo |= pctg_lo; - msr_hi |= pctg_hi; - wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - - return 0; -} - - -/** - * longrun_verify_poliy - verifies a new CPUFreq policy - * @policy: the policy to verify - * - * Validates a new CPUFreq policy. This function has to be called with - * cpufreq_driver locked. - */ -static int longrun_verify_policy(struct cpufreq_policy *policy) -{ - if (!policy) - return -EINVAL; - - policy->cpu = 0; - cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - - if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && - (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) - return -EINVAL; - - return 0; -} - -static unsigned int longrun_get(unsigned int cpu) -{ - u32 eax, ebx, ecx, edx; - - if (cpu) - return 0; - - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - dprintk("cpuid eax is %u\n", eax); - - return eax * 1000; -} - -/** - * longrun_determine_freqs - determines the lowest and highest possible core frequency - * @low_freq: an int to put the lowest frequency into - * @high_freq: an int to put the highest frequency into - * - * Determines the lowest and highest possible core frequencies on this CPU. - * This is necessary to calculate the performance percentage according to - * TMTA rules: - * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) - */ -static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, - unsigned int *high_freq) -{ - u32 msr_lo, msr_hi; - u32 save_lo, save_hi; - u32 eax, ebx, ecx, edx; - u32 try_hi; - struct cpuinfo_x86 *c = &cpu_data(0); - - if (!low_freq || !high_freq) - return -EINVAL; - - if (cpu_has(c, X86_FEATURE_LRTI)) { - /* if the LongRun Table Interface is present, the - * detection is a bit easier: - * For minimum frequency, read out the maximum - * level (msr_hi), write that into "currently - * selected level", and read out the frequency. - * For maximum frequency, read out level zero. - */ - /* minimum */ - rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); - wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); - rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); - *low_freq = msr_lo * 1000; /* to kHz */ - - /* maximum */ - wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); - rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); - *high_freq = msr_lo * 1000; /* to kHz */ - - dprintk("longrun table interface told %u - %u kHz\n", - *low_freq, *high_freq); - - if (*low_freq > *high_freq) - *low_freq = *high_freq; - return 0; - } - - /* set the upper border to the value determined during TSC init */ - *high_freq = (cpu_khz / 1000); - *high_freq = *high_freq * 1000; - dprintk("high frequency is %u kHz\n", *high_freq); - - /* get current borders */ - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - save_lo = msr_lo & 0x0000007F; - save_hi = msr_hi & 0x0000007F; - - /* if current perf_pctg is larger than 90%, we need to decrease the - * upper limit to make the calculation more accurate. - */ - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - /* try decreasing in 10% steps, some processors react only - * on some barrier values */ - for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) { - /* set to 0 to try_hi perf_pctg */ - msr_lo &= 0xFFFFFF80; - msr_hi &= 0xFFFFFF80; - msr_hi |= try_hi; - wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - - /* read out current core MHz and current perf_pctg */ - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - - /* restore values */ - wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); - } - dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); - - /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) - * eqals - * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) - * - * high_freq * perf_pctg is stored tempoarily into "ebx". - */ - ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ - - if ((ecx > 95) || (ecx == 0) || (eax < ebx)) - return -EIO; - - edx = ((eax - ebx) * 100) / (100 - ecx); - *low_freq = edx * 1000; /* back to kHz */ - - dprintk("low frequency is %u kHz\n", *low_freq); - - if (*low_freq > *high_freq) - *low_freq = *high_freq; - - return 0; -} - - -static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) -{ - int result = 0; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - /* detect low and high frequency */ - result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); - if (result) - return result; - - /* cpuinfo and default policy values */ - policy->cpuinfo.min_freq = longrun_low_freq; - policy->cpuinfo.max_freq = longrun_high_freq; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - longrun_get_policy(policy); - - return 0; -} - - -static struct cpufreq_driver longrun_driver = { - .flags = CPUFREQ_CONST_LOOPS, - .verify = longrun_verify_policy, - .setpolicy = longrun_set_policy, - .get = longrun_get, - .init = longrun_cpu_init, - .name = "longrun", - .owner = THIS_MODULE, -}; - - -/** - * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver - * - * Initializes the LongRun support. - */ -static int __init longrun_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_TRANSMETA || - !cpu_has(c, X86_FEATURE_LONGRUN)) - return -ENODEV; - - return cpufreq_register_driver(&longrun_driver); -} - - -/** - * longrun_exit - unregisters LongRun support - */ -static void __exit longrun_exit(void) -{ - cpufreq_unregister_driver(&longrun_driver); -} - - -MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); -MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and " - "Efficeon processors."); -MODULE_LICENSE("GPL"); - -module_init(longrun_init); -module_exit(longrun_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c deleted file mode 100644 index 911e193018ae..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/mperf.c +++ /dev/null @@ -1,51 +0,0 @@ -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/slab.h> - -#include "mperf.h" - -static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); - -/* Called via smp_call_function_single(), on the target CPU */ -static void read_measured_perf_ctrs(void *_cur) -{ - struct aperfmperf *am = _cur; - - get_aperfmperf(am); -} - -/* - * Return the measured active (C0) frequency on this CPU since last call - * to this function. - * Input: cpu number - * Return: Average CPU frequency in terms of max frequency (zero on error) - * - * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance - * over a period of time, while CPU is in C0 state. - * IA32_MPERF counts at the rate of max advertised frequency - * IA32_APERF counts at the rate of actual CPU frequency - * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and - * no meaning should be associated with absolute values of these MSRs. - */ -unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, - unsigned int cpu) -{ - struct aperfmperf perf; - unsigned long ratio; - unsigned int retval; - - if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) - return 0; - - ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); - per_cpu(acfreq_old_perf, cpu) = perf; - - retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; - - return retval; -} -EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); -MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h deleted file mode 100644 index 5dbf2950dc22..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/mperf.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * (c) 2010 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - */ - -unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, - unsigned int cpu); diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c deleted file mode 100644 index 52c93648e492..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Pentium 4/Xeon CPU on demand clock modulation/speed scaling - * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> - * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> - * (C) 2002 Arjan van de Ven <arjanv@redhat.com> - * (C) 2002 Tora T. Engstad - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * The author(s) of this software shall not be held liable for damages - * of any nature resulting due to the use of this software. This - * software is provided AS-IS with no warranties. - * - * Date Errata Description - * 20020525 N44, O17 12.5% or 25% DC causes lockup - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/cpufreq.h> -#include <linux/cpumask.h> -#include <linux/timex.h> - -#include <asm/processor.h> -#include <asm/msr.h> -#include <asm/timer.h> - -#include "speedstep-lib.h" - -#define PFX "p4-clockmod: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "p4-clockmod", msg) - -/* - * Duty Cycle (3bits), note DC_DISABLE is not specified in - * intel docs i just use it to mean disable - */ -enum { - DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, - DC_64PT, DC_75PT, DC_88PT, DC_DISABLE -}; - -#define DC_ENTRIES 8 - - -static int has_N44_O17_errata[NR_CPUS]; -static unsigned int stock_freq; -static struct cpufreq_driver p4clockmod_driver; -static unsigned int cpufreq_p4_get(unsigned int cpu); - -static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) -{ - u32 l, h; - - if (!cpu_online(cpu) || - (newstate > DC_DISABLE) || (newstate == DC_RESV)) - return -EINVAL; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); - - if (l & 0x01) - dprintk("CPU#%d currently thermal throttled\n", cpu); - - if (has_N44_O17_errata[cpu] && - (newstate == DC_25PT || newstate == DC_DFLT)) - newstate = DC_38PT; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); - if (newstate == DC_DISABLE) { - dprintk("CPU#%d disabling modulation\n", cpu); - wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); - } else { - dprintk("CPU#%d setting duty cycle to %d%%\n", - cpu, ((125 * newstate) / 10)); - /* bits 63 - 5 : reserved - * bit 4 : enable/disable - * bits 3-1 : duty cycle - * bit 0 : reserved - */ - l = (l & ~14); - l = l | (1<<4) | ((newstate & 0x7)<<1); - wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); - } - - return 0; -} - - -static struct cpufreq_frequency_table p4clockmod_table[] = { - {DC_RESV, CPUFREQ_ENTRY_INVALID}, - {DC_DFLT, 0}, - {DC_25PT, 0}, - {DC_38PT, 0}, - {DC_50PT, 0}, - {DC_64PT, 0}, - {DC_75PT, 0}, - {DC_88PT, 0}, - {DC_DISABLE, 0}, - {DC_RESV, CPUFREQ_TABLE_END}, -}; - - -static int cpufreq_p4_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = DC_RESV; - struct cpufreq_freqs freqs; - int i; - - if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], - target_freq, relation, &newstate)) - return -EINVAL; - - freqs.old = cpufreq_p4_get(policy->cpu); - freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; - - if (freqs.new == freqs.old) - return 0; - - /* notifiers */ - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - /* run on each logical CPU, - * see section 13.15.3 of IA32 Intel Architecture Software - * Developer's Manual, Volume 3 - */ - for_each_cpu(i, policy->cpus) - cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); - - /* notifiers */ - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - return 0; -} - - -static int cpufreq_p4_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); -} - - -static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) -{ - if (c->x86 == 0x06) { - if (cpu_has(c, X86_FEATURE_EST)) - printk_once(KERN_WARNING PFX "Warning: EST-capable " - "CPU detected. The acpi-cpufreq module offers " - "voltage scaling in addition to frequency " - "scaling. You should use that instead of " - "p4-clockmod, if possible.\n"); - switch (c->x86_model) { - case 0x0E: /* Core */ - case 0x0F: /* Core Duo */ - case 0x16: /* Celeron Core */ - case 0x1C: /* Atom */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); - case 0x0D: /* Pentium M (Dothan) */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - /* fall through */ - case 0x09: /* Pentium M (Banias) */ - return speedstep_get_frequency(SPEEDSTEP_CPU_PM); - } - } - - if (c->x86 != 0xF) - return 0; - - /* on P-4s, the TSC runs with constant frequency independent whether - * throttling is active or not. */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - - if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) { - printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " - "The speedstep-ich or acpi cpufreq modules offer " - "voltage scaling in addition of frequency scaling. " - "You should use either one instead of p4-clockmod, " - "if possible.\n"); - return speedstep_get_frequency(SPEEDSTEP_CPU_P4M); - } - - return speedstep_get_frequency(SPEEDSTEP_CPU_P4D); -} - - - -static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = &cpu_data(policy->cpu); - int cpuid = 0; - unsigned int i; - -#ifdef CONFIG_SMP - cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); -#endif - - /* Errata workaround */ - cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; - switch (cpuid) { - case 0x0f07: - case 0x0f0a: - case 0x0f11: - case 0x0f12: - has_N44_O17_errata[policy->cpu] = 1; - dprintk("has errata -- disabling low frequencies\n"); - } - - if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && - c->x86_model < 2) { - /* switch to maximum frequency and measure result */ - cpufreq_p4_setdc(policy->cpu, DC_DISABLE); - recalibrate_cpu_khz(); - } - /* get max frequency */ - stock_freq = cpufreq_p4_get_frequency(c); - if (!stock_freq) - return -EINVAL; - - /* table init */ - for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if ((i < 2) && (has_N44_O17_errata[policy->cpu])) - p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; - else - p4clockmod_table[i].frequency = (stock_freq * i)/8; - } - cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); - - /* cpuinfo and default policy values */ - - /* the transition latency is set to be 1 higher than the maximum - * transition latency of the ondemand governor */ - policy->cpuinfo.transition_latency = 10000001; - policy->cur = stock_freq; - - return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); -} - - -static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int cpufreq_p4_get(unsigned int cpu) -{ - u32 l, h; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); - - if (l & 0x10) { - l = l >> 1; - l &= 0x7; - } else - l = DC_DISABLE; - - if (l != DC_DISABLE) - return stock_freq * l / 8; - - return stock_freq; -} - -static struct freq_attr *p4clockmod_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver p4clockmod_driver = { - .verify = cpufreq_p4_verify, - .target = cpufreq_p4_target, - .init = cpufreq_p4_cpu_init, - .exit = cpufreq_p4_cpu_exit, - .get = cpufreq_p4_get, - .name = "p4-clockmod", - .owner = THIS_MODULE, - .attr = p4clockmod_attr, -}; - - -static int __init cpufreq_p4_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - int ret; - - /* - * THERM_CONTROL is architectural for IA32 now, so - * we can rely on the capability checks - */ - if (c->x86_vendor != X86_VENDOR_INTEL) - return -ENODEV; - - if (!test_cpu_cap(c, X86_FEATURE_ACPI) || - !test_cpu_cap(c, X86_FEATURE_ACC)) - return -ENODEV; - - ret = cpufreq_register_driver(&p4clockmod_driver); - if (!ret) - printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock " - "Modulation available\n"); - - return ret; -} - - -static void __exit cpufreq_p4_exit(void) -{ - cpufreq_unregister_driver(&p4clockmod_driver); -} - - -MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>"); -MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); -MODULE_LICENSE("GPL"); - -late_initcall(cpufreq_p4_init); -module_exit(cpufreq_p4_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c deleted file mode 100644 index 755a31e0f5b0..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface - * - * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com> - * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. - * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON - * INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/sched.h> -#include <linux/cpufreq.h> -#include <linux/compiler.h> -#include <linux/slab.h> - -#include <linux/acpi.h> -#include <linux/io.h> -#include <linux/spinlock.h> -#include <linux/uaccess.h> - -#include <acpi/processor.h> - -#define PCC_VERSION "1.00.00" -#define POLL_LOOPS 300 - -#define CMD_COMPLETE 0x1 -#define CMD_GET_FREQ 0x0 -#define CMD_SET_FREQ 0x1 - -#define BUF_SZ 4 - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "pcc-cpufreq", msg) - -struct pcc_register_resource { - u8 descriptor; - u16 length; - u8 space_id; - u8 bit_width; - u8 bit_offset; - u8 access_size; - u64 address; -} __attribute__ ((packed)); - -struct pcc_memory_resource { - u8 descriptor; - u16 length; - u8 space_id; - u8 resource_usage; - u8 type_specific; - u64 granularity; - u64 minimum; - u64 maximum; - u64 translation_offset; - u64 address_length; -} __attribute__ ((packed)); - -static struct cpufreq_driver pcc_cpufreq_driver; - -struct pcc_header { - u32 signature; - u16 length; - u8 major; - u8 minor; - u32 features; - u16 command; - u16 status; - u32 latency; - u32 minimum_time; - u32 maximum_time; - u32 nominal; - u32 throttled_frequency; - u32 minimum_frequency; -}; - -static void __iomem *pcch_virt_addr; -static struct pcc_header __iomem *pcch_hdr; - -static DEFINE_SPINLOCK(pcc_lock); - -static struct acpi_generic_address doorbell; - -static u64 doorbell_preserve; -static u64 doorbell_write; - -static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f, - 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46}; - -struct pcc_cpu { - u32 input_offset; - u32 output_offset; -}; - -static struct pcc_cpu __percpu *pcc_cpu_info; - -static int pcc_cpufreq_verify(struct cpufreq_policy *policy) -{ - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - return 0; -} - -static inline void pcc_cmd(void) -{ - u64 doorbell_value; - int i; - - acpi_read(&doorbell_value, &doorbell); - acpi_write((doorbell_value & doorbell_preserve) | doorbell_write, - &doorbell); - - for (i = 0; i < POLL_LOOPS; i++) { - if (ioread16(&pcch_hdr->status) & CMD_COMPLETE) - break; - } -} - -static inline void pcc_clear_mapping(void) -{ - if (pcch_virt_addr) - iounmap(pcch_virt_addr); - pcch_virt_addr = NULL; -} - -static unsigned int pcc_get_freq(unsigned int cpu) -{ - struct pcc_cpu *pcc_cpu_data; - unsigned int curr_freq; - unsigned int freq_limit; - u16 status; - u32 input_buffer; - u32 output_buffer; - - spin_lock(&pcc_lock); - - dprintk("get: get_freq for CPU %d\n", cpu); - pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); - - input_buffer = 0x1; - iowrite32(input_buffer, - (pcch_virt_addr + pcc_cpu_data->input_offset)); - iowrite16(CMD_GET_FREQ, &pcch_hdr->command); - - pcc_cmd(); - - output_buffer = - ioread32(pcch_virt_addr + pcc_cpu_data->output_offset); - - /* Clear the input buffer - we are done with the current command */ - memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); - - status = ioread16(&pcch_hdr->status); - if (status != CMD_COMPLETE) { - dprintk("get: FAILED: for CPU %d, status is %d\n", - cpu, status); - goto cmd_incomplete; - } - iowrite16(0, &pcch_hdr->status); - curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) - / 100) * 1000); - - dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " - "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", - cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), - output_buffer, curr_freq); - - freq_limit = (output_buffer >> 8) & 0xff; - if (freq_limit != 0xff) { - dprintk("get: frequency for cpu %d is being temporarily" - " capped at %d\n", cpu, curr_freq); - } - - spin_unlock(&pcc_lock); - return curr_freq; - -cmd_incomplete: - iowrite16(0, &pcch_hdr->status); - spin_unlock(&pcc_lock); - return 0; -} - -static int pcc_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - struct pcc_cpu *pcc_cpu_data; - struct cpufreq_freqs freqs; - u16 status; - u32 input_buffer; - int cpu; - - spin_lock(&pcc_lock); - cpu = policy->cpu; - pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); - - dprintk("target: CPU %d should go to target freq: %d " - "(virtual) input_offset is 0x%x\n", - cpu, target_freq, - (pcch_virt_addr + pcc_cpu_data->input_offset)); - - freqs.new = target_freq; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - input_buffer = 0x1 | (((target_freq * 100) - / (ioread32(&pcch_hdr->nominal) * 1000)) << 8); - iowrite32(input_buffer, - (pcch_virt_addr + pcc_cpu_data->input_offset)); - iowrite16(CMD_SET_FREQ, &pcch_hdr->command); - - pcc_cmd(); - - /* Clear the input buffer - we are done with the current command */ - memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); - - status = ioread16(&pcch_hdr->status); - if (status != CMD_COMPLETE) { - dprintk("target: FAILED for cpu %d, with status: 0x%x\n", - cpu, status); - goto cmd_incomplete; - } - iowrite16(0, &pcch_hdr->status); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); - spin_unlock(&pcc_lock); - - return 0; - -cmd_incomplete: - iowrite16(0, &pcch_hdr->status); - spin_unlock(&pcc_lock); - return -EINVAL; -} - -static int pcc_get_offset(int cpu) -{ - acpi_status status; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *pccp, *offset; - struct pcc_cpu *pcc_cpu_data; - struct acpi_processor *pr; - int ret = 0; - - pr = per_cpu(processors, cpu); - pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); - - status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer); - if (ACPI_FAILURE(status)) - return -ENODEV; - - pccp = buffer.pointer; - if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) { - ret = -ENODEV; - goto out_free; - }; - - offset = &(pccp->package.elements[0]); - if (!offset || offset->type != ACPI_TYPE_INTEGER) { - ret = -ENODEV; - goto out_free; - } - - pcc_cpu_data->input_offset = offset->integer.value; - - offset = &(pccp->package.elements[1]); - if (!offset || offset->type != ACPI_TYPE_INTEGER) { - ret = -ENODEV; - goto out_free; - } - - pcc_cpu_data->output_offset = offset->integer.value; - - memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); - memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); - - dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " - "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", - cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); -out_free: - kfree(buffer.pointer); - return ret; -} - -static int __init pcc_cpufreq_do_osc(acpi_handle *handle) -{ - acpi_status status; - struct acpi_object_list input; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object in_params[4]; - union acpi_object *out_obj; - u32 capabilities[2]; - u32 errors; - u32 supported; - int ret = 0; - - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 2; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 8; - in_params[3].buffer.pointer = (u8 *)&capabilities; - - capabilities[0] = OSC_QUERY_ENABLE; - capabilities[1] = 0x1; - - status = acpi_evaluate_object(*handle, "_OSC", &input, &output); - if (ACPI_FAILURE(status)) - return -ENODEV; - - if (!output.length) - return -ENODEV; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - ret = -ENODEV; - goto out_free; - } - - errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - ret = -ENODEV; - goto out_free; - } - - supported = *((u32 *)(out_obj->buffer.pointer + 4)); - if (!(supported & 0x1)) { - ret = -ENODEV; - goto out_free; - } - - kfree(output.pointer); - capabilities[0] = 0x0; - capabilities[1] = 0x1; - - status = acpi_evaluate_object(*handle, "_OSC", &input, &output); - if (ACPI_FAILURE(status)) - return -ENODEV; - - if (!output.length) - return -ENODEV; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - ret = -ENODEV; - goto out_free; - } - - errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - ret = -ENODEV; - goto out_free; - } - - supported = *((u32 *)(out_obj->buffer.pointer + 4)); - if (!(supported & 0x1)) { - ret = -ENODEV; - goto out_free; - } - -out_free: - kfree(output.pointer); - return ret; -} - -static int __init pcc_cpufreq_probe(void) -{ - acpi_status status; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - struct pcc_memory_resource *mem_resource; - struct pcc_register_resource *reg_resource; - union acpi_object *out_obj, *member; - acpi_handle handle, osc_handle, pcch_handle; - int ret = 0; - - status = acpi_get_handle(NULL, "\\_SB", &handle); - if (ACPI_FAILURE(status)) - return -ENODEV; - - status = acpi_get_handle(handle, "PCCH", &pcch_handle); - if (ACPI_FAILURE(status)) - return -ENODEV; - - status = acpi_get_handle(handle, "_OSC", &osc_handle); - if (ACPI_SUCCESS(status)) { - ret = pcc_cpufreq_do_osc(&osc_handle); - if (ret) - dprintk("probe: _OSC evaluation did not succeed\n"); - /* Firmware's use of _OSC is optional */ - ret = 0; - } - - status = acpi_evaluate_object(handle, "PCCH", NULL, &output); - if (ACPI_FAILURE(status)) - return -ENODEV; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_PACKAGE) { - ret = -ENODEV; - goto out_free; - } - - member = &out_obj->package.elements[0]; - if (member->type != ACPI_TYPE_BUFFER) { - ret = -ENODEV; - goto out_free; - } - - mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; - - dprintk("probe: mem_resource descriptor: 0x%x," - " length: %d, space_id: %d, resource_usage: %d," - " type_specific: %d, granularity: 0x%llx," - " minimum: 0x%llx, maximum: 0x%llx," - " translation_offset: 0x%llx, address_length: 0x%llx\n", - mem_resource->descriptor, mem_resource->length, - mem_resource->space_id, mem_resource->resource_usage, - mem_resource->type_specific, mem_resource->granularity, - mem_resource->minimum, mem_resource->maximum, - mem_resource->translation_offset, - mem_resource->address_length); - - if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) { - ret = -ENODEV; - goto out_free; - } - - pcch_virt_addr = ioremap_nocache(mem_resource->minimum, - mem_resource->address_length); - if (pcch_virt_addr == NULL) { - dprintk("probe: could not map shared mem region\n"); - goto out_free; - } - pcch_hdr = pcch_virt_addr; - - dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); - dprintk("probe: PCCH header is at physical address: 0x%llx," - " signature: 0x%x, length: %d bytes, major: %d, minor: %d," - " supported features: 0x%x, command field: 0x%x," - " status field: 0x%x, nominal latency: %d us\n", - mem_resource->minimum, ioread32(&pcch_hdr->signature), - ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major), - ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features), - ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), - ioread32(&pcch_hdr->latency)); - - dprintk("probe: min time between commands: %d us," - " max time between commands: %d us," - " nominal CPU frequency: %d MHz," - " minimum CPU frequency: %d MHz," - " minimum CPU frequency without throttling: %d MHz\n", - ioread32(&pcch_hdr->minimum_time), - ioread32(&pcch_hdr->maximum_time), - ioread32(&pcch_hdr->nominal), - ioread32(&pcch_hdr->throttled_frequency), - ioread32(&pcch_hdr->minimum_frequency)); - - member = &out_obj->package.elements[1]; - if (member->type != ACPI_TYPE_BUFFER) { - ret = -ENODEV; - goto pcch_free; - } - - reg_resource = (struct pcc_register_resource *)member->buffer.pointer; - - doorbell.space_id = reg_resource->space_id; - doorbell.bit_width = reg_resource->bit_width; - doorbell.bit_offset = reg_resource->bit_offset; - doorbell.access_width = 64; - doorbell.address = reg_resource->address; - - dprintk("probe: doorbell: space_id is %d, bit_width is %d, " - "bit_offset is %d, access_width is %d, address is 0x%llx\n", - doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, - doorbell.access_width, reg_resource->address); - - member = &out_obj->package.elements[2]; - if (member->type != ACPI_TYPE_INTEGER) { - ret = -ENODEV; - goto pcch_free; - } - - doorbell_preserve = member->integer.value; - - member = &out_obj->package.elements[3]; - if (member->type != ACPI_TYPE_INTEGER) { - ret = -ENODEV; - goto pcch_free; - } - - doorbell_write = member->integer.value; - - dprintk("probe: doorbell_preserve: 0x%llx," - " doorbell_write: 0x%llx\n", - doorbell_preserve, doorbell_write); - - pcc_cpu_info = alloc_percpu(struct pcc_cpu); - if (!pcc_cpu_info) { - ret = -ENOMEM; - goto pcch_free; - } - - printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency" - " limits: %d MHz, %d MHz\n", PCC_VERSION, - ioread32(&pcch_hdr->minimum_frequency), - ioread32(&pcch_hdr->nominal)); - kfree(output.pointer); - return ret; -pcch_free: - pcc_clear_mapping(); -out_free: - kfree(output.pointer); - return ret; -} - -static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int cpu = policy->cpu; - unsigned int result = 0; - - if (!pcch_virt_addr) { - result = -1; - goto out; - } - - result = pcc_get_offset(cpu); - if (result) { - dprintk("init: PCCP evaluation failed\n"); - goto out; - } - - policy->max = policy->cpuinfo.max_freq = - ioread32(&pcch_hdr->nominal) * 1000; - policy->min = policy->cpuinfo.min_freq = - ioread32(&pcch_hdr->minimum_frequency) * 1000; - policy->cur = pcc_get_freq(cpu); - - if (!policy->cur) { - dprintk("init: Unable to get current CPU frequency\n"); - result = -EINVAL; - goto out; - } - - dprintk("init: policy->max is %d, policy->min is %d\n", - policy->max, policy->min); -out: - return result; -} - -static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - return 0; -} - -static struct cpufreq_driver pcc_cpufreq_driver = { - .flags = CPUFREQ_CONST_LOOPS, - .get = pcc_get_freq, - .verify = pcc_cpufreq_verify, - .target = pcc_cpufreq_target, - .init = pcc_cpufreq_cpu_init, - .exit = pcc_cpufreq_cpu_exit, - .name = "pcc-cpufreq", - .owner = THIS_MODULE, -}; - -static int __init pcc_cpufreq_init(void) -{ - int ret; - - if (acpi_disabled) - return 0; - - ret = pcc_cpufreq_probe(); - if (ret) { - dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); - return ret; - } - - ret = cpufreq_register_driver(&pcc_cpufreq_driver); - - return ret; -} - -static void __exit pcc_cpufreq_exit(void) -{ - cpufreq_unregister_driver(&pcc_cpufreq_driver); - - pcc_clear_mapping(); - - free_percpu(pcc_cpu_info); -} - -MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar"); -MODULE_VERSION(PCC_VERSION); -MODULE_DESCRIPTION("Processor Clocking Control interface driver"); -MODULE_LICENSE("GPL"); - -late_initcall(pcc_cpufreq_init); -module_exit(pcc_cpufreq_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c deleted file mode 100644 index b3379d6a5c57..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) - * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, - * Dominik Brodowski. - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/io.h> - -#include <asm/msr.h> - -#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long - as it is unused */ - -#define PFX "powernow-k6: " -static unsigned int busfreq; /* FSB, in 10 kHz */ -static unsigned int max_multiplier; - - -/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ -static struct cpufreq_frequency_table clock_ratio[] = { - {45, /* 000 -> 4.5x */ 0}, - {50, /* 001 -> 5.0x */ 0}, - {40, /* 010 -> 4.0x */ 0}, - {55, /* 011 -> 5.5x */ 0}, - {20, /* 100 -> 2.0x */ 0}, - {30, /* 101 -> 3.0x */ 0}, - {60, /* 110 -> 6.0x */ 0}, - {35, /* 111 -> 3.5x */ 0}, - {0, CPUFREQ_TABLE_END} -}; - - -/** - * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier - * - * Returns the current setting of the frequency multiplier. Core clock - * speed is frequency of the Front-Side Bus multiplied with this value. - */ -static int powernow_k6_get_cpu_multiplier(void) -{ - u64 invalue = 0; - u32 msrval; - - msrval = POWERNOW_IOPORT + 0x1; - wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue = inl(POWERNOW_IOPORT + 0x8); - msrval = POWERNOW_IOPORT + 0x0; - wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ - - return clock_ratio[(invalue >> 5)&7].index; -} - - -/** - * powernow_k6_set_state - set the PowerNow! multiplier - * @best_i: clock_ratio[best_i] is the target multiplier - * - * Tries to change the PowerNow! multiplier - */ -static void powernow_k6_set_state(unsigned int best_i) -{ - unsigned long outvalue = 0, invalue = 0; - unsigned long msrval; - struct cpufreq_freqs freqs; - - if (clock_ratio[best_i].index > max_multiplier) { - printk(KERN_ERR PFX "invalid target frequency\n"); - return; - } - - freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); - freqs.new = busfreq * clock_ratio[best_i].index; - freqs.cpu = 0; /* powernow-k6.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* we now need to transform best_i to the BVC format, see AMD#23446 */ - - outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); - - msrval = POWERNOW_IOPORT + 0x1; - wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue = inl(POWERNOW_IOPORT + 0x8); - invalue = invalue & 0xf; - outvalue = outvalue | invalue; - outl(outvalue , (POWERNOW_IOPORT + 0x8)); - msrval = POWERNOW_IOPORT + 0x0; - wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return; -} - - -/** - * powernow_k6_verify - verifies a new CPUfreq policy - * @policy: new policy - * - * Policy must be within lowest and highest possible CPU Frequency, - * and at least one possible state must be within min and max. - */ -static int powernow_k6_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); -} - - -/** - * powernow_k6_setpolicy - sets a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency - * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * sets a new CPUFreq policy - */ -static int powernow_k6_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, &clock_ratio[0], - target_freq, relation, &newstate)) - return -EINVAL; - - powernow_k6_set_state(newstate); - - return 0; -} - - -static int powernow_k6_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i, f; - int result; - - if (policy->cpu != 0) - return -ENODEV; - - /* get frequencies */ - max_multiplier = powernow_k6_get_cpu_multiplier(); - busfreq = cpu_khz / max_multiplier; - - /* table init */ - for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { - f = clock_ratio[i].index; - if (f > max_multiplier) - clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; - else - clock_ratio[i].frequency = busfreq * f; - } - - /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = 200000; - policy->cur = busfreq * max_multiplier; - - result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); - if (result) - return result; - - cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); - - return 0; -} - - -static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int i; - for (i = 0; i < 8; i++) { - if (i == max_multiplier) - powernow_k6_set_state(i); - } - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int powernow_k6_get(unsigned int cpu) -{ - unsigned int ret; - ret = (busfreq * powernow_k6_get_cpu_multiplier()); - return ret; -} - -static struct freq_attr *powernow_k6_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver powernow_k6_driver = { - .verify = powernow_k6_verify, - .target = powernow_k6_target, - .init = powernow_k6_cpu_init, - .exit = powernow_k6_cpu_exit, - .get = powernow_k6_get, - .name = "powernow-k6", - .owner = THIS_MODULE, - .attr = powernow_k6_attr, -}; - - -/** - * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver - * - * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported - * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero - * on success. - */ -static int __init powernow_k6_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || - ((c->x86_model != 12) && (c->x86_model != 13))) - return -ENODEV; - - if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { - printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n"); - return -EIO; - } - - if (cpufreq_register_driver(&powernow_k6_driver)) { - release_region(POWERNOW_IOPORT, 16); - return -EINVAL; - } - - return 0; -} - - -/** - * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support - * - * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. - */ -static void __exit powernow_k6_exit(void) -{ - cpufreq_unregister_driver(&powernow_k6_driver); - release_region(POWERNOW_IOPORT, 16); -} - - -MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, " - "Dominik Brodowski <linux@brodo.de>"); -MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); -MODULE_LICENSE("GPL"); - -module_init(powernow_k6_init); -module_exit(powernow_k6_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c deleted file mode 100644 index 4a45fd6e41ba..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ /dev/null @@ -1,752 +0,0 @@ -/* - * AMD K7 Powernow driver. - * (C) 2003 Dave Jones on behalf of SuSE Labs. - * (C) 2003-2004 Dave Jones <davej@redhat.com> - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by AMD. - * - * Errata 5: - * CPU may fail to execute a FID/VID change in presence of interrupt. - * - We cli/sti on stepping A0 CPUs around the FID/VID transition. - * Errata 15: - * CPU with half frequency multipliers may hang upon wakeup from disconnect. - * - We disable half multipliers if ACPI is used on A0 stepping CPUs. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/dmi.h> -#include <linux/timex.h> -#include <linux/io.h> - -#include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */ -#include <asm/msr.h> -#include <asm/system.h> - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI -#include <linux/acpi.h> -#include <acpi/processor.h> -#endif - -#include "powernow-k7.h" - -#define PFX "powernow: " - - -struct psb_s { - u8 signature[10]; - u8 tableversion; - u8 flags; - u16 settlingtime; - u8 reserved1; - u8 numpst; -}; - -struct pst_s { - u32 cpuid; - u8 fsbspeed; - u8 maxfid; - u8 startvid; - u8 numpstates; -}; - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI -union powernow_acpi_control_t { - struct { - unsigned long fid:5, - vid:5, - sgtc:20, - res1:2; - } bits; - unsigned long val; -}; -#endif - -#ifdef CONFIG_CPU_FREQ_DEBUG -/* divide by 1000 to get VCore voltage in V. */ -static const int mobile_vid_table[32] = { - 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, - 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, - 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, - 1075, 1050, 1025, 1000, 975, 950, 925, 0, -}; -#endif - -/* divide by 10 to get FID. */ -static const int fid_codes[32] = { - 110, 115, 120, 125, 50, 55, 60, 65, - 70, 75, 80, 85, 90, 95, 100, 105, - 30, 190, 40, 200, 130, 135, 140, 210, - 150, 225, 160, 165, 170, 180, -1, -1, -}; - -/* This parameter is used in order to force ACPI instead of legacy method for - * configuration purpose. - */ - -static int acpi_force; - -static struct cpufreq_frequency_table *powernow_table; - -static unsigned int can_scale_bus; -static unsigned int can_scale_vid; -static unsigned int minimum_speed = -1; -static unsigned int maximum_speed; -static unsigned int number_scales; -static unsigned int fsb; -static unsigned int latency; -static char have_a0; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "powernow-k7", msg) - -static int check_fsb(unsigned int fsbspeed) -{ - int delta; - unsigned int f = fsb / 1000; - - delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; - return delta < 5; -} - -static int check_powernow(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - unsigned int maxei, eax, ebx, ecx, edx; - - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) { -#ifdef MODULE - printk(KERN_INFO PFX "This module only works with " - "AMD K7 CPUs\n"); -#endif - return 0; - } - - /* Get maximum capabilities */ - maxei = cpuid_eax(0x80000000); - if (maxei < 0x80000007) { /* Any powernow info ? */ -#ifdef MODULE - printk(KERN_INFO PFX "No powernow capabilities detected\n"); -#endif - return 0; - } - - if ((c->x86_model == 6) && (c->x86_mask == 0)) { - printk(KERN_INFO PFX "K7 660[A0] core detected, " - "enabling errata workarounds\n"); - have_a0 = 1; - } - - cpuid(0x80000007, &eax, &ebx, &ecx, &edx); - - /* Check we can actually do something before we say anything.*/ - if (!(edx & (1 << 1 | 1 << 2))) - return 0; - - printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); - - if (edx & 1 << 1) { - printk("frequency"); - can_scale_bus = 1; - } - - if ((edx & (1 << 1 | 1 << 2)) == 0x6) - printk(" and "); - - if (edx & 1 << 2) { - printk("voltage"); - can_scale_vid = 1; - } - - printk(".\n"); - return 1; -} - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI -static void invalidate_entry(unsigned int entry) -{ - powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; -} -#endif - -static int get_ranges(unsigned char *pst) -{ - unsigned int j; - unsigned int speed; - u8 fid, vid; - - powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * - (number_scales + 1)), GFP_KERNEL); - if (!powernow_table) - return -ENOMEM; - - for (j = 0 ; j < number_scales; j++) { - fid = *pst++; - - powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; - powernow_table[j].index = fid; /* lower 8 bits */ - - speed = powernow_table[j].frequency; - - if ((fid_codes[fid] % 10) == 5) { -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - if (have_a0 == 1) - invalidate_entry(j); -#endif - } - - if (speed < minimum_speed) - minimum_speed = speed; - if (speed > maximum_speed) - maximum_speed = speed; - - vid = *pst++; - powernow_table[j].index |= (vid << 8); /* upper 8 bits */ - - dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed/1000, vid, - mobile_vid_table[vid]/1000, - mobile_vid_table[vid]%1000); - } - powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; - powernow_table[number_scales].index = 0; - - return 0; -} - - -static void change_FID(int fid) -{ - union msr_fidvidctl fidvidctl; - - rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); - if (fidvidctl.bits.FID != fid) { - fidvidctl.bits.SGTC = latency; - fidvidctl.bits.FID = fid; - fidvidctl.bits.VIDC = 0; - fidvidctl.bits.FIDC = 1; - wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); - } -} - - -static void change_VID(int vid) -{ - union msr_fidvidctl fidvidctl; - - rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); - if (fidvidctl.bits.VID != vid) { - fidvidctl.bits.SGTC = latency; - fidvidctl.bits.VID = vid; - fidvidctl.bits.FIDC = 0; - fidvidctl.bits.VIDC = 1; - wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); - } -} - - -static void change_speed(unsigned int index) -{ - u8 fid, vid; - struct cpufreq_freqs freqs; - union msr_fidvidstatus fidvidstatus; - int cfid; - - /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in powernow_decode_bios, - * vid are the upper 8 bits. - */ - - fid = powernow_table[index].index & 0xFF; - vid = (powernow_table[index].index & 0xFF00) >> 8; - - freqs.cpu = 0; - - rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); - cfid = fidvidstatus.bits.CFID; - freqs.old = fsb * fid_codes[cfid] / 10; - - freqs.new = powernow_table[index].frequency; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Now do the magic poking into the MSRs. */ - - if (have_a0 == 1) /* A0 errata 5 */ - local_irq_disable(); - - if (freqs.old > freqs.new) { - /* Going down, so change FID first */ - change_FID(fid); - change_VID(vid); - } else { - /* Going up, so change VID first */ - change_VID(vid); - change_FID(fid); - } - - - if (have_a0 == 1) - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -} - - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - -static struct acpi_processor_performance *acpi_processor_perf; - -static int powernow_acpi_init(void) -{ - int i; - int retval = 0; - union powernow_acpi_control_t pc; - - if (acpi_processor_perf != NULL && powernow_table != NULL) { - retval = -EINVAL; - goto err0; - } - - acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); - if (!acpi_processor_perf) { - retval = -ENOMEM; - goto err0; - } - - if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, - GFP_KERNEL)) { - retval = -ENOMEM; - goto err05; - } - - if (acpi_processor_register_performance(acpi_processor_perf, 0)) { - retval = -EIO; - goto err1; - } - - if (acpi_processor_perf->control_register.space_id != - ACPI_ADR_SPACE_FIXED_HARDWARE) { - retval = -ENODEV; - goto err2; - } - - if (acpi_processor_perf->status_register.space_id != - ACPI_ADR_SPACE_FIXED_HARDWARE) { - retval = -ENODEV; - goto err2; - } - - number_scales = acpi_processor_perf->state_count; - - if (number_scales < 2) { - retval = -ENODEV; - goto err2; - } - - powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * - (number_scales + 1)), GFP_KERNEL); - if (!powernow_table) { - retval = -ENOMEM; - goto err2; - } - - pc.val = (unsigned long) acpi_processor_perf->states[0].control; - for (i = 0; i < number_scales; i++) { - u8 fid, vid; - struct acpi_processor_px *state = - &acpi_processor_perf->states[i]; - unsigned int speed, speed_mhz; - - pc.val = (unsigned long) state->control; - dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", - i, - (u32) state->core_frequency, - (u32) state->power, - (u32) state->transition_latency, - (u32) state->control, - pc.bits.sgtc); - - vid = pc.bits.vid; - fid = pc.bits.fid; - - powernow_table[i].frequency = fsb * fid_codes[fid] / 10; - powernow_table[i].index = fid; /* lower 8 bits */ - powernow_table[i].index |= (vid << 8); /* upper 8 bits */ - - speed = powernow_table[i].frequency; - speed_mhz = speed / 1000; - - /* processor_perflib will multiply the MHz value by 1000 to - * get a KHz value (e.g. 1266000). However, powernow-k7 works - * with true KHz values (e.g. 1266768). To ensure that all - * powernow frequencies are available, we must ensure that - * ACPI doesn't restrict them, so we round up the MHz value - * to ensure that perflib's computed KHz value is greater than - * or equal to powernow's KHz value. - */ - if (speed % 1000 > 0) - speed_mhz++; - - if ((fid_codes[fid] % 10) == 5) { - if (have_a0 == 1) - invalidate_entry(i); - } - - dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed_mhz, vid, - mobile_vid_table[vid]/1000, - mobile_vid_table[vid]%1000); - - if (state->core_frequency != speed_mhz) { - state->core_frequency = speed_mhz; - dprintk(" Corrected ACPI frequency to %d\n", - speed_mhz); - } - - if (latency < pc.bits.sgtc) - latency = pc.bits.sgtc; - - if (speed < minimum_speed) - minimum_speed = speed; - if (speed > maximum_speed) - maximum_speed = speed; - } - - powernow_table[i].frequency = CPUFREQ_TABLE_END; - powernow_table[i].index = 0; - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - return 0; - -err2: - acpi_processor_unregister_performance(acpi_processor_perf, 0); -err1: - free_cpumask_var(acpi_processor_perf->shared_cpu_map); -err05: - kfree(acpi_processor_perf); -err0: - printk(KERN_WARNING PFX "ACPI perflib can not be used on " - "this platform\n"); - acpi_processor_perf = NULL; - return retval; -} -#else -static int powernow_acpi_init(void) -{ - printk(KERN_INFO PFX "no support for ACPI processor found." - " Please recompile your kernel with ACPI processor\n"); - return -EINVAL; -} -#endif - -static void print_pst_entry(struct pst_s *pst, unsigned int j) -{ - dprintk("PST:%d (@%p)\n", j, pst); - dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", - pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); -} - -static int powernow_decode_bios(int maxfid, int startvid) -{ - struct psb_s *psb; - struct pst_s *pst; - unsigned int i, j; - unsigned char *p; - unsigned int etuple; - unsigned int ret; - - etuple = cpuid_eax(0x80000001); - - for (i = 0xC0000; i < 0xffff0 ; i += 16) { - - p = phys_to_virt(i); - - if (memcmp(p, "AMDK7PNOW!", 10) == 0) { - dprintk("Found PSB header at %p\n", p); - psb = (struct psb_s *) p; - dprintk("Table version: 0x%x\n", psb->tableversion); - if (psb->tableversion != 0x12) { - printk(KERN_INFO PFX "Sorry, only v1.2 tables" - " supported right now\n"); - return -ENODEV; - } - - dprintk("Flags: 0x%x\n", psb->flags); - if ((psb->flags & 1) == 0) - dprintk("Mobile voltage regulator\n"); - else - dprintk("Desktop voltage regulator\n"); - - latency = psb->settlingtime; - if (latency < 100) { - printk(KERN_INFO PFX "BIOS set settling time " - "to %d microseconds. " - "Should be at least 100. " - "Correcting.\n", latency); - latency = 100; - } - dprintk("Settling Time: %d microseconds.\n", - psb->settlingtime); - dprintk("Has %d PST tables. (Only dumping ones " - "relevant to this CPU).\n", - psb->numpst); - - p += sizeof(struct psb_s); - - pst = (struct pst_s *) p; - - for (j = 0; j < psb->numpst; j++) { - pst = (struct pst_s *) p; - number_scales = pst->numpstates; - - if ((etuple == pst->cpuid) && - check_fsb(pst->fsbspeed) && - (maxfid == pst->maxfid) && - (startvid == pst->startvid)) { - print_pst_entry(pst, j); - p = (char *)pst + sizeof(struct pst_s); - ret = get_ranges(p); - return ret; - } else { - unsigned int k; - p = (char *)pst + sizeof(struct pst_s); - for (k = 0; k < number_scales; k++) - p += 2; - } - } - printk(KERN_INFO PFX "No PST tables match this cpuid " - "(0x%x)\n", etuple); - printk(KERN_INFO PFX "This is indicative of a broken " - "BIOS.\n"); - - return -EINVAL; - } - p++; - } - - return -ENODEV; -} - - -static int powernow_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate; - - if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, - relation, &newstate)) - return -EINVAL; - - change_speed(newstate); - - return 0; -} - - -static int powernow_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, powernow_table); -} - -/* - * We use the fact that the bus frequency is somehow - * a multiple of 100000/3 khz, then we compute sgtc according - * to this multiple. - * That way, we match more how AMD thinks all of that work. - * We will then get the same kind of behaviour already tested under - * the "well-known" other OS. - */ -static int __cpuinit fixup_sgtc(void) -{ - unsigned int sgtc; - unsigned int m; - - m = fsb / 3333; - if ((m % 10) >= 5) - m += 5; - - m /= 10; - - sgtc = 100 * m * latency; - sgtc = sgtc / 3; - if (sgtc > 0xfffff) { - printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); - sgtc = 0xfffff; - } - return sgtc; -} - -static unsigned int powernow_get(unsigned int cpu) -{ - union msr_fidvidstatus fidvidstatus; - unsigned int cfid; - - if (cpu) - return 0; - rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); - cfid = fidvidstatus.bits.CFID; - - return fsb * fid_codes[cfid] / 10; -} - - -static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) -{ - printk(KERN_WARNING PFX - "%s laptop with broken PST tables in BIOS detected.\n", - d->ident); - printk(KERN_WARNING PFX - "You need to downgrade to 3A21 (09/09/2002), or try a newer " - "BIOS than 3A71 (01/20/2003)\n"); - printk(KERN_WARNING PFX - "cpufreq scaling has been disabled as a result of this.\n"); - return 0; -} - -/* - * Some Athlon laptops have really fucked PST tables. - * A BIOS update is all that can save them. - * Mention this, and disable cpufreq. - */ -static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { - { - .callback = acer_cpufreq_pst, - .ident = "Acer Aspire", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), - DMI_MATCH(DMI_BIOS_VERSION, "3A71"), - }, - }, - { } -}; - -static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) -{ - union msr_fidvidstatus fidvidstatus; - int result; - - if (policy->cpu != 0) - return -ENODEV; - - rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); - - recalibrate_cpu_khz(); - - fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; - if (!fsb) { - printk(KERN_WARNING PFX "can not determine bus frequency\n"); - return -EINVAL; - } - dprintk("FSB: %3dMHz\n", fsb/1000); - - if (dmi_check_system(powernow_dmi_table) || acpi_force) { - printk(KERN_INFO PFX "PSB/PST known to be broken. " - "Trying ACPI instead\n"); - result = powernow_acpi_init(); - } else { - result = powernow_decode_bios(fidvidstatus.bits.MFID, - fidvidstatus.bits.SVID); - if (result) { - printk(KERN_INFO PFX "Trying ACPI perflib\n"); - maximum_speed = 0; - minimum_speed = -1; - latency = 0; - result = powernow_acpi_init(); - if (result) { - printk(KERN_INFO PFX - "ACPI and legacy methods failed\n"); - } - } else { - /* SGTC use the bus clock as timer */ - latency = fixup_sgtc(); - printk(KERN_INFO PFX "SGTC: %d\n", latency); - } - } - - if (result) - return result; - - printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", - minimum_speed/1000, maximum_speed/1000); - - policy->cpuinfo.transition_latency = - cpufreq_scale(2000000UL, fsb, latency); - - policy->cur = powernow_get(0); - - cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); - - return cpufreq_frequency_table_cpuinfo(policy, powernow_table); -} - -static int powernow_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - if (acpi_processor_perf) { - acpi_processor_unregister_performance(acpi_processor_perf, 0); - free_cpumask_var(acpi_processor_perf->shared_cpu_map); - kfree(acpi_processor_perf); - } -#endif - - kfree(powernow_table); - return 0; -} - -static struct freq_attr *powernow_table_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - .bios_limit = acpi_processor_get_bios_limit, -#endif - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, -}; - -static int __init powernow_init(void) -{ - if (check_powernow() == 0) - return -ENODEV; - return cpufreq_register_driver(&powernow_driver); -} - - -static void __exit powernow_exit(void) -{ - cpufreq_unregister_driver(&powernow_driver); -} - -module_param(acpi_force, int, 0444); -MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); - -MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); -MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); -MODULE_LICENSE("GPL"); - -late_initcall(powernow_init); -module_exit(powernow_exit); - diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h deleted file mode 100644 index 35fb4eaf6e1c..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * (C) 2003 Dave Jones. - * - * Licensed under the terms of the GNU GPL License version 2. - * - * AMD-specific information - * - */ - -union msr_fidvidctl { - struct { - unsigned FID:5, // 4:0 - reserved1:3, // 7:5 - VID:5, // 12:8 - reserved2:3, // 15:13 - FIDC:1, // 16 - VIDC:1, // 17 - reserved3:2, // 19:18 - FIDCHGRATIO:1, // 20 - reserved4:11, // 31-21 - SGTC:20, // 32:51 - reserved5:12; // 63:52 - } bits; - unsigned long long val; -}; - -union msr_fidvidstatus { - struct { - unsigned CFID:5, // 4:0 - reserved1:3, // 7:5 - SFID:5, // 12:8 - reserved2:3, // 15:13 - MFID:5, // 20:16 - reserved3:11, // 31:21 - CVID:5, // 36:32 - reserved4:3, // 39:37 - SVID:5, // 44:40 - reserved5:3, // 47:45 - MVID:5, // 52:48 - reserved6:11; // 63:53 - } bits; - unsigned long long val; -}; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c deleted file mode 100644 index 2368e38327b3..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ /dev/null @@ -1,1607 +0,0 @@ -/* - * (c) 2003-2010 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - * - * Support : mark.langsdorf@amd.com - * - * Based on the powernow-k7.c module written by Dave Jones. - * (C) 2003 Dave Jones on behalf of SuSE Labs - * (C) 2004 Dominik Brodowski <linux@brodo.de> - * (C) 2004 Pavel Machek <pavel@ucw.cz> - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by AMD. - * - * Valuable input gratefully received from Dave Jones, Pavel Machek, - * Dominik Brodowski, Jacob Shin, and others. - * Originally developed by Paul Devriendt. - * Processor information obtained from Chapter 9 (Power and Thermal Management) - * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD - * Opteron Processors" available for download from www.amd.com - * - * Tables for specific CPUs can be inferred from - * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf - */ - -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/cpumask.h> -#include <linux/sched.h> /* for current / set_cpus_allowed() */ -#include <linux/io.h> -#include <linux/delay.h> - -#include <asm/msr.h> - -#include <linux/acpi.h> -#include <linux/mutex.h> -#include <acpi/processor.h> - -#define PFX "powernow-k8: " -#define VERSION "version 2.20.00" -#include "powernow-k8.h" -#include "mperf.h" - -/* serialize freq changes */ -static DEFINE_MUTEX(fidvid_mutex); - -static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); - -static int cpu_family = CPU_OPTERON; - -/* core performance boost */ -static bool cpb_capable, cpb_enabled; -static struct msr __percpu *msrs; - -static struct cpufreq_driver cpufreq_amd64_driver; - -#ifndef CONFIG_SMP -static inline const struct cpumask *cpu_core_mask(int cpu) -{ - return cpumask_of(0); -} -#endif - -/* Return a frequency in MHz, given an input fid */ -static u32 find_freq_from_fid(u32 fid) -{ - return 800 + (fid * 100); -} - -/* Return a frequency in KHz, given an input fid */ -static u32 find_khz_freq_from_fid(u32 fid) -{ - return 1000 * find_freq_from_fid(fid); -} - -static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, - u32 pstate) -{ - return data[pstate].frequency; -} - -/* Return the vco fid for an input fid - * - * Each "low" fid has corresponding "high" fid, and you can get to "low" fids - * only from corresponding high fids. This returns "high" fid corresponding to - * "low" one. - */ -static u32 convert_fid_to_vco_fid(u32 fid) -{ - if (fid < HI_FID_TABLE_BOTTOM) - return 8 + (2 * fid); - else - return fid; -} - -/* - * Return 1 if the pending bit is set. Unless we just instructed the processor - * to transition to a new state, seeing this bit set is really bad news. - */ -static int pending_bit_stuck(void) -{ - u32 lo, hi; - - if (cpu_family == CPU_HW_PSTATE) - return 0; - - rdmsr(MSR_FIDVID_STATUS, lo, hi); - return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; -} - -/* - * Update the global current fid / vid values from the status msr. - * Returns 1 on error. - */ -static int query_current_values_with_pending_wait(struct powernow_k8_data *data) -{ - u32 lo, hi; - u32 i = 0; - - if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - data->currpstate = i; - - /* - * a workaround for family 11h erratum 311 might cause - * an "out-of-range Pstate if the core is in Pstate-0 - */ - if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps)) - data->currpstate = HW_PSTATE_0; - - return 0; - } - do { - if (i++ > 10000) { - dprintk("detected change pending stuck\n"); - return 1; - } - rdmsr(MSR_FIDVID_STATUS, lo, hi); - } while (lo & MSR_S_LO_CHANGE_PENDING); - - data->currvid = hi & MSR_S_HI_CURRENT_VID; - data->currfid = lo & MSR_S_LO_CURRENT_FID; - - return 0; -} - -/* the isochronous relief time */ -static void count_off_irt(struct powernow_k8_data *data) -{ - udelay((1 << data->irt) * 10); - return; -} - -/* the voltage stabilization time */ -static void count_off_vst(struct powernow_k8_data *data) -{ - udelay(data->vstable * VST_UNITS_20US); - return; -} - -/* need to init the control msr to a safe value (for each cpu) */ -static void fidvid_msr_init(void) -{ - u32 lo, hi; - u8 fid, vid; - - rdmsr(MSR_FIDVID_STATUS, lo, hi); - vid = hi & MSR_S_HI_CURRENT_VID; - fid = lo & MSR_S_LO_CURRENT_FID; - lo = fid | (vid << MSR_C_LO_VID_SHIFT); - hi = MSR_C_HI_STP_GNT_BENIGN; - dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); - wrmsr(MSR_FIDVID_CTL, lo, hi); -} - -/* write the new fid value along with the other control fields to the msr */ -static int write_new_fid(struct powernow_k8_data *data, u32 fid) -{ - u32 lo; - u32 savevid = data->currvid; - u32 i = 0; - - if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { - printk(KERN_ERR PFX "internal error - overflow on fid write\n"); - return 1; - } - - lo = fid; - lo |= (data->currvid << MSR_C_LO_VID_SHIFT); - lo |= MSR_C_LO_INIT_FID_VID; - - dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", - fid, lo, data->plllock * PLL_LOCK_CONVERSION); - - do { - wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); - if (i++ > 100) { - printk(KERN_ERR PFX - "Hardware error - pending bit very stuck - " - "no further pstate changes possible\n"); - return 1; - } - } while (query_current_values_with_pending_wait(data)); - - count_off_irt(data); - - if (savevid != data->currvid) { - printk(KERN_ERR PFX - "vid change on fid trans, old 0x%x, new 0x%x\n", - savevid, data->currvid); - return 1; - } - - if (fid != data->currfid) { - printk(KERN_ERR PFX - "fid trans failed, fid 0x%x, curr 0x%x\n", fid, - data->currfid); - return 1; - } - - return 0; -} - -/* Write a new vid to the hardware */ -static int write_new_vid(struct powernow_k8_data *data, u32 vid) -{ - u32 lo; - u32 savefid = data->currfid; - int i = 0; - - if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { - printk(KERN_ERR PFX "internal error - overflow on vid write\n"); - return 1; - } - - lo = data->currfid; - lo |= (vid << MSR_C_LO_VID_SHIFT); - lo |= MSR_C_LO_INIT_FID_VID; - - dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", - vid, lo, STOP_GRANT_5NS); - - do { - wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit " - "very stuck - no further pstate " - "changes possible\n"); - return 1; - } - } while (query_current_values_with_pending_wait(data)); - - if (savefid != data->currfid) { - printk(KERN_ERR PFX "fid changed on vid trans, old " - "0x%x new 0x%x\n", - savefid, data->currfid); - return 1; - } - - if (vid != data->currvid) { - printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " - "curr 0x%x\n", - vid, data->currvid); - return 1; - } - - return 0; -} - -/* - * Reduce the vid by the max of step or reqvid. - * Decreasing vid codes represent increasing voltages: - * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. - */ -static int decrease_vid_code_by_step(struct powernow_k8_data *data, - u32 reqvid, u32 step) -{ - if ((data->currvid - reqvid) > step) - reqvid = data->currvid - step; - - if (write_new_vid(data, reqvid)) - return 1; - - count_off_vst(data); - - return 0; -} - -/* Change hardware pstate by single MSR write */ -static int transition_pstate(struct powernow_k8_data *data, u32 pstate) -{ - wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currpstate = pstate; - return 0; -} - -/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ -static int transition_fid_vid(struct powernow_k8_data *data, - u32 reqfid, u32 reqvid) -{ - if (core_voltage_pre_transition(data, reqvid, reqfid)) - return 1; - - if (core_frequency_transition(data, reqfid)) - return 1; - - if (core_voltage_post_transition(data, reqvid)) - return 1; - - if (query_current_values_with_pending_wait(data)) - return 1; - - if ((reqfid != data->currfid) || (reqvid != data->currvid)) { - printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " - "curr 0x%x 0x%x\n", - smp_processor_id(), - reqfid, reqvid, data->currfid, data->currvid); - return 1; - } - - dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", - smp_processor_id(), data->currfid, data->currvid); - - return 0; -} - -/* Phase 1 - core voltage transition ... setup voltage */ -static int core_voltage_pre_transition(struct powernow_k8_data *data, - u32 reqvid, u32 reqfid) -{ - u32 rvosteps = data->rvo; - u32 savefid = data->currfid; - u32 maxvid, lo, rvomult = 1; - - dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " - "reqvid 0x%x, rvo 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid, reqvid, data->rvo); - - if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP)) - rvomult = 2; - rvosteps *= rvomult; - rdmsr(MSR_FIDVID_STATUS, lo, maxvid); - maxvid = 0x1f & (maxvid >> 16); - dprintk("ph1 maxvid=0x%x\n", maxvid); - if (reqvid < maxvid) /* lower numbers are higher voltages */ - reqvid = maxvid; - - while (data->currvid > reqvid) { - dprintk("ph1: curr 0x%x, req vid 0x%x\n", - data->currvid, reqvid); - if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) - return 1; - } - - while ((rvosteps > 0) && - ((rvomult * data->rvo + data->currvid) > reqvid)) { - if (data->currvid == maxvid) { - rvosteps = 0; - } else { - dprintk("ph1: changing vid for rvo, req 0x%x\n", - data->currvid - 1); - if (decrease_vid_code_by_step(data, data->currvid-1, 1)) - return 1; - rvosteps--; - } - } - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (savefid != data->currfid) { - printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", - data->currfid); - return 1; - } - - dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -/* Phase 2 - core frequency transition */ -static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) -{ - u32 vcoreqfid, vcocurrfid, vcofiddiff; - u32 fid_interval, savevid = data->currvid; - - if (data->currfid == reqfid) { - printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", - data->currfid); - return 0; - } - - dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " - "reqfid 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid, reqfid); - - vcoreqfid = convert_fid_to_vco_fid(reqfid); - vcocurrfid = convert_fid_to_vco_fid(data->currfid); - vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid - : vcoreqfid - vcocurrfid; - - if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP)) - vcofiddiff = 0; - - while (vcofiddiff > 2) { - (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); - - if (reqfid > data->currfid) { - if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, - data->currfid + fid_interval)) - return 1; - } else { - if (write_new_fid - (data, - 2 + convert_fid_to_vco_fid(data->currfid))) - return 1; - } - } else { - if (write_new_fid(data, data->currfid - fid_interval)) - return 1; - } - - vcocurrfid = convert_fid_to_vco_fid(data->currfid); - vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid - : vcoreqfid - vcocurrfid; - } - - if (write_new_fid(data, reqfid)) - return 1; - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (data->currfid != reqfid) { - printk(KERN_ERR PFX - "ph2: mismatch, failed fid transition, " - "curr 0x%x, req 0x%x\n", - data->currfid, reqfid); - return 1; - } - - if (savevid != data->currvid) { - printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", - savevid, data->currvid); - return 1; - } - - dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -/* Phase 3 - core voltage transition flow ... jump to the final vid. */ -static int core_voltage_post_transition(struct powernow_k8_data *data, - u32 reqvid) -{ - u32 savefid = data->currfid; - u32 savereqvid = reqvid; - - dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid); - - if (reqvid != data->currvid) { - if (write_new_vid(data, reqvid)) - return 1; - - if (savefid != data->currfid) { - printk(KERN_ERR PFX - "ph3: bad fid change, save 0x%x, curr 0x%x\n", - savefid, data->currfid); - return 1; - } - - if (data->currvid != reqvid) { - printk(KERN_ERR PFX - "ph3: failed vid transition\n, " - "req 0x%x, curr 0x%x", - reqvid, data->currvid); - return 1; - } - } - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (savereqvid != data->currvid) { - dprintk("ph3 failed, currvid 0x%x\n", data->currvid); - return 1; - } - - if (savefid != data->currfid) { - dprintk("ph3 failed, currfid changed 0x%x\n", - data->currfid); - return 1; - } - - dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -static void check_supported_cpu(void *_rc) -{ - u32 eax, ebx, ecx, edx; - int *rc = _rc; - - *rc = -ENODEV; - - if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD) - return; - - eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && - ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) - return; - - if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { - if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { - printk(KERN_INFO PFX - "Processor cpuid %x not supported\n", eax); - return; - } - - eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); - if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { - printk(KERN_INFO PFX - "No frequency change capabilities detected\n"); - return; - } - - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & P_STATE_TRANSITION_CAPABLE) - != P_STATE_TRANSITION_CAPABLE) { - printk(KERN_INFO PFX - "Power state transitions not supported\n"); - return; - } - } else { /* must be a HW Pstate capable processor */ - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) - cpu_family = CPU_HW_PSTATE; - else - return; - } - - *rc = 0; -} - -static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, - u8 maxvid) -{ - unsigned int j; - u8 lastfid = 0xff; - - for (j = 0; j < data->numps; j++) { - if (pst[j].vid > LEAST_VID) { - printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", - j, pst[j].vid); - return -EINVAL; - } - if (pst[j].vid < data->rvo) { - /* vid + rvo >= 0 */ - printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" - " %d\n", j); - return -ENODEV; - } - if (pst[j].vid < maxvid + data->rvo) { - /* vid + rvo >= maxvid */ - printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" - " %d\n", j); - return -ENODEV; - } - if (pst[j].fid > MAX_FID) { - printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" - " %d\n", j); - return -ENODEV; - } - if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { - /* Only first fid is allowed to be in "low" range */ - printk(KERN_ERR FW_BUG PFX "two low fids - %d : " - "0x%x\n", j, pst[j].fid); - return -EINVAL; - } - if (pst[j].fid < lastfid) - lastfid = pst[j].fid; - } - if (lastfid & 1) { - printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); - return -EINVAL; - } - if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO FW_BUG PFX - "first fid not from lo freq table\n"); - - return 0; -} - -static void invalidate_entry(struct cpufreq_frequency_table *powernow_table, - unsigned int entry) -{ - powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; -} - -static void print_basics(struct powernow_k8_data *data) -{ - int j; - for (j = 0; j < data->numps; j++) { - if (data->powernow_table[j].frequency != - CPUFREQ_ENTRY_INVALID) { - if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX - " %d : pstate %d (%d MHz)\n", j, - data->powernow_table[j].index, - data->powernow_table[j].frequency/1000); - } else { - printk(KERN_INFO PFX - "fid 0x%x (%d MHz), vid 0x%x\n", - data->powernow_table[j].index & 0xff, - data->powernow_table[j].frequency/1000, - data->powernow_table[j].index >> 8); - } - } - } - if (data->batps) - printk(KERN_INFO PFX "Only %d pstates on battery\n", - data->batps); -} - -static u32 freq_from_fid_did(u32 fid, u32 did) -{ - u32 mhz = 0; - - if (boot_cpu_data.x86 == 0x10) - mhz = (100 * (fid + 0x10)) >> did; - else if (boot_cpu_data.x86 == 0x11) - mhz = (100 * (fid + 8)) >> did; - else - BUG(); - - return mhz * 1000; -} - -static int fill_powernow_table(struct powernow_k8_data *data, - struct pst_s *pst, u8 maxvid) -{ - struct cpufreq_frequency_table *powernow_table; - unsigned int j; - - if (data->batps) { - /* use ACPI support to get full speed on mains power */ - printk(KERN_WARNING PFX - "Only %d pstates usable (use ACPI driver for full " - "range\n", data->batps); - data->numps = data->batps; - } - - for (j = 1; j < data->numps; j++) { - if (pst[j-1].fid >= pst[j].fid) { - printk(KERN_ERR PFX "PST out of sequence\n"); - return -EINVAL; - } - } - - if (data->numps < 2) { - printk(KERN_ERR PFX "no p states to transition\n"); - return -ENODEV; - } - - if (check_pst_table(data, pst, maxvid)) - return -EINVAL; - - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->numps + 1)), GFP_KERNEL); - if (!powernow_table) { - printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); - return -ENOMEM; - } - - for (j = 0; j < data->numps; j++) { - int freq; - powernow_table[j].index = pst[j].fid; /* lower 8 bits */ - powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ - freq = find_khz_freq_from_fid(pst[j].fid); - powernow_table[j].frequency = freq; - } - powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; - powernow_table[data->numps].index = 0; - - if (query_current_values_with_pending_wait(data)) { - kfree(powernow_table); - return -EIO; - } - - dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); - data->powernow_table = powernow_table; - if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) - print_basics(data); - - for (j = 0; j < data->numps; j++) - if ((pst[j].fid == data->currfid) && - (pst[j].vid == data->currvid)) - return 0; - - dprintk("currfid/vid do not match PST, ignoring\n"); - return 0; -} - -/* Find and validate the PSB/PST table in BIOS. */ -static int find_psb_table(struct powernow_k8_data *data) -{ - struct psb_s *psb; - unsigned int i; - u32 mvs; - u8 maxvid; - u32 cpst = 0; - u32 thiscpuid; - - for (i = 0xc0000; i < 0xffff0; i += 0x10) { - /* Scan BIOS looking for the signature. */ - /* It can not be at ffff0 - it is too big. */ - - psb = phys_to_virt(i); - if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) - continue; - - dprintk("found PSB header at 0x%p\n", psb); - - dprintk("table vers: 0x%x\n", psb->tableversion); - if (psb->tableversion != PSB_VERSION_1_4) { - printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); - return -ENODEV; - } - - dprintk("flags: 0x%x\n", psb->flags1); - if (psb->flags1) { - printk(KERN_ERR FW_BUG PFX "unknown flags\n"); - return -ENODEV; - } - - data->vstable = psb->vstable; - dprintk("voltage stabilization time: %d(*20us)\n", - data->vstable); - - dprintk("flags2: 0x%x\n", psb->flags2); - data->rvo = psb->flags2 & 3; - data->irt = ((psb->flags2) >> 2) & 3; - mvs = ((psb->flags2) >> 4) & 3; - data->vidmvs = 1 << mvs; - data->batps = ((psb->flags2) >> 6) & 3; - - dprintk("ramp voltage offset: %d\n", data->rvo); - dprintk("isochronous relief time: %d\n", data->irt); - dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); - - dprintk("numpst: 0x%x\n", psb->num_tables); - cpst = psb->num_tables; - if ((psb->cpuid == 0x00000fc0) || - (psb->cpuid == 0x00000fe0)) { - thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if ((thiscpuid == 0x00000fc0) || - (thiscpuid == 0x00000fe0)) - cpst = 1; - } - if (cpst != 1) { - printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); - return -ENODEV; - } - - data->plllock = psb->plllocktime; - dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); - dprintk("maxfid: 0x%x\n", psb->maxfid); - dprintk("maxvid: 0x%x\n", psb->maxvid); - maxvid = psb->maxvid; - - data->numps = psb->numps; - dprintk("numpstates: 0x%x\n", data->numps); - return fill_powernow_table(data, - (struct pst_s *)(psb+1), maxvid); - } - /* - * If you see this message, complain to BIOS manufacturer. If - * he tells you "we do not support Linux" or some similar - * nonsense, remember that Windows 2000 uses the same legacy - * mechanism that the old Linux PSB driver uses. Tell them it - * is broken with Windows 2000. - * - * The reference to the AMD documentation is chapter 9 in the - * BIOS and Kernel Developer's Guide, which is available on - * www.amd.com - */ - printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); - printk(KERN_ERR PFX "Make sure that your BIOS is up to date" - " and Cool'N'Quiet support is enabled in BIOS setup\n"); - return -ENODEV; -} - -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, - unsigned int index) -{ - u64 control; - - if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) - return; - - control = data->acpi_data.states[index].control; - data->irt = (control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (control >> VST_SHIFT) & VST_MASK; -} - -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) -{ - struct cpufreq_frequency_table *powernow_table; - int ret_val = -ENODEV; - u64 control, status; - - if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed: bad ACPI data\n"); - return -EIO; - } - - /* verify the data contained in the ACPI structures */ - if (data->acpi_data.state_count <= 1) { - dprintk("No ACPI P-States\n"); - goto err_out; - } - - control = data->acpi_data.control_register.space_id; - status = data->acpi_data.status_register.space_id; - - if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Invalid control/status registers (%x - %x)\n", - control, status); - goto err_out; - } - - /* fill in data->powernow_table */ - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->acpi_data.state_count + 1)), GFP_KERNEL); - if (!powernow_table) { - dprintk("powernow_table memory alloc failure\n"); - goto err_out; - } - - /* fill in data */ - data->numps = data->acpi_data.state_count; - powernow_k8_acpi_pst_values(data, 0); - - if (cpu_family == CPU_HW_PSTATE) - ret_val = fill_powernow_table_pstate(data, powernow_table); - else - ret_val = fill_powernow_table_fidvid(data, powernow_table); - if (ret_val) - goto err_out_mem; - - powernow_table[data->acpi_data.state_count].frequency = - CPUFREQ_TABLE_END; - powernow_table[data->acpi_data.state_count].index = 0; - data->powernow_table = powernow_table; - - if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) - print_basics(data); - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { - printk(KERN_ERR PFX - "unable to alloc powernow_k8_data cpumask\n"); - ret_val = -ENOMEM; - goto err_out_mem; - } - - return 0; - -err_out_mem: - kfree(powernow_table); - -err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); - - /* data->acpi_data.state_count informs us at ->exit() - * whether ACPI was used */ - data->acpi_data.state_count = 0; - - return ret_val; -} - -static int fill_powernow_table_pstate(struct powernow_k8_data *data, - struct cpufreq_frequency_table *powernow_table) -{ - int i; - u32 hi = 0, lo = 0; - rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); - data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; - - for (i = 0; i < data->acpi_data.state_count; i++) { - u32 index; - - index = data->acpi_data.states[i].control & HW_PSTATE_MASK; - if (index > data->max_hw_pstate) { - printk(KERN_ERR PFX "invalid pstate %d - " - "bad value %d.\n", i, index); - printk(KERN_ERR PFX "Please report to BIOS " - "manufacturer\n"); - invalidate_entry(powernow_table, i); - continue; - } - rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); - if (!(hi & HW_PSTATE_VALID_MASK)) { - dprintk("invalid pstate %d, ignoring\n", index); - invalidate_entry(powernow_table, i); - continue; - } - - powernow_table[i].index = index; - - /* Frequency may be rounded for these */ - if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) - || boot_cpu_data.x86 == 0x11) { - powernow_table[i].frequency = - freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); - } else - powernow_table[i].frequency = - data->acpi_data.states[i].core_frequency * 1000; - } - return 0; -} - -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, - struct cpufreq_frequency_table *powernow_table) -{ - int i; - - for (i = 0; i < data->acpi_data.state_count; i++) { - u32 fid; - u32 vid; - u32 freq, index; - u64 status, control; - - if (data->exttype) { - status = data->acpi_data.states[i].status; - fid = status & EXT_FID_MASK; - vid = (status >> VID_SHIFT) & EXT_VID_MASK; - } else { - control = data->acpi_data.states[i].control; - fid = control & FID_MASK; - vid = (control >> VID_SHIFT) & VID_MASK; - } - - dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); - - index = fid | (vid<<8); - powernow_table[i].index = index; - - freq = find_khz_freq_from_fid(fid); - powernow_table[i].frequency = freq; - - /* verify frequency is OK */ - if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { - dprintk("invalid freq %u kHz, ignoring\n", freq); - invalidate_entry(powernow_table, i); - continue; - } - - /* verify voltage is OK - - * BIOSs are using "off" to indicate invalid */ - if (vid == VID_OFF) { - dprintk("invalid vid %u, ignoring\n", vid); - invalidate_entry(powernow_table, i); - continue; - } - - if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries " - "%u kHz vs. %u kHz\n", freq, - (unsigned int) - (data->acpi_data.states[i].core_frequency - * 1000)); - invalidate_entry(powernow_table, i); - continue; - } - } - return 0; -} - -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) -{ - if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, - data->cpu); - free_cpumask_var(data->acpi_data.shared_cpu_map); -} - -static int get_transition_latency(struct powernow_k8_data *data) -{ - int max_latency = 0; - int i; - for (i = 0; i < data->acpi_data.state_count; i++) { - int cur_latency = data->acpi_data.states[i].transition_latency - + data->acpi_data.states[i].bus_master_latency; - if (cur_latency > max_latency) - max_latency = cur_latency; - } - if (max_latency == 0) { - /* - * Fam 11h and later may return 0 as transition latency. This - * is intended and means "very fast". While cpufreq core and - * governors currently can handle that gracefully, better set it - * to 1 to avoid problems in the future. - */ - if (boot_cpu_data.x86 < 0x11) - printk(KERN_ERR FW_WARN PFX "Invalid zero transition " - "latency\n"); - max_latency = 1; - } - /* value in usecs, needs to be in nanoseconds */ - return 1000 * max_latency; -} - -/* Take a frequency, and issue the fid/vid transition command */ -static int transition_frequency_fidvid(struct powernow_k8_data *data, - unsigned int index) -{ - u32 fid = 0; - u32 vid = 0; - int res, i; - struct cpufreq_freqs freqs; - - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - - /* fid/vid correctness check for k8 */ - /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in find_psb_table, vid - * are the upper 8 bits. - */ - fid = data->powernow_table[index].index & 0xFF; - vid = (data->powernow_table[index].index & 0xFF00) >> 8; - - dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); - - if (query_current_values_with_pending_wait(data)) - return 1; - - if ((data->currvid == vid) && (data->currfid == fid)) { - dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", - fid, vid); - return 0; - } - - dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", - smp_processor_id(), fid, vid); - freqs.old = find_khz_freq_from_fid(data->currfid); - freqs.new = find_khz_freq_from_fid(fid); - - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - res = transition_fid_vid(data, fid, vid); - freqs.new = find_khz_freq_from_fid(data->currfid); - - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - return res; -} - -/* Take a frequency, and issue the hardware pstate transition command */ -static int transition_frequency_pstate(struct powernow_k8_data *data, - unsigned int index) -{ - u32 pstate = 0; - int res, i; - struct cpufreq_freqs freqs; - - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - - /* get MSR index for hardware pstate transition */ - pstate = index & HW_PSTATE_MASK; - if (pstate > data->max_hw_pstate) - return 0; - freqs.old = find_khz_freq_from_pstate(data->powernow_table, - data->currpstate); - freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - res = transition_pstate(data, pstate); - freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - - for_each_cpu(i, data->available_cores) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - return res; -} - -/* Driver entry point to switch to the target frequency */ -static int powernowk8_target(struct cpufreq_policy *pol, - unsigned targfreq, unsigned relation) -{ - cpumask_var_t oldmask; - struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); - u32 checkfid; - u32 checkvid; - unsigned int newstate; - int ret = -EIO; - - if (!data) - return -EINVAL; - - checkfid = data->currfid; - checkvid = data->currvid; - - /* only run on specific CPU from here on. */ - /* This is poor form: use a workqueue or smp_call_function_single */ - if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(oldmask, tsk_cpus_allowed(current)); - set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing targ, change pending bit set\n"); - goto err_out; - } - - dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", - pol->cpu, targfreq, pol->min, pol->max, relation); - - if (query_current_values_with_pending_wait(data)) - goto err_out; - - if (cpu_family != CPU_HW_PSTATE) { - dprintk("targ: curr fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); - - if ((checkvid != data->currvid) || - (checkfid != data->currfid)) { - printk(KERN_INFO PFX - "error - out of sync, fix 0x%x 0x%x, " - "vid 0x%x 0x%x\n", - checkfid, data->currfid, - checkvid, data->currvid); - } - } - - if (cpufreq_frequency_table_target(pol, data->powernow_table, - targfreq, relation, &newstate)) - goto err_out; - - mutex_lock(&fidvid_mutex); - - powernow_k8_acpi_pst_values(data, newstate); - - if (cpu_family == CPU_HW_PSTATE) - ret = transition_frequency_pstate(data, newstate); - else - ret = transition_frequency_fidvid(data, newstate); - if (ret) { - printk(KERN_ERR PFX "transition frequency failed\n"); - ret = 1; - mutex_unlock(&fidvid_mutex); - goto err_out; - } - mutex_unlock(&fidvid_mutex); - - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, - newstate); - else - pol->cur = find_khz_freq_from_fid(data->currfid); - ret = 0; - -err_out: - set_cpus_allowed_ptr(current, oldmask); - free_cpumask_var(oldmask); - return ret; -} - -/* Driver entry point to verify the policy and range of frequencies */ -static int powernowk8_verify(struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); - - if (!data) - return -EINVAL; - - return cpufreq_frequency_table_verify(pol, data->powernow_table); -} - -struct init_on_cpu { - struct powernow_k8_data *data; - int rc; -}; - -static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) -{ - struct init_on_cpu *init_on_cpu = _init_on_cpu; - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing init, change pending bit set\n"); - init_on_cpu->rc = -ENODEV; - return; - } - - if (query_current_values_with_pending_wait(init_on_cpu->data)) { - init_on_cpu->rc = -ENODEV; - return; - } - - if (cpu_family == CPU_OPTERON) - fidvid_msr_init(); - - init_on_cpu->rc = 0; -} - -/* per CPU init entry point to the driver */ -static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) -{ - static const char ACPI_PSS_BIOS_BUG_MSG[] = - KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" - FW_BUG PFX "Try again with latest BIOS.\n"; - struct powernow_k8_data *data; - struct init_on_cpu init_on_cpu; - int rc; - struct cpuinfo_x86 *c = &cpu_data(pol->cpu); - - if (!cpu_online(pol->cpu)) - return -ENODEV; - - smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1); - if (rc) - return -ENODEV; - - data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); - if (!data) { - printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); - return -ENOMEM; - } - - data->cpu = pol->cpu; - data->currpstate = HW_PSTATE_INVALID; - - if (powernow_k8_cpu_init_acpi(data)) { - /* - * Use the PSB BIOS structure. This is only available on - * an UP version, and is deprecated by AMD. - */ - if (num_online_cpus() != 1) { - printk_once(ACPI_PSS_BIOS_BUG_MSG); - goto err_out; - } - if (pol->cpu != 0) { - printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " - "CPU other than CPU0. Complain to your BIOS " - "vendor.\n"); - goto err_out; - } - rc = find_psb_table(data); - if (rc) - goto err_out; - - /* Take a crude guess here. - * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = ( - ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + - ((1 << data->irt) * 30)) * 1000; - } else /* ACPI _PSS objects available */ - pol->cpuinfo.transition_latency = get_transition_latency(data); - - /* only run on specific CPU from here on */ - init_on_cpu.data = data; - smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu, - &init_on_cpu, 1); - rc = init_on_cpu.rc; - if (rc != 0) - goto err_out_exit_acpi; - - if (cpu_family == CPU_HW_PSTATE) - cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); - else - cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); - data->available_cores = pol->cpus; - - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, - data->currpstate); - else - pol->cur = find_khz_freq_from_fid(data->currfid); - dprintk("policy current frequency %d kHz\n", pol->cur); - - /* min/max the cpu is capable of */ - if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { - printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); - powernow_k8_cpu_exit_acpi(data); - kfree(data->powernow_table); - kfree(data); - return -EINVAL; - } - - /* Check for APERF/MPERF support in hardware */ - if (cpu_has(c, X86_FEATURE_APERFMPERF)) - cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; - - cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); - - if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current pstate 0x%x\n", - data->currpstate); - else - dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); - - per_cpu(powernow_data, pol->cpu) = data; - - return 0; - -err_out_exit_acpi: - powernow_k8_cpu_exit_acpi(data); - -err_out: - kfree(data); - return -ENODEV; -} - -static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); - - if (!data) - return -EINVAL; - - powernow_k8_cpu_exit_acpi(data); - - cpufreq_frequency_table_put_attr(pol->cpu); - - kfree(data->powernow_table); - kfree(data); - per_cpu(powernow_data, pol->cpu) = NULL; - - return 0; -} - -static void query_values_on_cpu(void *_err) -{ - int *err = _err; - struct powernow_k8_data *data = __this_cpu_read(powernow_data); - - *err = query_current_values_with_pending_wait(data); -} - -static unsigned int powernowk8_get(unsigned int cpu) -{ - struct powernow_k8_data *data = per_cpu(powernow_data, cpu); - unsigned int khz = 0; - int err; - - if (!data) - return 0; - - smp_call_function_single(cpu, query_values_on_cpu, &err, true); - if (err) - goto out; - - if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_pstate(data->powernow_table, - data->currpstate); - else - khz = find_khz_freq_from_fid(data->currfid); - - -out: - return khz; -} - -static void _cpb_toggle_msrs(bool t) -{ - int cpu; - - get_online_cpus(); - - rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); - - for_each_cpu(cpu, cpu_online_mask) { - struct msr *reg = per_cpu_ptr(msrs, cpu); - if (t) - reg->l &= ~BIT(25); - else - reg->l |= BIT(25); - } - wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); - - put_online_cpus(); -} - -/* - * Switch on/off core performance boosting. - * - * 0=disable - * 1=enable. - */ -static void cpb_toggle(bool t) -{ - if (!cpb_capable) - return; - - if (t && !cpb_enabled) { - cpb_enabled = true; - _cpb_toggle_msrs(t); - printk(KERN_INFO PFX "Core Boosting enabled.\n"); - } else if (!t && cpb_enabled) { - cpb_enabled = false; - _cpb_toggle_msrs(t); - printk(KERN_INFO PFX "Core Boosting disabled.\n"); - } -} - -static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, - size_t count) -{ - int ret = -EINVAL; - unsigned long val = 0; - - ret = strict_strtoul(buf, 10, &val); - if (!ret && (val == 0 || val == 1) && cpb_capable) - cpb_toggle(val); - else - return -EINVAL; - - return count; -} - -static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) -{ - return sprintf(buf, "%u\n", cpb_enabled); -} - -#define define_one_rw(_name) \ -static struct freq_attr _name = \ -__ATTR(_name, 0644, show_##_name, store_##_name) - -define_one_rw(cpb); - -static struct freq_attr *powernow_k8_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - &cpb, - NULL, -}; - -static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .bios_limit = acpi_processor_get_bios_limit, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, -}; - -/* - * Clear the boost-disable flag on the CPU_DOWN path so that this cpu - * cannot block the remaining ones from boosting. On the CPU_UP path we - * simply keep the boost-disable flag in sync with the current global - * state. - */ -static int cpb_notify(struct notifier_block *nb, unsigned long action, - void *hcpu) -{ - unsigned cpu = (long)hcpu; - u32 lo, hi; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - - if (!cpb_enabled) { - rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); - lo |= BIT(25); - wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); - } - break; - - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); - lo &= ~BIT(25); - wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block cpb_nb = { - .notifier_call = cpb_notify, -}; - -/* driver entry point for init */ -static int __cpuinit powernowk8_init(void) -{ - unsigned int i, supported_cpus = 0, cpu; - int rv; - - for_each_online_cpu(i) { - int rc; - smp_call_function_single(i, check_supported_cpu, &rc, 1); - if (rc == 0) - supported_cpus++; - } - - if (supported_cpus != num_online_cpus()) - return -ENODEV; - - printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", - num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); - - if (boot_cpu_has(X86_FEATURE_CPB)) { - - cpb_capable = true; - - msrs = msrs_alloc(); - if (!msrs) { - printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); - return -ENOMEM; - } - - register_cpu_notifier(&cpb_nb); - - rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); - - for_each_cpu(cpu, cpu_online_mask) { - struct msr *reg = per_cpu_ptr(msrs, cpu); - cpb_enabled |= !(!!(reg->l & BIT(25))); - } - - printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", - (cpb_enabled ? "on" : "off")); - } - - rv = cpufreq_register_driver(&cpufreq_amd64_driver); - if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) { - unregister_cpu_notifier(&cpb_nb); - msrs_free(msrs); - msrs = NULL; - } - return rv; -} - -/* driver entry point for term */ -static void __exit powernowk8_exit(void) -{ - dprintk("exit\n"); - - if (boot_cpu_has(X86_FEATURE_CPB)) { - msrs_free(msrs); - msrs = NULL; - - unregister_cpu_notifier(&cpb_nb); - } - - cpufreq_unregister_driver(&cpufreq_amd64_driver); -} - -MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and " - "Mark Langsdorf <mark.langsdorf@amd.com>"); -MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); -MODULE_LICENSE("GPL"); - -late_initcall(powernowk8_init); -module_exit(powernowk8_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h deleted file mode 100644 index df3529b1c02d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * (c) 2003-2006 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - */ - -enum pstate { - HW_PSTATE_INVALID = 0xff, - HW_PSTATE_0 = 0, - HW_PSTATE_1 = 1, - HW_PSTATE_2 = 2, - HW_PSTATE_3 = 3, - HW_PSTATE_4 = 4, - HW_PSTATE_5 = 5, - HW_PSTATE_6 = 6, - HW_PSTATE_7 = 7, -}; - -struct powernow_k8_data { - unsigned int cpu; - - u32 numps; /* number of p-states */ - u32 batps; /* number of p-states supported on battery */ - u32 max_hw_pstate; /* maximum legal hardware pstate */ - - /* these values are constant when the PSB is used to determine - * vid/fid pairings, but are modified during the ->target() call - * when ACPI is used */ - u32 rvo; /* ramp voltage offset */ - u32 irt; /* isochronous relief time */ - u32 vidmvs; /* usable value calculated from mvs */ - u32 vstable; /* voltage stabilization time, units 20 us */ - u32 plllock; /* pll lock time, units 1 us */ - u32 exttype; /* extended interface = 1 */ - - /* keep track of the current fid / vid or pstate */ - u32 currvid; - u32 currfid; - enum pstate currpstate; - - /* the powernow_table includes all frequency and vid/fid pairings: - * fid are the lower 8 bits of the index, vid are the upper 8 bits. - * frequency is in kHz */ - struct cpufreq_frequency_table *powernow_table; - - /* the acpi table needs to be kept. it's only available if ACPI was - * used to determine valid frequency/vid/fid states */ - struct acpi_processor_performance acpi_data; - - /* we need to keep track of associated cores, but let cpufreq - * handle hotplug events - so just point at cpufreq pol->cpus - * structure */ - struct cpumask *available_cores; -}; - -/* processor's cpuid instruction support */ -#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ -#define CPUID_XFAM 0x0ff00000 /* extended family */ -#define CPUID_XFAM_K8 0 -#define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_MASK 0x000c0000 -#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ -#define CPUID_USE_XFAM_XMOD 0x00000f00 -#define CPUID_GET_MAX_CAPABILITIES 0x80000000 -#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 -#define P_STATE_TRANSITION_CAPABLE 6 - -/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ -/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ -/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ -/* the register number is placed in ecx, and the data is returned in edx:eax. */ - -#define MSR_FIDVID_CTL 0xc0010041 -#define MSR_FIDVID_STATUS 0xc0010042 - -/* Field definitions within the FID VID Low Control MSR : */ -#define MSR_C_LO_INIT_FID_VID 0x00010000 -#define MSR_C_LO_NEW_VID 0x00003f00 -#define MSR_C_LO_NEW_FID 0x0000003f -#define MSR_C_LO_VID_SHIFT 8 - -/* Field definitions within the FID VID High Control MSR : */ -#define MSR_C_HI_STP_GNT_TO 0x000fffff - -/* Field definitions within the FID VID Low Status MSR : */ -#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ -#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 -#define MSR_S_LO_MAX_FID 0x003f0000 -#define MSR_S_LO_START_FID 0x00003f00 -#define MSR_S_LO_CURRENT_FID 0x0000003f - -/* Field definitions within the FID VID High Status MSR : */ -#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 -#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 -#define MSR_S_HI_START_VID 0x00003f00 -#define MSR_S_HI_CURRENT_VID 0x0000003f -#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 - - -/* Hardware Pstate _PSS and MSR definitions */ -#define USE_HW_PSTATE 0x00000080 -#define HW_PSTATE_MASK 0x00000007 -#define HW_PSTATE_VALID_MASK 0x80000000 -#define HW_PSTATE_MAX_MASK 0x000000f0 -#define HW_PSTATE_MAX_SHIFT 4 -#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ -#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ -#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ -#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ - -/* define the two driver architectures */ -#define CPU_OPTERON 0 -#define CPU_HW_PSTATE 1 - - -/* - * There are restrictions frequencies have to follow: - * - only 1 entry in the low fid table ( <=1.4GHz ) - * - lowest entry in the high fid table must be >= 2 * the entry in the - * low fid table - * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry - * in the low fid table - * - the parts can only step at <= 200 MHz intervals, odd fid values are - * supported in revision G and later revisions. - * - lowest frequency must be >= interprocessor hypertransport link speed - * (only applies to MP systems obviously) - */ - -/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ -#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ -#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ - -#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ -#define HI_VCOFREQ_TABLE_BOTTOM 1600 - -#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ - -#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ -#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ - -#define MIN_FREQ 800 /* Min and max freqs, per spec */ -#define MAX_FREQ 5000 - -#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ -#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ - -#define VID_OFF 0x3f - -#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ - -#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ - -#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ -#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */ - -/* - * Most values of interest are encoded in a single field of the _PSS - * entries: the "control" value. - */ - -#define IRT_SHIFT 30 -#define RVO_SHIFT 28 -#define EXT_TYPE_SHIFT 27 -#define PLL_L_SHIFT 20 -#define MVS_SHIFT 18 -#define VST_SHIFT 11 -#define VID_SHIFT 6 -#define IRT_MASK 3 -#define RVO_MASK 3 -#define EXT_TYPE_MASK 1 -#define PLL_L_MASK 0x7f -#define MVS_MASK 3 -#define VST_MASK 0x7f -#define VID_MASK 0x1f -#define FID_MASK 0x1f -#define EXT_VID_MASK 0x3f -#define EXT_FID_MASK 0x3f - - -/* - * Version 1.4 of the PSB table. This table is constructed by BIOS and is - * to tell the OS's power management driver which VIDs and FIDs are - * supported by this particular processor. - * If the data in the PSB / PST is wrong, then this driver will program the - * wrong values into hardware, which is very likely to lead to a crash. - */ - -#define PSB_ID_STRING "AMDK7PNOW!" -#define PSB_ID_STRING_LEN 10 - -#define PSB_VERSION_1_4 0x14 - -struct psb_s { - u8 signature[10]; - u8 tableversion; - u8 flags1; - u16 vstable; - u8 flags2; - u8 num_tables; - u32 cpuid; - u8 plllocktime; - u8 maxfid; - u8 maxvid; - u8 numps; -}; - -/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ -struct pst_s { - u8 fid; - u8 vid; -}; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) - -static int core_voltage_pre_transition(struct powernow_k8_data *data, - u32 reqvid, u32 regfid); -static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); -static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); - -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); - -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c deleted file mode 100644 index 435a996a613a..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * sc520_freq.c: cpufreq driver for the AMD Elan sc520 - * - * Copyright (C) 2005 Sean Young <sean@mess.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Based on elanfreq.c - * - * 2005-03-30: - initial revision - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> - -#include <linux/delay.h> -#include <linux/cpufreq.h> -#include <linux/timex.h> -#include <linux/io.h> - -#include <asm/msr.h> - -#define MMCR_BASE 0xfffef000 /* The default base address */ -#define OFFS_CPUCTL 0x2 /* CPU Control Register */ - -static __u8 __iomem *cpuctl; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "sc520_freq", msg) -#define PFX "sc520_freq: " - -static struct cpufreq_frequency_table sc520_freq_table[] = { - {0x01, 100000}, - {0x02, 133000}, - {0, CPUFREQ_TABLE_END}, -}; - -static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) -{ - u8 clockspeed_reg = *cpuctl; - - switch (clockspeed_reg & 0x03) { - default: - printk(KERN_ERR PFX "error: cpuctl register has unexpected " - "value %02x\n", clockspeed_reg); - case 0x01: - return 100000; - case 0x02: - return 133000; - } -} - -static void sc520_freq_set_cpu_state(unsigned int state) -{ - - struct cpufreq_freqs freqs; - u8 clockspeed_reg; - - freqs.old = sc520_freq_get_cpu_frequency(0); - freqs.new = sc520_freq_table[state].frequency; - freqs.cpu = 0; /* AMD Elan is UP */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - dprintk("attempting to set frequency to %i kHz\n", - sc520_freq_table[state].frequency); - - local_irq_disable(); - - clockspeed_reg = *cpuctl & ~0x03; - *cpuctl = clockspeed_reg | sc520_freq_table[state].index; - - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -}; - -static int sc520_freq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); -} - -static int sc520_freq_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, sc520_freq_table, - target_freq, relation, &newstate)) - return -EINVAL; - - sc520_freq_set_cpu_state(newstate); - - return 0; -} - - -/* - * Module init and exit code - */ - -static int sc520_freq_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - int result; - - /* capability check */ - if (c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) - return -ENODEV; - - /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = 1000000; /* 1ms */ - policy->cur = sc520_freq_get_cpu_frequency(0); - - result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); - if (result) - return result; - - cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); - - return 0; -} - - -static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - - -static struct freq_attr *sc520_freq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver sc520_freq_driver = { - .get = sc520_freq_get_cpu_frequency, - .verify = sc520_freq_verify, - .target = sc520_freq_target, - .init = sc520_freq_cpu_init, - .exit = sc520_freq_cpu_exit, - .name = "sc520_freq", - .owner = THIS_MODULE, - .attr = sc520_freq_attr, -}; - - -static int __init sc520_freq_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - int err; - - /* Test if we have the right hardware */ - if (c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) { - dprintk("no Elan SC520 processor found!\n"); - return -ENODEV; - } - cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); - if (!cpuctl) { - printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); - return -ENOMEM; - } - - err = cpufreq_register_driver(&sc520_freq_driver); - if (err) - iounmap(cpuctl); - - return err; -} - - -static void __exit sc520_freq_exit(void) -{ - cpufreq_unregister_driver(&sc520_freq_driver); - iounmap(cpuctl); -} - - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sean Young <sean@mess.org>"); -MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); - -module_init(sc520_freq_init); -module_exit(sc520_freq_exit); - diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c deleted file mode 100644 index 9b1ff37de46a..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ /dev/null @@ -1,636 +0,0 @@ -/* - * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium - * M (part of the Centrino chipset). - * - * Since the original Pentium M, most new Intel CPUs support Enhanced - * SpeedStep. - * - * Despite the "SpeedStep" in the name, this is almost entirely unlike - * traditional SpeedStep. - * - * Modelled on speedstep.c - * - * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/sched.h> /* current */ -#include <linux/delay.h> -#include <linux/compiler.h> -#include <linux/gfp.h> - -#include <asm/msr.h> -#include <asm/processor.h> -#include <asm/cpufeature.h> - -#define PFX "speedstep-centrino: " -#define MAINTAINER "cpufreq@vger.kernel.org" - -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) - -#define INTEL_MSR_RANGE (0xffff) - -struct cpu_id -{ - __u8 x86; /* CPU family */ - __u8 x86_model; /* model */ - __u8 x86_mask; /* stepping */ -}; - -enum { - CPU_BANIAS, - CPU_DOTHAN_A1, - CPU_DOTHAN_A2, - CPU_DOTHAN_B0, - CPU_MP4HT_D0, - CPU_MP4HT_E0, -}; - -static const struct cpu_id cpu_ids[] = { - [CPU_BANIAS] = { 6, 9, 5 }, - [CPU_DOTHAN_A1] = { 6, 13, 1 }, - [CPU_DOTHAN_A2] = { 6, 13, 2 }, - [CPU_DOTHAN_B0] = { 6, 13, 6 }, - [CPU_MP4HT_D0] = {15, 3, 4 }, - [CPU_MP4HT_E0] = {15, 4, 1 }, -}; -#define N_IDS ARRAY_SIZE(cpu_ids) - -struct cpu_model -{ - const struct cpu_id *cpu_id; - const char *model_name; - unsigned max_freq; /* max clock in kHz */ - - struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ -}; -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, - const struct cpu_id *x); - -/* Operating points for current CPU */ -static DEFINE_PER_CPU(struct cpu_model *, centrino_model); -static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); - -static struct cpufreq_driver centrino_driver; - -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE - -/* Computes the correct form for IA32_PERF_CTL MSR for a particular - frequency/voltage operating point; frequency in MHz, volts in mV. - This is stored as "index" in the structure. */ -#define OP(mhz, mv) \ - { \ - .frequency = (mhz) * 1000, \ - .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ - } - -/* - * These voltage tables were derived from the Intel Pentium M - * datasheet, document 25261202.pdf, Table 5. I have verified they - * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium - * M. - */ - -/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ -static struct cpufreq_frequency_table banias_900[] = -{ - OP(600, 844), - OP(800, 988), - OP(900, 1004), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ -static struct cpufreq_frequency_table banias_1000[] = -{ - OP(600, 844), - OP(800, 972), - OP(900, 988), - OP(1000, 1004), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ -static struct cpufreq_frequency_table banias_1100[] = -{ - OP( 600, 956), - OP( 800, 1020), - OP( 900, 1100), - OP(1000, 1164), - OP(1100, 1180), - { .frequency = CPUFREQ_TABLE_END } -}; - - -/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ -static struct cpufreq_frequency_table banias_1200[] = -{ - OP( 600, 956), - OP( 800, 1004), - OP( 900, 1020), - OP(1000, 1100), - OP(1100, 1164), - OP(1200, 1180), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.30GHz (Banias) */ -static struct cpufreq_frequency_table banias_1300[] = -{ - OP( 600, 956), - OP( 800, 1260), - OP(1000, 1292), - OP(1200, 1356), - OP(1300, 1388), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.40GHz (Banias) */ -static struct cpufreq_frequency_table banias_1400[] = -{ - OP( 600, 956), - OP( 800, 1180), - OP(1000, 1308), - OP(1200, 1436), - OP(1400, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.50GHz (Banias) */ -static struct cpufreq_frequency_table banias_1500[] = -{ - OP( 600, 956), - OP( 800, 1116), - OP(1000, 1228), - OP(1200, 1356), - OP(1400, 1452), - OP(1500, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.60GHz (Banias) */ -static struct cpufreq_frequency_table banias_1600[] = -{ - OP( 600, 956), - OP( 800, 1036), - OP(1000, 1164), - OP(1200, 1276), - OP(1400, 1420), - OP(1600, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.70GHz (Banias) */ -static struct cpufreq_frequency_table banias_1700[] = -{ - OP( 600, 956), - OP( 800, 1004), - OP(1000, 1116), - OP(1200, 1228), - OP(1400, 1308), - OP(1700, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; -#undef OP - -#define _BANIAS(cpuid, max, name) \ -{ .cpu_id = cpuid, \ - .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ - .max_freq = (max)*1000, \ - .op_points = banias_##max, \ -} -#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) - -/* CPU models, their operating frequency range, and freq/voltage - operating points */ -static struct cpu_model models[] = -{ - _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), - BANIAS(1000), - BANIAS(1100), - BANIAS(1200), - BANIAS(1300), - BANIAS(1400), - BANIAS(1500), - BANIAS(1600), - BANIAS(1700), - - /* NULL model_name is a wildcard */ - { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, - { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, - { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, - { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, - { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, - - { NULL, } -}; -#undef _BANIAS -#undef BANIAS - -static int centrino_cpu_init_table(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); - struct cpu_model *model; - - for(model = models; model->cpu_id != NULL; model++) - if (centrino_verify_cpu_id(cpu, model->cpu_id) && - (model->model_name == NULL || - strcmp(cpu->x86_model_id, model->model_name) == 0)) - break; - - if (model->cpu_id == NULL) { - /* No match at all */ - dprintk("no support for CPU model \"%s\": " - "send /proc/cpuinfo to " MAINTAINER "\n", - cpu->x86_model_id); - return -ENOENT; - } - - if (model->op_points == NULL) { - /* Matched a non-match */ - dprintk("no table support for CPU model \"%s\"\n", - cpu->x86_model_id); - dprintk("try using the acpi-cpufreq driver\n"); - return -ENOENT; - } - - per_cpu(centrino_model, policy->cpu) = model; - - dprintk("found \"%s\": max frequency: %dkHz\n", - model->model_name, model->max_freq); - - return 0; -} - -#else -static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) -{ - return -ENODEV; -} -#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ - -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, - const struct cpu_id *x) -{ - if ((c->x86 == x->x86) && - (c->x86_model == x->x86_model) && - (c->x86_mask == x->x86_mask)) - return 1; - return 0; -} - -/* To be called only after centrino_model is initialized */ -static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) -{ - int i; - - /* - * Extract clock in kHz from PERF_CTL value - * for centrino, as some DSDTs are buggy. - * Ideally, this can be done using the acpi_data structure. - */ - if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || - (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || - (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { - msr = (msr >> 8) & 0xff; - return msr * 100000; - } - - if ((!per_cpu(centrino_model, cpu)) || - (!per_cpu(centrino_model, cpu)->op_points)) - return 0; - - msr &= 0xffff; - for (i = 0; - per_cpu(centrino_model, cpu)->op_points[i].frequency - != CPUFREQ_TABLE_END; - i++) { - if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) - return per_cpu(centrino_model, cpu)-> - op_points[i].frequency; - } - if (failsafe) - return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; - else - return 0; -} - -/* Return the current CPU frequency in kHz */ -static unsigned int get_cur_freq(unsigned int cpu) -{ - unsigned l, h; - unsigned clock_freq; - - rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); - clock_freq = extract_clock(l, cpu, 0); - - if (unlikely(clock_freq == 0)) { - /* - * On some CPUs, we can see transient MSR values (which are - * not present in _PSS), while CPU is doing some automatic - * P-state transition (like TM2). Get the last freq set - * in PERF_CTL. - */ - rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); - clock_freq = extract_clock(l, cpu, 1); - } - return clock_freq; -} - - -static int centrino_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); - unsigned freq; - unsigned l, h; - int ret; - int i; - - /* Only Intel makes Enhanced Speedstep-capable CPUs */ - if (cpu->x86_vendor != X86_VENDOR_INTEL || - !cpu_has(cpu, X86_FEATURE_EST)) - return -ENODEV; - - if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) - centrino_driver.flags |= CPUFREQ_CONST_LOOPS; - - if (policy->cpu != 0) - return -ENODEV; - - for (i = 0; i < N_IDS; i++) - if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) - break; - - if (i != N_IDS) - per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; - - if (!per_cpu(centrino_cpu, policy->cpu)) { - dprintk("found unsupported CPU with " - "Enhanced SpeedStep: send /proc/cpuinfo to " - MAINTAINER "\n"); - return -ENODEV; - } - - if (centrino_cpu_init_table(policy)) { - return -ENODEV; - } - - /* Check to see if Enhanced SpeedStep is enabled, and try to - enable it if not. */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - - if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { - l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; - dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); - wrmsr(MSR_IA32_MISC_ENABLE, l, h); - - /* check to see if it stuck */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { - printk(KERN_INFO PFX - "couldn't enable Enhanced SpeedStep\n"); - return -ENODEV; - } - } - - freq = get_cur_freq(policy->cpu); - policy->cpuinfo.transition_latency = 10000; - /* 10uS transition latency */ - policy->cur = freq; - - dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); - - ret = cpufreq_frequency_table_cpuinfo(policy, - per_cpu(centrino_model, policy->cpu)->op_points); - if (ret) - return (ret); - - cpufreq_frequency_table_get_attr( - per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); - - return 0; -} - -static int centrino_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int cpu = policy->cpu; - - if (!per_cpu(centrino_model, cpu)) - return -ENODEV; - - cpufreq_frequency_table_put_attr(cpu); - - per_cpu(centrino_model, cpu) = NULL; - - return 0; -} - -/** - * centrino_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within this model's frequency range at least one - * border included. - */ -static int centrino_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, - per_cpu(centrino_model, policy->cpu)->op_points); -} - -/** - * centrino_setpolicy - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency - * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. - */ -static int centrino_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; - struct cpufreq_freqs freqs; - int retval = 0; - unsigned int j, k, first_cpu, tmp; - cpumask_var_t covered_cpus; - - if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) - return -ENOMEM; - - if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { - retval = -ENODEV; - goto out; - } - - if (unlikely(cpufreq_frequency_table_target(policy, - per_cpu(centrino_model, cpu)->op_points, - target_freq, - relation, - &newstate))) { - retval = -EINVAL; - goto out; - } - - first_cpu = 1; - for_each_cpu(j, policy->cpus) { - int good_cpu; - - /* cpufreq holds the hotplug lock, so we are safe here */ - if (!cpu_online(j)) - continue; - - /* - * Support for SMP systems. - * Make sure we are running on CPU that wants to change freq - */ - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - good_cpu = cpumask_any_and(policy->cpus, - cpu_online_mask); - else - good_cpu = j; - - if (good_cpu >= nr_cpu_ids) { - dprintk("couldn't limit to CPUs in this domain\n"); - retval = -EAGAIN; - if (first_cpu) { - /* We haven't started the transition yet. */ - goto out; - } - break; - } - - msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; - - if (first_cpu) { - rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); - if (msr == (oldmsr & 0xffff)) { - dprintk("no change needed - msr was and needs " - "to be %x\n", oldmsr); - retval = 0; - goto out; - } - - freqs.old = extract_clock(oldmsr, cpu, 0); - freqs.new = extract_clock(msr, cpu, 0); - - dprintk("target=%dkHz old=%d new=%d msr=%04x\n", - target_freq, freqs.old, freqs.new, msr); - - for_each_cpu(k, policy->cpus) { - if (!cpu_online(k)) - continue; - freqs.cpu = k; - cpufreq_notify_transition(&freqs, - CPUFREQ_PRECHANGE); - } - - first_cpu = 0; - /* all but 16 LSB are reserved, treat them with care */ - oldmsr &= ~0xffff; - msr &= 0xffff; - oldmsr |= msr; - } - - wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - break; - - cpumask_set_cpu(j, covered_cpus); - } - - for_each_cpu(k, policy->cpus) { - if (!cpu_online(k)) - continue; - freqs.cpu = k; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - if (unlikely(retval)) { - /* - * We have failed halfway through the frequency change. - * We have sent callbacks to policy->cpus and - * MSRs have already been written on coverd_cpus. - * Best effort undo.. - */ - - for_each_cpu(j, covered_cpus) - wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); - - tmp = freqs.new; - freqs.new = freqs.old; - freqs.old = tmp; - for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - } - retval = 0; - -out: - free_cpumask_var(covered_cpus); - return retval; -} - -static struct freq_attr* centrino_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver centrino_driver = { - .name = "centrino", /* should be speedstep-centrino, - but there's a 16 char limit */ - .init = centrino_cpu_init, - .exit = centrino_cpu_exit, - .verify = centrino_verify, - .target = centrino_target, - .get = get_cur_freq, - .attr = centrino_attr, - .owner = THIS_MODULE, -}; - - -/** - * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver - * - * Initializes the Enhanced SpeedStep support. Returns -ENODEV on - * unsupported devices, -ENOENT if there's no voltage table for this - * particular CPU model, -EINVAL on problems during initiatization, - * and zero on success. - * - * This is quite picky. Not only does the CPU have to advertise the - * "est" flag in the cpuid capability flags, we look for a specific - * CPU model and stepping, and we need to have the exact model name in - * our voltage tables. That is, be paranoid about not releasing - * someone's valuable magic smoke. - */ -static int __init centrino_init(void) -{ - struct cpuinfo_x86 *cpu = &cpu_data(0); - - if (!cpu_has(cpu, X86_FEATURE_EST)) - return -ENODEV; - - return cpufreq_register_driver(¢rino_driver); -} - -static void __exit centrino_exit(void) -{ - cpufreq_unregister_driver(¢rino_driver); -} - -MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); -MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); -MODULE_LICENSE ("GPL"); - -late_initcall(centrino_init); -module_exit(centrino_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c deleted file mode 100644 index 561758e95180..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * (C) 2001 Dave Jones, Arjan van de ven. - * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon reverse engineered information, and on Intel documentation - * for chipsets ICH2-M and ICH3-M. - * - * Many thanks to Ducrot Bruno for finding and fixing the last - * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler - * for extensive testing. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - - -/********************************************************************* - * SPEEDSTEP - DEFINITIONS * - *********************************************************************/ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/pci.h> -#include <linux/sched.h> - -#include "speedstep-lib.h" - - -/* speedstep_chipset: - * It is necessary to know which chipset is used. As accesses to - * this device occur at various places in this module, we need a - * static struct pci_dev * pointing to that device. - */ -static struct pci_dev *speedstep_chipset_dev; - - -/* speedstep_processor - */ -static enum speedstep_processor speedstep_processor; - -static u32 pmbase; - -/* - * There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-ich", msg) - - -/** - * speedstep_find_register - read the PMBASE address - * - * Returns: -ENODEV if no register could be found - */ -static int speedstep_find_register(void) -{ - if (!speedstep_chipset_dev) - return -ENODEV; - - /* get PMBASE */ - pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); - if (!(pmbase & 0x01)) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return -ENODEV; - } - - pmbase &= 0xFFFFFFFE; - if (!pmbase) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return -ENODEV; - } - - dprintk("pmbase is 0x%x\n", pmbase); - return 0; -} - -/** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - * Tries to change the SpeedStep state. Can be called from - * smp_call_function_single. - */ -static void speedstep_set_state(unsigned int state) -{ - u8 pm2_blk; - u8 value; - unsigned long flags; - - if (state > 0x1) - return; - - /* Disable IRQs */ - local_irq_save(flags); - - /* read state */ - value = inb(pmbase + 0x50); - - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - - /* write new state */ - value &= 0xFE; - value |= state; - - dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); - - /* Disable bus master arbitration */ - pm2_blk = inb(pmbase + 0x20); - pm2_blk |= 0x01; - outb(pm2_blk, (pmbase + 0x20)); - - /* Actual transition */ - outb(value, (pmbase + 0x50)); - - /* Restore bus master arbitration */ - pm2_blk &= 0xfe; - outb(pm2_blk, (pmbase + 0x20)); - - /* check if transition was successful */ - value = inb(pmbase + 0x50); - - /* Enable IRQs */ - local_irq_restore(flags); - - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - - if (state == (value & 0x1)) - dprintk("change to %u MHz succeeded\n", - speedstep_get_frequency(speedstep_processor) / 1000); - else - printk(KERN_ERR "cpufreq: change failed - I/O error\n"); - - return; -} - -/* Wrapper for smp_call_function_single. */ -static void _speedstep_set_state(void *_state) -{ - speedstep_set_state(*(unsigned int *)_state); -} - -/** - * speedstep_activate - activate SpeedStep control in the chipset - * - * Tries to activate the SpeedStep status and control registers. - * Returns -EINVAL on an unsupported chipset, and zero on success. - */ -static int speedstep_activate(void) -{ - u16 value = 0; - - if (!speedstep_chipset_dev) - return -EINVAL; - - pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); - if (!(value & 0x08)) { - value |= 0x08; - dprintk("activating SpeedStep (TM) registers\n"); - pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); - } - - return 0; -} - - -/** - * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic - * - * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to - * the LPC bridge / PM module which contains all power-management - * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected - * chipset, or zero on failure. - */ -static unsigned int speedstep_detect_chipset(void) -{ - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801DB_12, - PCI_ANY_ID, PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) - return 4; /* 4-M */ - - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801CA_12, - PCI_ANY_ID, PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) - return 3; /* 3-M */ - - - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801BA_10, - PCI_ANY_ID, PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) { - /* speedstep.c causes lockups on Dell Inspirons 8000 and - * 8100 which use a pretty old revision of the 82815 - * host brige. Abort on these systems. - */ - static struct pci_dev *hostbridge; - - hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82815_MC, - PCI_ANY_ID, PCI_ANY_ID, - NULL); - - if (!hostbridge) - return 2; /* 2-M */ - - if (hostbridge->revision < 5) { - dprintk("hostbridge does not support speedstep\n"); - speedstep_chipset_dev = NULL; - pci_dev_put(hostbridge); - return 0; - } - - pci_dev_put(hostbridge); - return 2; /* 2-M */ - } - - return 0; -} - -static void get_freq_data(void *_speed) -{ - unsigned int *speed = _speed; - - *speed = speedstep_get_frequency(speedstep_processor); -} - -static unsigned int speedstep_get(unsigned int cpu) -{ - unsigned int speed; - - /* You're supposed to ensure CPU is online. */ - if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) - BUG(); - - dprintk("detected %u kHz as current frequency\n", speed); - return speed; -} - -/** - * speedstep_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency - * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. - */ -static int speedstep_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0, policy_cpu; - struct cpufreq_freqs freqs; - int i; - - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], - target_freq, relation, &newstate)) - return -EINVAL; - - policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); - freqs.old = speedstep_get(policy_cpu); - freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = policy->cpu; - - dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); - - /* no transition necessary */ - if (freqs.old == freqs.new) - return 0; - - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, - true); - - for_each_cpu(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - return 0; -} - - -/** - * speedstep_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within speedstep_low_freq and speedstep_high_freq, with - * at least one border included. - */ -static int speedstep_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); -} - -struct get_freqs { - struct cpufreq_policy *policy; - int ret; -}; - -static void get_freqs_on_cpu(void *_get_freqs) -{ - struct get_freqs *get_freqs = _get_freqs; - - get_freqs->ret = - speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - &get_freqs->policy->cpuinfo.transition_latency, - &speedstep_set_state); -} - -static int speedstep_cpu_init(struct cpufreq_policy *policy) -{ - int result; - unsigned int policy_cpu, speed; - struct get_freqs gf; - - /* only run on CPU to be set, or on its sibling */ -#ifdef CONFIG_SMP - cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); -#endif - policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); - - /* detect low and high frequency and transition latency */ - gf.policy = policy; - smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); - if (gf.ret) - return gf.ret; - - /* get current speed setting */ - speed = speedstep_get(policy_cpu); - if (!speed) - return -EIO; - - dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) - ? "low" : "high", - (speed / 1000)); - - /* cpuinfo and default policy values */ - policy->cur = speed; - - result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); - if (result) - return result; - - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); - - return 0; -} - - -static int speedstep_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static struct freq_attr *speedstep_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver speedstep_driver = { - .name = "speedstep-ich", - .verify = speedstep_verify, - .target = speedstep_target, - .init = speedstep_cpu_init, - .exit = speedstep_cpu_exit, - .get = speedstep_get, - .owner = THIS_MODULE, - .attr = speedstep_attr, -}; - - -/** - * speedstep_init - initializes the SpeedStep CPUFreq driver - * - * Initializes the SpeedStep support. Returns -ENODEV on unsupported - * devices, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init speedstep_init(void) -{ - /* detect processor */ - speedstep_processor = speedstep_detect_processor(); - if (!speedstep_processor) { - dprintk("Intel(R) SpeedStep(TM) capable processor " - "not found\n"); - return -ENODEV; - } - - /* detect chipset */ - if (!speedstep_detect_chipset()) { - dprintk("Intel(R) SpeedStep(TM) for this chipset not " - "(yet) available.\n"); - return -ENODEV; - } - - /* activate speedstep support */ - if (speedstep_activate()) { - pci_dev_put(speedstep_chipset_dev); - return -EINVAL; - } - - if (speedstep_find_register()) - return -ENODEV; - - return cpufreq_register_driver(&speedstep_driver); -} - - -/** - * speedstep_exit - unregisters SpeedStep support - * - * Unregisters SpeedStep support. - */ -static void __exit speedstep_exit(void) -{ - pci_dev_put(speedstep_chipset_dev); - cpufreq_unregister_driver(&speedstep_driver); -} - - -MODULE_AUTHOR("Dave Jones <davej@redhat.com>, " - "Dominik Brodowski <linux@brodo.de>"); -MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets " - "with ICH-M southbridges."); -MODULE_LICENSE("GPL"); - -module_init(speedstep_init); -module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c deleted file mode 100644 index a94ec6be69fa..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ /dev/null @@ -1,481 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> - * - * Licensed under the terms of the GNU GPL License version 2. - * - * Library for common functions for Intel SpeedStep v.1 and v.2 support - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/cpufreq.h> - -#include <asm/msr.h> -#include <asm/tsc.h> -#include "speedstep-lib.h" - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-lib", msg) - -#define PFX "speedstep-lib: " - -#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -static int relaxed_check; -#else -#define relaxed_check 0 -#endif - -/********************************************************************* - * GET PROCESSOR CORE SPEED IN KHZ * - *********************************************************************/ - -static unsigned int pentium3_get_frequency(enum speedstep_processor processor) -{ - /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ - struct { - unsigned int ratio; /* Frequency Multiplier (x10) */ - u8 bitmap; /* power on configuration bits - [27, 25:22] (in MSR 0x2a) */ - } msr_decode_mult[] = { - { 30, 0x01 }, - { 35, 0x05 }, - { 40, 0x02 }, - { 45, 0x06 }, - { 50, 0x00 }, - { 55, 0x04 }, - { 60, 0x0b }, - { 65, 0x0f }, - { 70, 0x09 }, - { 75, 0x0d }, - { 80, 0x0a }, - { 85, 0x26 }, - { 90, 0x20 }, - { 100, 0x2b }, - { 0, 0xff } /* error or unknown value */ - }; - - /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ - struct { - unsigned int value; /* Front Side Bus speed in MHz */ - u8 bitmap; /* power on configuration bits [18: 19] - (in MSR 0x2a) */ - } msr_decode_fsb[] = { - { 66, 0x0 }, - { 100, 0x2 }, - { 133, 0x1 }, - { 0, 0xff} - }; - - u32 msr_lo, msr_tmp; - int i = 0, j = 0; - - /* read MSR 0x2a - we only need the low 32 bits */ - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - msr_tmp = msr_lo; - - /* decode the FSB */ - msr_tmp &= 0x00c0000; - msr_tmp >>= 18; - while (msr_tmp != msr_decode_fsb[i].bitmap) { - if (msr_decode_fsb[i].bitmap == 0xff) - return 0; - i++; - } - - /* decode the multiplier */ - if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) { - dprintk("workaround for early PIIIs\n"); - msr_lo &= 0x03c00000; - } else - msr_lo &= 0x0bc00000; - msr_lo >>= 22; - while (msr_lo != msr_decode_mult[j].bitmap) { - if (msr_decode_mult[j].bitmap == 0xff) - return 0; - j++; - } - - dprintk("speed is %u\n", - (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); - - return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100; -} - - -static unsigned int pentiumM_get_frequency(void) -{ - u32 msr_lo, msr_tmp; - - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - - /* see table B-2 of 24547212.pdf */ - if (msr_lo & 0x00040000) { - printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n", - msr_lo, msr_tmp); - return 0; - } - - msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", - msr_tmp, (msr_tmp * 100 * 1000)); - - return msr_tmp * 100 * 1000; -} - -static unsigned int pentium_core_get_frequency(void) -{ - u32 fsb = 0; - u32 msr_lo, msr_tmp; - int ret; - - rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); - /* see table B-2 of 25366920.pdf */ - switch (msr_lo & 0x07) { - case 5: - fsb = 100000; - break; - case 1: - fsb = 133333; - break; - case 3: - fsb = 166667; - break; - case 2: - fsb = 200000; - break; - case 0: - fsb = 266667; - break; - case 4: - fsb = 333333; - break; - default: - printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); - } - - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", - msr_lo, msr_tmp); - - msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", - msr_tmp, (msr_tmp * fsb)); - - ret = (msr_tmp * fsb); - return ret; -} - - -static unsigned int pentium4_get_frequency(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - u32 msr_lo, msr_hi, mult; - unsigned int fsb = 0; - unsigned int ret; - u8 fsb_code; - - /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency - * to System Bus Frequency Ratio Field in the Processor Frequency - * Configuration Register of the MSR. Therefore the current - * frequency cannot be calculated and has to be measured. - */ - if (c->x86_model < 2) - return cpu_khz; - - rdmsr(0x2c, msr_lo, msr_hi); - - dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); - - /* decode the FSB: see IA-32 Intel (C) Architecture Software - * Developer's Manual, Volume 3: System Prgramming Guide, - * revision #12 in Table B-1: MSRs in the Pentium 4 and - * Intel Xeon Processors, on page B-4 and B-5. - */ - fsb_code = (msr_lo >> 16) & 0x7; - switch (fsb_code) { - case 0: - fsb = 100 * 1000; - break; - case 1: - fsb = 13333 * 10; - break; - case 2: - fsb = 200 * 1000; - break; - } - - if (!fsb) - printk(KERN_DEBUG PFX "couldn't detect FSB speed. " - "Please send an e-mail to <linux@brodo.de>\n"); - - /* Multiplier. */ - mult = msr_lo >> 24; - - dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", - fsb, mult, (fsb * mult)); - - ret = (fsb * mult); - return ret; -} - - -/* Warning: may get called from smp_call_function_single. */ -unsigned int speedstep_get_frequency(enum speedstep_processor processor) -{ - switch (processor) { - case SPEEDSTEP_CPU_PCORE: - return pentium_core_get_frequency(); - case SPEEDSTEP_CPU_PM: - return pentiumM_get_frequency(); - case SPEEDSTEP_CPU_P4D: - case SPEEDSTEP_CPU_P4M: - return pentium4_get_frequency(); - case SPEEDSTEP_CPU_PIII_T: - case SPEEDSTEP_CPU_PIII_C: - case SPEEDSTEP_CPU_PIII_C_EARLY: - return pentium3_get_frequency(processor); - default: - return 0; - }; - return 0; -} -EXPORT_SYMBOL_GPL(speedstep_get_frequency); - - -/********************************************************************* - * DETECT SPEEDSTEP-CAPABLE PROCESSOR * - *********************************************************************/ - -unsigned int speedstep_detect_processor(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - u32 ebx, msr_lo, msr_hi; - - dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); - - if ((c->x86_vendor != X86_VENDOR_INTEL) || - ((c->x86 != 6) && (c->x86 != 0xF))) - return 0; - - if (c->x86 == 0xF) { - /* Intel Mobile Pentium 4-M - * or Intel Mobile Pentium 4 with 533 MHz FSB */ - if (c->x86_model != 2) - return 0; - - ebx = cpuid_ebx(0x00000001); - ebx &= 0x000000FF; - - dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); - - switch (c->x86_mask) { - case 4: - /* - * B-stepping [M-P4-M] - * sample has ebx = 0x0f, production has 0x0e. - */ - if ((ebx == 0x0e) || (ebx == 0x0f)) - return SPEEDSTEP_CPU_P4M; - break; - case 7: - /* - * C-stepping [M-P4-M] - * needs to have ebx=0x0e, else it's a celeron: - * cf. 25130917.pdf / page 7, footnote 5 even - * though 25072120.pdf / page 7 doesn't say - * samples are only of B-stepping... - */ - if (ebx == 0x0e) - return SPEEDSTEP_CPU_P4M; - break; - case 9: - /* - * D-stepping [M-P4-M or M-P4/533] - * - * this is totally strange: CPUID 0x0F29 is - * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. - * The latter need to be sorted out as they don't - * support speedstep. - * Celerons with CPUID 0x0F29 may have either - * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything - * specific. - * M-P4-Ms may have either ebx=0xe or 0xf [see above] - * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] - * also, M-P4M HTs have ebx=0x8, too - * For now, they are distinguished by the model_id - * string - */ - if ((ebx == 0x0e) || - (strstr(c->x86_model_id, - "Mobile Intel(R) Pentium(R) 4") != NULL)) - return SPEEDSTEP_CPU_P4M; - break; - default: - break; - } - return 0; - } - - switch (c->x86_model) { - case 0x0B: /* Intel PIII [Tualatin] */ - /* cpuid_ebx(1) is 0x04 for desktop PIII, - * 0x06 for mobile PIII-M */ - ebx = cpuid_ebx(0x00000001); - dprintk("ebx is %x\n", ebx); - - ebx &= 0x000000FF; - - if (ebx != 0x06) - return 0; - - /* So far all PIII-M processors support SpeedStep. See - * Intel's 24540640.pdf of June 2003 - */ - return SPEEDSTEP_CPU_PIII_T; - - case 0x08: /* Intel PIII [Coppermine] */ - - /* all mobile PIII Coppermines have FSB 100 MHz - * ==> sort out a few desktop PIIIs. */ - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", - msr_lo, msr_hi); - msr_lo &= 0x00c0000; - if (msr_lo != 0x0080000) - return 0; - - /* - * If the processor is a mobile version, - * platform ID has bit 50 set - * it has SpeedStep technology if either - * bit 56 or 57 is set - */ - rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", - msr_lo, msr_hi); - if ((msr_hi & (1<<18)) && - (relaxed_check ? 1 : (msr_hi & (3<<24)))) { - if (c->x86_mask == 0x01) { - dprintk("early PIII version\n"); - return SPEEDSTEP_CPU_PIII_C_EARLY; - } else - return SPEEDSTEP_CPU_PIII_C; - } - - default: - return 0; - } -} -EXPORT_SYMBOL_GPL(speedstep_detect_processor); - - -/********************************************************************* - * DETECT SPEEDSTEP SPEEDS * - *********************************************************************/ - -unsigned int speedstep_get_freqs(enum speedstep_processor processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)) -{ - unsigned int prev_speed; - unsigned int ret = 0; - unsigned long flags; - struct timeval tv1, tv2; - - if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) - return -EINVAL; - - dprintk("trying to determine both speeds\n"); - - /* get current speed */ - prev_speed = speedstep_get_frequency(processor); - if (!prev_speed) - return -EIO; - - dprintk("previous speed is %u\n", prev_speed); - - local_irq_save(flags); - - /* switch to low state */ - set_state(SPEEDSTEP_LOW); - *low_speed = speedstep_get_frequency(processor); - if (!*low_speed) { - ret = -EIO; - goto out; - } - - dprintk("low speed is %u\n", *low_speed); - - /* start latency measurement */ - if (transition_latency) - do_gettimeofday(&tv1); - - /* switch to high state */ - set_state(SPEEDSTEP_HIGH); - - /* end latency measurement */ - if (transition_latency) - do_gettimeofday(&tv2); - - *high_speed = speedstep_get_frequency(processor); - if (!*high_speed) { - ret = -EIO; - goto out; - } - - dprintk("high speed is %u\n", *high_speed); - - if (*low_speed == *high_speed) { - ret = -ENODEV; - goto out; - } - - /* switch to previous state, if necessary */ - if (*high_speed != prev_speed) - set_state(SPEEDSTEP_LOW); - - if (transition_latency) { - *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + - tv2.tv_usec - tv1.tv_usec; - dprintk("transition latency is %u uSec\n", *transition_latency); - - /* convert uSec to nSec and add 20% for safety reasons */ - *transition_latency *= 1200; - - /* check if the latency measurement is too high or too low - * and set it to a safe value (500uSec) in that case - */ - if (*transition_latency > 10000000 || - *transition_latency < 50000) { - printk(KERN_WARNING PFX "frequency transition " - "measured seems out of range (%u " - "nSec), falling back to a safe one of" - "%u nSec.\n", - *transition_latency, 500000); - *transition_latency = 500000; - } - } - -out: - local_irq_restore(flags); - return ret; -} -EXPORT_SYMBOL_GPL(speedstep_get_freqs); - -#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -module_param(relaxed_check, int, 0444); -MODULE_PARM_DESC(relaxed_check, - "Don't do all checks for speedstep capability."); -#endif - -MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); -MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); -MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h deleted file mode 100644 index 70d9cea1219d..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> - * - * Licensed under the terms of the GNU GPL License version 2. - * - * Library for common functions for Intel SpeedStep v.1 and v.2 support - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - - - -/* processors */ -enum speedstep_processor { - SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ - SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ - SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ - SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ -/* the following processors are not speedstep-capable and are not auto-detected - * in speedstep_detect_processor(). However, their speed can be detected using - * the speedstep_get_frequency() call. */ - SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ - SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ - SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ -}; - -/* speedstep states -- only two of them */ - -#define SPEEDSTEP_HIGH 0x00000000 -#define SPEEDSTEP_LOW 0x00000001 - - -/* detect a speedstep-capable processor */ -extern enum speedstep_processor speedstep_detect_processor(void); - -/* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); - - -/* detect the low and high speeds of the processor. The callback - * set_state"'s first argument is either SPEEDSTEP_HIGH or - * SPEEDSTEP_LOW; the second argument is zero so that no - * cpufreq_notify_transition calls are initiated. - */ -extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c deleted file mode 100644 index 91bc25b67bc1..000000000000 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Intel SpeedStep SMI driver. - * - * (C) 2003 Hiroshi Miura <miura@da-cha.org> - * - * Licensed under the terms of the GNU GPL License version 2. - * - */ - - -/********************************************************************* - * SPEEDSTEP - DEFINITIONS * - *********************************************************************/ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <asm/ist.h> - -#include "speedstep-lib.h" - -/* speedstep system management interface port/command. - * - * These parameters are got from IST-SMI BIOS call. - * If user gives it, these are used. - * - */ -static int smi_port; -static int smi_cmd; -static unsigned int smi_sig; - -/* info about the processor */ -static enum speedstep_processor speedstep_processor; - -/* - * There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -#define GET_SPEEDSTEP_OWNER 0 -#define GET_SPEEDSTEP_STATE 1 -#define SET_SPEEDSTEP_STATE 2 -#define GET_SPEEDSTEP_FREQS 4 - -/* how often shall the SMI call be tried if it failed, e.g. because - * of DMA activity going on? */ -#define SMI_TRIES 5 - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-smi", msg) - -/** - * speedstep_smi_ownership - */ -static int speedstep_smi_ownership(void) -{ - u32 command, result, magic, dummy; - u32 function = GET_SPEEDSTEP_OWNER; - unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - magic = virt_to_phys(magic_data); - - dprintk("trying to obtain ownership with command %x at port %x\n", - command, smi_port); - - __asm__ __volatile__( - "push %%ebp\n" - "out %%al, (%%dx)\n" - "pop %%ebp\n" - : "=D" (result), - "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), - "=S" (dummy) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), - "D" (0), "S" (magic) - : "memory" - ); - - dprintk("result is %x\n", result); - - return result; -} - -/** - * speedstep_smi_get_freqs - get SpeedStep preferred & current freq. - * @low: the low frequency value is placed here - * @high: the high frequency value is placed here - * - * Only available on later SpeedStep-enabled systems, returns false results or - * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing - * shows that the latter occurs if !(ist_info.event & 0xFFFF). - */ -static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) -{ - u32 command, result = 0, edi, high_mhz, low_mhz, dummy; - u32 state = 0; - u32 function = GET_SPEEDSTEP_FREQS; - - if (!(ist_info.event & 0xFFFF)) { - dprintk("bug #1422 -- can't read freqs from BIOS\n"); - return -ENODEV; - } - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to determine frequencies with command %x at port %x\n", - command, smi_port); - - __asm__ __volatile__( - "push %%ebp\n" - "out %%al, (%%dx)\n" - "pop %%ebp" - : "=a" (result), - "=b" (high_mhz), - "=c" (low_mhz), - "=d" (state), "=D" (edi), "=S" (dummy) - : "a" (command), - "b" (function), - "c" (state), - "d" (smi_port), "S" (0), "D" (0) - ); - - dprintk("result %x, low_freq %u, high_freq %u\n", - result, low_mhz, high_mhz); - - /* abort if results are obviously incorrect... */ - if ((high_mhz + low_mhz) < 600) - return -EINVAL; - - *high = high_mhz * 1000; - *low = low_mhz * 1000; - - return result; -} - -/** - * speedstep_get_state - set the SpeedStep state - * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - */ -static int speedstep_get_state(void) -{ - u32 function = GET_SPEEDSTEP_STATE; - u32 result, state, edi, command, dummy; - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to determine current setting with command %x " - "at port %x\n", command, smi_port); - - __asm__ __volatile__( - "push %%ebp\n" - "out %%al, (%%dx)\n" - "pop %%ebp\n" - : "=a" (result), - "=b" (state), "=D" (edi), - "=c" (dummy), "=d" (dummy), "=S" (dummy) - : "a" (command), "b" (function), "c" (0), - "d" (smi_port), "S" (0), "D" (0) - ); - - dprintk("state is %x, result is %x\n", state, result); - - return state & 1; -} - - -/** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - */ -static void speedstep_set_state(unsigned int state) -{ - unsigned int result = 0, command, new_state, dummy; - unsigned long flags; - unsigned int function = SET_SPEEDSTEP_STATE; - unsigned int retry = 0; - - if (state > 0x1) - return; - - /* Disable IRQs */ - local_irq_save(flags); - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to set frequency to state %u " - "with command %x at port %x\n", - state, command, smi_port); - - do { - if (retry) { - dprintk("retry %u, previous result %u, waiting...\n", - retry, result); - mdelay(retry * 50); - } - retry++; - __asm__ __volatile__( - "push %%ebp\n" - "out %%al, (%%dx)\n" - "pop %%ebp" - : "=b" (new_state), "=D" (result), - "=c" (dummy), "=a" (dummy), - "=d" (dummy), "=S" (dummy) - : "a" (command), "b" (function), "c" (state), - "d" (smi_port), "S" (0), "D" (0) - ); - } while ((new_state != state) && (retry <= SMI_TRIES)); - - /* enable IRQs */ - local_irq_restore(flags); - - if (new_state == state) - dprintk("change to %u MHz succeeded after %u tries " - "with result %u\n", - (speedstep_freqs[new_state].frequency / 1000), - retry, result); - else - printk(KERN_ERR "cpufreq: change to state %u " - "failed with new_state %u and result %u\n", - state, new_state, result); - - return; -} - - -/** - * speedstep_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: new freq - * @relation: - * - * Sets a new CPUFreq policy/freq. - */ -static int speedstep_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], - target_freq, relation, &newstate)) - return -EINVAL; - - freqs.old = speedstep_freqs[speedstep_get_state()].frequency; - freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = 0; /* speedstep.c is UP only driver */ - - if (freqs.old == freqs.new) - return 0; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - speedstep_set_state(newstate); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return 0; -} - - -/** - * speedstep_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within speedstep_low_freq and speedstep_high_freq, with - * at least one border included. - */ -static int speedstep_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); -} - - -static int speedstep_cpu_init(struct cpufreq_policy *policy) -{ - int result; - unsigned int speed, state; - unsigned int *low, *high; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - result = speedstep_smi_ownership(); - if (result) { - dprintk("fails in acquiring ownership of a SMI interface.\n"); - return -EINVAL; - } - - /* detect low and high frequency */ - low = &speedstep_freqs[SPEEDSTEP_LOW].frequency; - high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency; - - result = speedstep_smi_get_freqs(low, high); - if (result) { - /* fall back to speedstep_lib.c dection mechanism: - * try both states out */ - dprintk("could not detect low and high frequencies " - "by SMI call.\n"); - result = speedstep_get_freqs(speedstep_processor, - low, high, - NULL, - &speedstep_set_state); - - if (result) { - dprintk("could not detect two different speeds" - " -- aborting.\n"); - return result; - } else - dprintk("workaround worked.\n"); - } - - /* get current speed setting */ - state = speedstep_get_state(); - speed = speedstep_freqs[state].frequency; - - dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) - ? "low" : "high", - (speed / 1000)); - - /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = speed; - - result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); - if (result) - return result; - - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); - - return 0; -} - -static int speedstep_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int speedstep_get(unsigned int cpu) -{ - if (cpu) - return -ENODEV; - return speedstep_get_frequency(speedstep_processor); -} - - -static int speedstep_resume(struct cpufreq_policy *policy) -{ - int result = speedstep_smi_ownership(); - - if (result) - dprintk("fails in re-acquiring ownership of a SMI interface.\n"); - - return result; -} - -static struct freq_attr *speedstep_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver speedstep_driver = { - .name = "speedstep-smi", - .verify = speedstep_verify, - .target = speedstep_target, - .init = speedstep_cpu_init, - .exit = speedstep_cpu_exit, - .get = speedstep_get, - .resume = speedstep_resume, - .owner = THIS_MODULE, - .attr = speedstep_attr, -}; - -/** - * speedstep_init - initializes the SpeedStep CPUFreq driver - * - * Initializes the SpeedStep support. Returns -ENODEV on unsupported - * BIOS, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init speedstep_init(void) -{ - speedstep_processor = speedstep_detect_processor(); - - switch (speedstep_processor) { - case SPEEDSTEP_CPU_PIII_T: - case SPEEDSTEP_CPU_PIII_C: - case SPEEDSTEP_CPU_PIII_C_EARLY: - break; - default: - speedstep_processor = 0; - } - - if (!speedstep_processor) { - dprintk("No supported Intel CPU detected.\n"); - return -ENODEV; - } - - dprintk("signature:0x%.8lx, command:0x%.8lx, " - "event:0x%.8lx, perf_level:0x%.8lx.\n", - ist_info.signature, ist_info.command, - ist_info.event, ist_info.perf_level); - - /* Error if no IST-SMI BIOS or no PARM - sig= 'ISGE' aka 'Intel Speedstep Gate E' */ - if ((ist_info.signature != 0x47534943) && ( - (smi_port == 0) || (smi_cmd == 0))) - return -ENODEV; - - if (smi_sig == 1) - smi_sig = 0x47534943; - else - smi_sig = ist_info.signature; - - /* setup smi_port from MODLULE_PARM or BIOS */ - if ((smi_port > 0xff) || (smi_port < 0)) - return -EINVAL; - else if (smi_port == 0) - smi_port = ist_info.command & 0xff; - - if ((smi_cmd > 0xff) || (smi_cmd < 0)) - return -EINVAL; - else if (smi_cmd == 0) - smi_cmd = (ist_info.command >> 16) & 0xff; - - return cpufreq_register_driver(&speedstep_driver); -} - - -/** - * speedstep_exit - unregisters SpeedStep support - * - * Unregisters SpeedStep support. - */ -static void __exit speedstep_exit(void) -{ - cpufreq_unregister_driver(&speedstep_driver); -} - -module_param(smi_port, int, 0444); -module_param(smi_cmd, int, 0444); -module_param(smi_sig, uint, 0444); - -MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value " - "-- Intel's default setting is 0xb2"); -MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value " - "-- Intel's default setting is 0x82"); -MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the " - "SMI interface."); - -MODULE_AUTHOR("Hiroshi Miura"); -MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface."); -MODULE_LICENSE("GPL"); - -module_init(speedstep_init); -module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index df86bc8c859d..1edf5ba4fb2b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,10 +29,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { + u64 misc_enable; + /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - u64 misc_enable; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { @@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) * (model 2) with the same problem. */ if (c->x86 == 15) { - u64 misc_enable; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { @@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) } } #endif + + /* + * If fast string is not enabled in IA32_MISC_ENABLE for any reason, + * clear the fast string and enhanced fast string CPU capabilities. + */ + if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { + printk(KERN_INFO "Disabled fast string operations\n"); + setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); + setup_clear_cpu_cap(X86_FEATURE_ERMS); + } + } } #ifdef CONFIG_X86_32 @@ -400,12 +411,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) switch (c->x86_model) { case 5: - if (c->x86_mask == 0) { - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - } + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; break; case 6: diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1ce1af2899df..c105c533ed94 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -327,7 +327,6 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); - l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; } @@ -454,27 +453,16 @@ int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, { int ret = 0; -#define SUBCACHE_MASK (3UL << 20) -#define SUBCACHE_INDEX 0xfff - - /* - * check whether this slot is already used or - * the index is already disabled - */ + /* check if @slot is already used or the index is already disabled */ ret = amd_get_l3_disable_slot(l3, slot); if (ret >= 0) return -EINVAL; - /* - * check whether the other slot has disabled the - * same index already - */ - if (index == amd_get_l3_disable_slot(l3, !slot)) + if (index > l3->indices) return -EINVAL; - /* do not allow writes outside of allowed bits */ - if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || - ((index & SUBCACHE_INDEX) > l3->indices)) + /* check whether the other slot has disabled the same index already */ + if (index == amd_get_l3_disable_slot(l3, !slot)) return -EINVAL; amd_l3_disable_index(l3, cpu, slot, index); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3385ea26f684..ff1ae9b6464d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -105,20 +105,6 @@ static int cpu_missing; ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); -static int default_decode_mce(struct notifier_block *nb, unsigned long val, - void *data) -{ - pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); - pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); - - return NOTIFY_STOP; -} - -static struct notifier_block mce_dec_nb = { - .notifier_call = default_decode_mce, - .priority = -1, -}; - /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL @@ -212,6 +198,8 @@ void mce_log(struct mce *mce) static void print_mce(struct mce *m) { + int ret = 0; + pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); @@ -239,7 +227,11 @@ static void print_mce(struct mce *m) * Print out human-readable details about the MCE error, * (if the CPU has an implementation for that) */ - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); + ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); + if (ret == NOTIFY_STOP) + return; + + pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -590,7 +582,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { mce_log(&m); atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); - add_taint(TAINT_MACHINE_CHECK); } /* @@ -1722,8 +1713,6 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { - atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); - mcheck_intel_therm_init(); return 0; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 167f97b5596e..bb0adad35143 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -509,6 +509,7 @@ recurse: out_free: if (b) { kobject_put(&b->kobj); + list_del(&b->miscj); kfree(b); } return err; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6f8c5e9da97f..27c625178bf1 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -187,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level) this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); - - add_taint(TAINT_MACHINE_CHECK); return 1; } if (old_event) { @@ -355,7 +353,6 @@ static void notify_thresholds(__u64 msr_val) static void intel_thermal_interrupt(void) { __u64 msr_val; - struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); rdmsrl(MSR_IA32_THERM_STATUS, msr_val); @@ -367,19 +364,19 @@ static void intel_thermal_interrupt(void) CORE_LEVEL) != 0) mce_log_therm_throt_event(CORE_THROTTLED | msr_val); - if (cpu_has(c, X86_FEATURE_PLN)) + if (this_cpu_has(X86_FEATURE_PLN)) if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, CORE_LEVEL) != 0) mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); - if (cpu_has(c, X86_FEATURE_PTS)) { + if (this_cpu_has(X86_FEATURE_PTS)) { rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, PACKAGE_LEVEL) != 0) mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); - if (cpu_has(c, X86_FEATURE_PLN)) + if (this_cpu_has(X86_FEATURE_PLN)) if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, @@ -393,7 +390,6 @@ static void unexpected_thermal_interrupt(void) { printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); } static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; @@ -446,18 +442,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = lvtthmr_init; /* * The initial value of thermal LVT entries on all APs always reads * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI * sequence to them and LVT registers are reset to 0s except for * the mask bits which are set to 1s when APs receive INIT IPI. - * Always restore the value that BIOS has programmed on AP based on - * BSP's info we saved since BIOS is always setting the same value - * for all threads/cores + * If BIOS takes over the thermal interrupt and sets its interrupt + * delivery mode to SMI (not fixed), it restores the value that the + * BIOS has programmed on AP based on BSP's info we saved since BIOS + * is always setting the same value for all threads/cores. */ - apic_write(APIC_LVTTHMR, lvtthmr_init); + if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) + apic_write(APIC_LVTTHMR, lvtthmr_init); - h = lvtthmr_init; if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e638689279d3..3a0338b4b179 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -31,6 +31,7 @@ #include <asm/nmi.h> #include <asm/compat.h> #include <asm/smp.h> +#include <asm/alternative.h> #if 0 #undef wrmsrl @@ -363,12 +364,18 @@ again: return new_raw_count; } -/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ static inline int x86_pmu_addr_offset(int index) { - if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) - return index << 1; - return index; + int offset; + + /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ + alternative_io(ASM_NOP2, + "shll $1, %%eax", + X86_FEATURE_PERFCTR_CORE, + "=a" (offset), + "a" (index)); + + return offset; } static inline unsigned int x86_pmu_config_addr(int index) @@ -1766,17 +1773,6 @@ static struct pmu pmu = { * callchain support */ -static void -backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - static int backtrace_stack(void *data, char *name) { return 0; @@ -1790,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, .walk_stack = print_context_stack_bp, diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index cf4e369cea67..fe29c1d2219e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids */ static const u64 amd_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ }; static u64 amd_pmu_event_map(int hw_event) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 447a28de6f09..41178c826c48 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -36,7 +36,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; -static struct event_constraint intel_core_event_constraints[] = +static struct event_constraint intel_core_event_constraints[] __read_mostly = { INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ @@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct event_constraint intel_core2_event_constraints[] = +static struct event_constraint intel_core2_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct event_constraint intel_nehalem_event_constraints[] = +static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct extra_reg intel_nehalem_extra_regs[] = +static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), EVENT_EXTRA_END }; -static struct event_constraint intel_nehalem_percore_constraints[] = +static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = { INTEL_EVENT_CONSTRAINT(0xb7, 0), EVENT_CONSTRAINT_END }; -static struct event_constraint intel_westmere_event_constraints[] = +static struct event_constraint intel_westmere_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct event_constraint intel_snb_event_constraints[] = +static struct event_constraint intel_snb_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] = EVENT_CONSTRAINT_END }; -static struct extra_reg intel_westmere_extra_regs[] = +static struct extra_reg intel_westmere_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), EVENT_EXTRA_END }; -static struct event_constraint intel_westmere_percore_constraints[] = +static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = { INTEL_EVENT_CONSTRAINT(0xb7, 0), INTEL_EVENT_CONSTRAINT(0xbb, 0), EVENT_CONSTRAINT_END }; -static struct event_constraint intel_gen_event_constraints[] = +static struct event_constraint intel_gen_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -1440,6 +1440,11 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; + /* UOPS_ISSUED.STALLED_CYCLES */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; + if (ebx & 0x40) { /* * Erratum AAJ80 detected, we work it around by using @@ -1480,6 +1485,12 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; x86_pmu.extra_regs = intel_westmere_extra_regs; + + /* UOPS_ISSUED.STALLED_CYCLES */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; + pr_cont("Westmere events, "); break; @@ -1491,6 +1502,12 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_events; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; + pr_cont("SandyBridge events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index e93fcd55fae1..ead584fb6a7d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = { .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, .escr_emask = - P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_X87_ASSIST] = { @@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - data.addr = 0; - data.raw = NULL; + perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); @@ -1197,7 +1196,7 @@ static __init int p4_pmu_init(void) { unsigned int low, high; - /* If we get stripped -- indexig fails */ + /* If we get stripped -- indexing fails */ BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); rdmsr(MSR_IA32_MISC_ENABLE, low, high); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e2a3f0606da4..1aae78f775fc 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo, } EXPORT_SYMBOL_GPL(print_context_stack_bp); - -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char *)data, msg); -} - static int print_trace_stack(void *data, char *name) { printk("%s <%s> ", (char *)data, name); @@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, .address = print_trace_address, .walk_stack = print_context_stack, @@ -279,7 +263,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) return 1; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index a93742a57468..0ba15a6cc57e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -260,9 +260,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) return mod_code_status; } -static unsigned char *ftrace_nop_replace(void) +static const unsigned char *ftrace_nop_replace(void) { - return ideal_nop5; + return ideal_nops[NOP_ATOMIC5]; } static int diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d6d6bb361931..3bb08509a7a1 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -23,7 +23,6 @@ static void __init i386_default_early_setup(void) { /* Initialize 32bit specific setup functions */ - x86_init.resources.probe_roms = probe_roms; x86_init.resources.reserve_resources = i386_reserve_resources; x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index bfe8f729e086..6781765b3a0d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -217,7 +217,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { } /* * Common hpet info */ -static unsigned long hpet_period; +static unsigned long hpet_freq; static void hpet_legacy_set_mode(enum clock_event_mode mode, struct clock_event_device *evt); @@ -232,7 +232,6 @@ static struct clock_event_device hpet_clockevent = { .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = hpet_legacy_set_mode, .set_next_event = hpet_legacy_next_event, - .shift = 32, .irq = 0, .rating = 50, }; @@ -290,28 +289,12 @@ static void hpet_legacy_clockevent_register(void) hpet_enable_legacy_int(); /* - * The mult factor is defined as (include/linux/clockchips.h) - * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h) - * hpet_period is in units of femtoseconds (per cycle), so - * mult/2^shift = cyc/ns = 10^6/hpet_period - * mult = (10^6 * 2^shift)/hpet_period - * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period - */ - hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC, - hpet_period, hpet_clockevent.shift); - /* Calculate the min / max delta */ - hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, - &hpet_clockevent); - /* Setup minimum reprogramming delta. */ - hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, - &hpet_clockevent); - - /* * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); - clockevents_register_device(&hpet_clockevent); + clockevents_config_and_register(&hpet_clockevent, hpet_freq, + HPET_MIN_PROG_DELTA, 0x7FFFFFFF); global_clock_event = &hpet_clockevent; printk(KERN_DEBUG "hpet clockevent registered\n"); } @@ -549,7 +532,6 @@ static int hpet_setup_irq(struct hpet_dev *dev) static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) { struct clock_event_device *evt = &hdev->evt; - uint64_t hpet_freq; WARN_ON(cpu != smp_processor_id()); if (!(hdev->flags & HPET_DEV_VALID)) @@ -571,24 +553,10 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) evt->set_mode = hpet_msi_set_mode; evt->set_next_event = hpet_msi_next_event; - evt->shift = 32; - - /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. - */ - hpet_freq = FSEC_PER_SEC; - do_div(hpet_freq, hpet_period); - evt->mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, evt->shift); - /* Calculate the max delta */ - evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt); - /* 5 usec minimum reprogramming delta. */ - evt->min_delta_ns = 5000; - evt->cpumask = cpumask_of(hdev->cpu); - clockevents_register_device(evt); + + clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, + 0x7FFFFFFF); } #ifdef CONFIG_HPET @@ -792,7 +760,6 @@ static struct clocksource clocksource_hpet = { static int hpet_clocksource_register(void) { u64 start, now; - u64 hpet_freq; cycle_t t1; /* Start the counter */ @@ -819,24 +786,7 @@ static int hpet_clocksource_register(void) return -ENODEV; } - /* - * The definition of mult is (include/linux/clocksource.h) - * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc - * so we first need to convert hpet_period to ns/cyc units: - * mult/2^shift = ns/cyc = hpet_period/10^6 - * mult = (hpet_period * 2^shift)/10^6 - * mult = (hpet_period << shift)/FSEC_PER_NSEC - */ - - /* Need to convert hpet_period (fsec/cyc) to cyc/sec: - * - * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc) - * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period - */ - hpet_freq = FSEC_PER_SEC; - do_div(hpet_freq, hpet_period); clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); - return 0; } @@ -845,7 +795,9 @@ static int hpet_clocksource_register(void) */ int __init hpet_enable(void) { + unsigned long hpet_period; unsigned int id; + u64 freq; int i; if (!is_hpet_capable()) @@ -884,6 +836,14 @@ int __init hpet_enable(void) goto out_nohpet; /* + * The period is a femto seconds value. Convert it to a + * frequency. + */ + freq = FSEC_PER_SEC; + do_div(freq, hpet_period); + hpet_freq = freq; + + /* * Read the HPET ID register to retrieve the IRQ routing * information and the number of channels */ diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 2dfd31597443..fb66dc9e36cb 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -93,7 +93,6 @@ static struct clock_event_device pit_ce = { .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = init_pit_timer, .set_next_event = pit_next_event, - .shift = 32, .irq = 0, }; @@ -108,90 +107,12 @@ void __init setup_pit_timer(void) * IO_APIC has been initialized. */ pit_ce.cpumask = cpumask_of(smp_processor_id()); - pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift); - pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce); - pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce); - clockevents_register_device(&pit_ce); + clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF); global_clock_event = &pit_ce; } #ifndef CONFIG_X86_64 -/* - * Since the PIT overflows every tick, its not very useful - * to just read by itself. So use jiffies to emulate a free - * running counter: - */ -static cycle_t pit_read(struct clocksource *cs) -{ - static int old_count; - static u32 old_jifs; - unsigned long flags; - int count; - u32 jifs; - - raw_spin_lock_irqsave(&i8253_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - outb_pit(0x00, PIT_MODE); /* latch the count ASAP */ - count = inb_pit(PIT_CH0); /* read the latched count */ - count |= inb_pit(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_pit(0x34, PIT_MODE); - outb_pit(LATCH & 0xff, PIT_CH0); - outb_pit(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * It's possible for count to appear to go the wrong way for a - * couple of reasons: - * - * 1. The timer counter underflows, but we haven't handled the - * resulting interrupt and incremented jiffies yet. - * 2. Hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) - * - * Previous attempts to handle these cases intelligently were - * buggy, so we just do the simple thing now. - */ - if (count > old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_spin_unlock_irqrestore(&i8253_lock, flags); - - count = (LATCH - 1) - count; - - return (cycle_t)(jifs * LATCH) + count; -} - -static struct clocksource pit_cs = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), - .mult = 0, - .shift = 20, -}; - static int __init init_pit_clocksource(void) { /* @@ -205,10 +126,7 @@ static int __init init_pit_clocksource(void) pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) return 0; - pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift); - - return clocksource_register(&pit_cs); + return clocksource_i8253_init(); } arch_initcall(init_pit_clocksource); - #endif /* !CONFIG_X86_64 */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 1cb0b9fc78dc..6c0802eb2f7f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -249,7 +249,7 @@ void fixup_irqs(void) data = irq_desc_get_irq_data(desc); affinity = data->affinity; - if (!irq_has_action(irq) || + if (!irq_has_action(irq) || irqd_is_per_cpu(data) || cpumask_subset(affinity, cpu_online_mask)) { raw_spin_unlock(&desc->lock); continue; @@ -276,7 +276,8 @@ void fixup_irqs(void) else if (!(warned++)) set_affinity = 0; - if (!irqd_can_move_in_process_context(data) && chip->irq_unmask) + if (!irqd_can_move_in_process_context(data) && + !irqd_irq_disabled(data) && chip->irq_unmask) chip->irq_unmask(data); raw_spin_unlock(&desc->lock); diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 961b6b30ba90..3fee346ef545 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -34,7 +34,7 @@ void arch_jump_label_transform(struct jump_entry *entry, code.offset = entry->target - (entry->code + JUMP_LABEL_NOP_SIZE); } else - memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); + memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); get_online_cpus(); mutex_lock(&text_mutex); text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); @@ -44,7 +44,8 @@ void arch_jump_label_transform(struct jump_entry *entry, void arch_jump_label_text_poke_early(jump_label_t addr) { - text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); + text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5], + JUMP_LABEL_NOP_SIZE); } #endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index c969fd9d1566..f1a6244d7d93 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long flags; /* This is possible if op is under delayed unoptimizing */ if (kprobe_disabled(&op->kp)) return; - preempt_disable(); + local_irq_save(flags); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { @@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); } - preempt_enable_no_resched(); + local_irq_restore(flags); } static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f98d3eafe07a..6389a6bca11b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -26,8 +26,6 @@ #include <asm/x86_init.h> #include <asm/reboot.h> -#define KVM_SCALE 22 - static int kvmclock = 1; static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; @@ -120,8 +118,6 @@ static struct clocksource kvm_clock = { .read = kvm_clock_get_cycles, .rating = 400, .mask = CLOCKSOURCE_MASK(64), - .mult = 1 << KVM_SCALE, - .shift = KVM_SCALE, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -203,7 +199,7 @@ void __init kvmclock_init(void) machine_ops.crash_shutdown = kvm_crash_shutdown; #endif kvm_get_preset_lpj(); - clocksource_register(&kvm_clock); + clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index ab23f1ad4bf1..52f256f2cc81 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -24,6 +24,7 @@ #include <linux/bug.h> #include <linux/mm.h> #include <linux/gfp.h> +#include <linux/jump_label.h> #include <asm/system.h> #include <asm/page.h> diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5a532ce646bf..6f9bfffb2720 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -715,17 +715,15 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) } } -static int +static int __init check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) { - int ret = 0; - if (!mpc_new_phys || count <= mpc_new_length) { WARN(1, "update_mptable: No spare slots (length: %x)\n", count); return -1; } - return ret; + return 0; } #else /* CONFIG_X86_IO_APIC */ static diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 55d745ec1181..35ccf75696eb 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c @@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start, struct iommu_table_entry *finish) { struct iommu_table_entry *p, *q, *x; - char sym_p[KSYM_SYMBOL_LEN]; - char sym_q[KSYM_SYMBOL_LEN]; /* Simple cyclic dependency checker. */ for (p = start; p < finish; p++) { q = find_dependents_of(start, finish, p); x = find_dependents_of(start, finish, q); if (p == x) { - sprint_symbol(sym_p, (unsigned long)p->detect); - sprint_symbol(sym_q, (unsigned long)q->detect); - - printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \ - " on %s and vice-versa. BREAKING IT.\n", - sym_p, sym_q); + printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n", + p->detect, q->detect); /* Heavy handed way..*/ x->depend = 0; } @@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start, for (p = start; p < finish; p++) { q = find_dependents_of(p, finish, p); if (q && q > p) { - sprint_symbol(sym_p, (unsigned long)p->detect); - sprint_symbol(sym_q, (unsigned long)q->detect); - - printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\ - "should be called before %s!\n", - sym_p, sym_q); + printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n", + p->detect, q->detect); } } } diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms.c index 071e7fea42e5..ba0a4cce53be 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms.c @@ -73,6 +73,107 @@ static struct resource video_rom_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM }; +/* does this oprom support the given pci device, or any of the devices + * that the driver supports? + */ +static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device) +{ + struct pci_driver *drv = pdev->driver; + const struct pci_device_id *id; + + if (pdev->vendor == vendor && pdev->device == device) + return true; + + for (id = drv ? drv->id_table : NULL; id && id->vendor; id++) + if (id->vendor == vendor && id->device == device) + break; + + return id && id->vendor; +} + +static bool probe_list(struct pci_dev *pdev, unsigned short vendor, + const unsigned char *rom_list) +{ + unsigned short device; + + do { + if (probe_kernel_address(rom_list, device) != 0) + device = 0; + + if (device && match_id(pdev, vendor, device)) + break; + + rom_list += 2; + } while (device); + + return !!device; +} + +static struct resource *find_oprom(struct pci_dev *pdev) +{ + struct resource *oprom = NULL; + int i; + + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { + struct resource *res = &adapter_rom_resources[i]; + unsigned short offset, vendor, device, list, rev; + const unsigned char *rom; + + if (res->end == 0) + break; + + rom = isa_bus_to_virt(res->start); + if (probe_kernel_address(rom + 0x18, offset) != 0) + continue; + + if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) + continue; + + if (probe_kernel_address(rom + offset + 0x6, device) != 0) + continue; + + if (match_id(pdev, vendor, device)) { + oprom = res; + break; + } + + if (probe_kernel_address(rom + offset + 0x8, list) == 0 && + probe_kernel_address(rom + offset + 0xc, rev) == 0 && + rev >= 3 && list && + probe_list(pdev, vendor, rom + offset + list)) { + oprom = res; + break; + } + } + + return oprom; +} + +void *pci_map_biosrom(struct pci_dev *pdev) +{ + struct resource *oprom = find_oprom(pdev); + + if (!oprom) + return NULL; + + return ioremap(oprom->start, resource_size(oprom)); +} +EXPORT_SYMBOL(pci_map_biosrom); + +void pci_unmap_biosrom(void __iomem *image) +{ + iounmap(image); +} +EXPORT_SYMBOL(pci_unmap_biosrom); + +size_t pci_biosrom_size(struct pci_dev *pdev) +{ + struct resource *oprom = find_oprom(pdev); + + return oprom ? resource_size(oprom) : 0; +} +EXPORT_SYMBOL(pci_biosrom_size); + #define ROMSIGNATURE 0xaa55 static int __init romsignature(const unsigned char *rom) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d46cbe46b7ab..88a90a977f8e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { if (!need_resched()) { - if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) + if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); __monitor((void *)¤t_thread_info()->flags, 0, 0); @@ -465,7 +465,7 @@ static void mwait_idle(void) if (!need_resched()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle(1, smp_processor_id()); - if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) + if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); __monitor((void *)¤t_thread_info()->flags, 0, 0); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 08c44b08bf5b..0c016f727695 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -enum reboot_type reboot_type = BOOT_KBD; +enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) @@ -478,9 +478,24 @@ void __attribute__((weak)) mach_reboot_fixups(void) { } +/* + * Windows compatible x86 hardware expects the following on reboot: + * + * 1) If the FADT has the ACPI reboot register flag set, try it + * 2) If still alive, write to the keyboard controller + * 3) If still alive, write to the ACPI reboot register again + * 4) If still alive, write to the keyboard controller again + * + * If the machine is still alive at this stage, it gives up. We default to + * following the same pattern, except that if we're still alive after (4) we'll + * try to force a triple fault and then cycle between hitting the keyboard + * controller and doing that + */ static void native_machine_emergency_restart(void) { int i; + int attempt = 0; + int orig_reboot_type = reboot_type; if (reboot_emergency) emergency_vmx_disable_all(); @@ -502,6 +517,13 @@ static void native_machine_emergency_restart(void) outb(0xfe, 0x64); /* pulse reset low */ udelay(50); } + if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { + attempt = 1; + reboot_type = BOOT_ACPI; + } else { + reboot_type = BOOT_TRIPLE; + } + break; case BOOT_TRIPLE: load_idt(&no_idt); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4be9b398470e..c3050af9306d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow); void __init setup_arch(char **cmdline_p) { - unsigned long flags; - #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); visws_early_detect(); @@ -1041,9 +1039,7 @@ void __init setup_arch(char **cmdline_p) mcheck_init(); - local_irq_save(flags); - arch_init_ideal_nop5(); - local_irq_restore(flags); + arch_init_ideal_nops(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4fd173cd8e57..40a24932a8a1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&set); if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; @@ -682,6 +679,7 @@ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) { + sigset_t blocked; int ret; /* Are we from a system call? */ @@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, */ regs->flags &= ~X86_EFLAGS_TF; - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + sigaddset(&blocked, sig); + set_current_blocked(&blocked); tracehook_signal_handler(sig, info, ka, regs, test_thread_flag(TIF_SINGLESTEP)); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 513deac7228d..013e7eba83bb 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait) } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. */ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + scheduler_ipi(); /* * KVM uses this interrupt to force a cpu out of guest mode */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2871d3c71b6..a3c430bdfb60 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c))) + if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)) return; - if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) + if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) return; diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6515733a289d..55d9bc03f696 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -9,15 +9,6 @@ #include <linux/uaccess.h> #include <asm/stacktrace.h> -static void save_stack_warning(void *data, char *msg) -{ -} - -static void -save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) -{ -} - static int save_stack_stack(void *data, char *name) { return 0; @@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, .stack = save_stack_stack, .address = save_stack_address, .walk_stack = print_context_stack, }; static const struct stacktrace_ops save_stack_ops_nosched = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, .stack = save_stack_stack, .address = save_stack_address_nosched, .walk_stack = print_context_stack, diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index abce34d5c79d..32cbffb0c494 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -344,3 +344,4 @@ ENTRY(sys_call_table) .long sys_open_by_handle_at .long sys_clock_adjtime .long sys_syncfs + .long sys_sendmmsg /* 345 */ diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 787a5e499dd1..3f92ce07e525 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -161,7 +161,7 @@ static int test_NX(void) } #endif - return 0; + return ret; } static void test_exit(void) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 75ef4b18e9b7..6f164bd5e14d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -35,7 +35,7 @@ void iommu_shutdown_noop(void) { } struct x86_init_ops x86_init __initdata = { .resources = { - .probe_roms = x86_init_noop, + .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = default_machine_specific_memory_setup, }, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0ad47b819a8b..d6e2477feb18 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -73,9 +73,14 @@ #define MemAbs (1<<11) /* Memory operand is absolute displacement */ #define String (1<<12) /* String instruction (rep capable) */ #define Stack (1<<13) /* Stack instruction (push/pop) */ +#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ -#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ +#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ +#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ +#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ +#define Sse (1<<17) /* SSE Vector instruction */ /* Misc flags */ +#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ @@ -102,11 +107,14 @@ struct opcode { u32 flags; + u8 intercept; union { int (*execute)(struct x86_emulate_ctxt *ctxt); struct opcode *group; struct group_dual *gdual; + struct gprefix *gprefix; } u; + int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; struct group_dual { @@ -114,6 +122,13 @@ struct group_dual { struct opcode mod3[8]; }; +struct gprefix { + struct opcode pfx_no; + struct opcode pfx_66; + struct opcode pfx_f2; + struct opcode pfx_f3; +}; + /* EFLAGS bit definitions. */ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -248,42 +263,42 @@ struct group_dual { "w", "r", _LO32, "r", "", "r") /* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (_cl).val; \ - _type _srcv = (_src).val; \ - _type _dstv = (_dst).val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (_cl).val = (unsigned long) _clv; \ - (_src).val = (unsigned long) _srcv; \ - (_dst).val = (unsigned long) _dstv; \ +#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ + do { \ + unsigned long _tmp; \ + _type _clv = (_cl).val; \ + _type _srcv = (_src).val; \ + _type _dstv = (_dst).val; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + _op _suffix " %4,%1 \n" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ + ); \ + \ + (_cl).val = (unsigned long) _clv; \ + (_src).val = (unsigned long) _srcv; \ + (_dst).val = (unsigned long) _dstv; \ } while (0) -#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ - do { \ - switch ((_dst).bytes) { \ - case 2: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "w", unsigned short); \ - break; \ - case 4: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "l", unsigned int); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "q", unsigned long)); \ - break; \ - } \ +#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 2: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "w", unsigned short); \ + break; \ + case 4: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "l", unsigned int); \ + break; \ + case 8: \ + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "q", unsigned long)); \ + break; \ + } \ } while (0) #define __emulate_1op(_op, _dst, _eflags, _suffix) \ @@ -346,13 +361,25 @@ struct group_dual { } while (0) /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ -#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ - do { \ - switch((_src).bytes) { \ - case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ - case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ - case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ - case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ +#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ + do { \ + switch((_src).bytes) { \ + case 1: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "b"); \ + break; \ + case 2: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "w"); \ + break; \ + case 4: \ + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "l"); \ + break; \ + case 8: \ + ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ + _eflags, "q")); \ + break; \ } \ } while (0) @@ -388,13 +415,33 @@ struct group_dual { (_type)_x; \ }) -#define insn_fetch_arr(_arr, _size, _eip) \ +#define insn_fetch_arr(_arr, _size, _eip) \ ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ (_eip) += (_size); \ }) +static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, + enum x86_intercept intercept, + enum x86_intercept_stage stage) +{ + struct x86_instruction_info info = { + .intercept = intercept, + .rep_prefix = ctxt->decode.rep_prefix, + .modrm_mod = ctxt->decode.modrm_mod, + .modrm_reg = ctxt->decode.modrm_reg, + .modrm_rm = ctxt->decode.modrm_rm, + .src_val = ctxt->decode.src.val64, + .src_bytes = ctxt->decode.src.bytes, + .dst_bytes = ctxt->decode.dst.bytes, + .ad_bytes = ctxt->decode.ad_bytes, + .next_rip = ctxt->eip, + }; + + return ctxt->ops->intercept(ctxt, &info, stage); +} + static inline unsigned long ad_mask(struct decode_cache *c) { return (1UL << (c->ad_bytes << 3)) - 1; @@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel) register_address_increment(c, &c->eip, rel); } +static u32 desc_limit_scaled(struct desc_struct *desc) +{ + u32 limit = get_desc_limit(desc); + + return desc->g ? (limit << 12) | 0xfff : limit; +} + static void set_seg_override(struct decode_cache *c, int seg) { c->has_seg_override = true; @@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) return 0; - return ops->get_cached_segment_base(seg, ctxt->vcpu); + return ops->get_cached_segment_base(ctxt, seg); } static unsigned seg_override(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct decode_cache *c) { if (!c->has_seg_override) @@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt, return c->seg_override; } -static ulong linear(struct x86_emulate_ctxt *ctxt, - struct segmented_address addr) -{ - struct decode_cache *c = &ctxt->decode; - ulong la; - - la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; - if (c->ad_bytes != 8) - la &= (u32)-1; - return la; -} - static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { @@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, return X86EMUL_PROPAGATE_FAULT; } +static int emulate_db(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, DB_VECTOR, 0, false); +} + static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) { return emulate_exception(ctxt, GP_VECTOR, err, true); } +static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) +{ + return emulate_exception(ctxt, SS_VECTOR, err, true); +} + static int emulate_ud(struct x86_emulate_ctxt *ctxt) { return emulate_exception(ctxt, UD_VECTOR, 0, false); @@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, DE_VECTOR, 0, false); } +static int emulate_nm(struct x86_emulate_ctxt *ctxt) +{ + return emulate_exception(ctxt, NM_VECTOR, 0, false); +} + +static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) +{ + u16 selector; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); + return selector; +} + +static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, + unsigned seg) +{ + u16 dummy; + u32 base3; + struct desc_struct desc; + + ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); + ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); +} + +static int __linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, bool fetch, + ulong *linear) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_struct desc; + bool usable; + ulong la; + u32 lim; + u16 sel; + unsigned cpl, rpl; + + la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + switch (ctxt->mode) { + case X86EMUL_MODE_REAL: + break; + case X86EMUL_MODE_PROT64: + if (((signed long)la << 16) >> 16 != la) + return emulate_gp(ctxt, 0); + break; + default: + usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, + addr.seg); + if (!usable) + goto bad; + /* code segment or read-only data segment */ + if (((desc.type & 8) || !(desc.type & 2)) && write) + goto bad; + /* unreadable code segment */ + if (!fetch && (desc.type & 8) && !(desc.type & 2)) + goto bad; + lim = desc_limit_scaled(&desc); + if ((desc.type & 8) || !(desc.type & 4)) { + /* expand-up segment */ + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } else { + /* exapand-down segment */ + if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) + goto bad; + lim = desc.d ? 0xffffffff : 0xffff; + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) + goto bad; + } + cpl = ctxt->ops->cpl(ctxt); + rpl = sel & 3; + cpl = max(cpl, rpl); + if (!(desc.type & 8)) { + /* data segment */ + if (cpl > desc.dpl) + goto bad; + } else if ((desc.type & 8) && !(desc.type & 4)) { + /* nonconforming code segment */ + if (cpl != desc.dpl) + goto bad; + } else if ((desc.type & 8) && (desc.type & 4)) { + /* conforming code segment */ + if (cpl < desc.dpl) + goto bad; + } + break; + } + if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) + la &= (u32)-1; + *linear = la; + return X86EMUL_CONTINUE; +bad: + if (addr.seg == VCPU_SREG_SS) + return emulate_ss(ctxt, addr.seg); + else + return emulate_gp(ctxt, addr.seg); +} + +static int linearize(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + unsigned size, bool write, + ulong *linear) +{ + return __linearize(ctxt, addr, size, write, false, linear); +} + + +static int segmented_read_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, false, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) @@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc->end) { + unsigned long linear; + struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; cur_size = fc->end - fc->start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); - rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, - size, ctxt->vcpu, &ctxt->exception); + rc = __linearize(ctxt, addr, size, false, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + rc = ops->fetch(ctxt, linear, fc->data + cur_size, + size, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; fc->end += size; @@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, } static int read_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct segmented_address addr, u16 *size, unsigned long *address, int op_bytes) { @@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, if (op_bytes == 2) op_bytes = 3; *address = 0; - rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, size, 2); if (rc != X86EMUL_CONTINUE) return rc; addr.ea += 2; - rc = ops->read_std(linear(ctxt, addr), address, op_bytes, - ctxt->vcpu, &ctxt->exception); + rc = segmented_read_std(ctxt, addr, address, op_bytes); return rc; } @@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op) } } -static void decode_register_operand(struct operand *op, +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; + case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, + int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; + case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; +#ifdef CONFIG_X86_64 + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; +#endif + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void decode_register_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op, struct decode_cache *c, int inhibit_bytereg) { @@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op, if (!(c->d & ModRM)) reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); + + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = reg; + read_sse_reg(ctxt, &op->vec_val, reg); + return; + } + op->type = OP_REG; if ((c->d & ByteOp) && !inhibit_bytereg) { op->addr.reg = decode_register(reg, c->regs, highbyte_regs); @@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; op->addr.reg = decode_register(c->modrm_rm, c->regs, c->d & ByteOp); + if (c->d & Sse) { + op->type = OP_XMM; + op->bytes = 16; + op->addr.xmm = c->modrm_rm; + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); + return rc; + } fetch_register_operand(op); return rc; } @@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, if (mc->pos < mc->end) goto read_cached; - rc = ops->read_emulated(addr, mc->data + mc->end, n, - &ctxt->exception, ctxt->vcpu); + rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, + &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; mc->end += n; @@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } +static int segmented_read(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, false, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return read_emulated(ctxt, ctxt->ops, linear, data, size); +} + +static int segmented_write(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_emulated(ctxt, linear, data, size, + &ctxt->exception); +} + +static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + const void *orig_data, const void *data, + unsigned size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, + size, &ctxt->exception); +} + static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned int size, unsigned short port, @@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (n == 0) n = 1; rc->pos = rc->end = 0; - if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) + if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) return 0; rc->end = n * size; } @@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, return 1; } -static u32 desc_limit_scaled(struct desc_struct *desc) -{ - u32 limit = get_desc_limit(desc); - - return desc->g ? (limit << 12) | 0xfff : limit; -} - static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, u16 selector, struct desc_ptr *dt) { if (selector & 1 << 2) { struct desc_struct desc; + u16 sel; + memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, - ctxt->vcpu)) + if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ dt->address = get_desc_base(&desc); } else - ops->get_gdt(dt, ctxt->vcpu); + ops->get_gdt(ctxt, dt); } /* allowed just for 8 bytes segments */ @@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (dt.size < index * 8 + 7) return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, - &ctxt->exception); + ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); return ret; } @@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; dpl = seg_desc.dpl; - cpl = ops->cpl(ctxt->vcpu); + cpl = ops->cpl(ctxt); switch (seg) { case VCPU_SREG_SS: @@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } load: - ops->set_segment_selector(selector, seg, ctxt->vcpu); - ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); + ops->set_segment(ctxt, selector, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op) } } -static inline int writeback(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int writeback(struct x86_emulate_ctxt *ctxt) { int rc; struct decode_cache *c = &ctxt->decode; @@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, break; case OP_MEM: if (c->lock_prefix) - rc = ops->cmpxchg_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.orig_val, - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_cmpxchg(ctxt, + c->dst.addr.mem, + &c->dst.orig_val, + &c->dst.val, + c->dst.bytes); else - rc = ops->write_emulated( - linear(ctxt, c->dst.addr.mem), - &c->dst.val, - c->dst.bytes, - &ctxt->exception, - ctxt->vcpu); + rc = segmented_write(ctxt, + c->dst.addr.mem, + &c->dst.val, + c->dst.bytes); if (rc != X86EMUL_CONTINUE) return rc; break; + case OP_XMM: + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); + break; case OP_NONE: /* no writeback */ break; @@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline void emulate_push(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_push(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + struct segmented_address addr; - c->dst.type = OP_MEM; - c->dst.bytes = c->op_bytes; - c->dst.val = c->src.val; register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); - c->dst.addr.mem.seg = VCPU_SREG_SS; + addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); + addr.seg = VCPU_SREG_SS; + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); } static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, void *dest, int len) { struct decode_cache *c = &ctxt->decode; @@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); addr.seg = VCPU_SREG_SS; - rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); + rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, return rc; } +static int em_pop(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + return emulate_pop(ctxt, &c->dst.val, c->op_bytes); +} + static int emulate_popf(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, void *dest, int len) @@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = ops->cpl(ctxt->vcpu); + int cpl = ops->cpl(ctxt); - rc = emulate_pop(ctxt, ops, &val, len); + rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) return rc; @@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, return rc; } -static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int em_popf(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); + c->dst.type = OP_REG; + c->dst.addr.reg = &ctxt->eflags; + c->dst.bytes = c->op_bytes; + return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); +} - emulate_push(ctxt, ops); +static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, int seg) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = get_segment_selector(ctxt, seg); + + return em_push(ctxt); } static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, @@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, unsigned long selector; int rc; - rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); + rc = emulate_pop(ctxt, &selector, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_pusha(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_pusha(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long old_esp = c->regs[VCPU_REGS_RSP]; @@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt, (reg == VCPU_REGS_RSP) ? (c->src.val = old_esp) : (c->src.val = c->regs[reg]); - emulate_push(ctxt, ops); - - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ++reg; } - /* Disable writeback. */ - c->dst.type = OP_NONE; - return rc; } -static int emulate_popa(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_pushf(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.val = (unsigned long)ctxt->eflags; + return em_push(ctxt); +} + +static int em_popa(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, --reg; } - rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); + rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add limit checks */ c->src.val = ctxt->eflags; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); - c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = c->eip; - emulate_push(ctxt, ops); - rc = writeback(ctxt, ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; - c->dst.type = OP_NONE; - - ops->get_idt(&dt, ctxt->vcpu); + ops->get_idt(ctxt, &dt); eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; - rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); + rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; @@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, /* TODO: Add stack limit check */ - rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, if (temp_eip & ~0xffff) return emulate_gp(ctxt, 0); - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); + rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; @@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, } } -static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_jmp_far(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + unsigned short sel; + + memcpy(&sel, c->src.valptr + c->op_bytes, 2); + + rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); + if (rc != X86EMUL_CONTINUE) + return rc; + + c->eip = 0; + memcpy(&c->eip, c->src.valptr, c->op_bytes); + return X86EMUL_CONTINUE; +} + +static int em_grp1a(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; - return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); + return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); } -static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) +static int em_grp2(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; switch (c->modrm_reg) { @@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); break; } + return X86EMUL_CONTINUE; } -static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp3(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; unsigned long *rax = &c->regs[VCPU_REGS_RAX]; @@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp45(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + int rc = X86EMUL_CONTINUE; switch (c->modrm_reg) { case 0: /* inc */ @@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, old_eip = c->eip; c->eip = c->src.val; c->src.val = old_eip; - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 4: /* jmp abs */ c->eip = c->src.val; break; + case 5: /* jmp far */ + rc = em_jmp_far(ctxt); + break; case 6: /* push */ - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } - return X86EMUL_CONTINUE; + return rc; } -static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int em_grp9(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; u64 old = c->dst.orig_val64; @@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, int rc; unsigned long cs; - rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); + rc = emulate_pop(ctxt, &c->eip, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; if (c->op_bytes == 4) c->eip = (u32)c->eip; - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); @@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, struct desc_struct *cs, struct desc_struct *ss) { + u16 selector; + memset(cs, 0, sizeof(struct desc_struct)); - ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); + ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; /* syscall is not available in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); + ops->get_msr(ctxt, MSR_EFER, &efer); setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); ss_sel = (u16)(msr_data + 8); - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->regs[VCPU_REGS_RCX] = c->eip; - if (is_long_mode(ctxt->vcpu)) { + if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; - ops->get_msr(ctxt->vcpu, + ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? MSR_LSTAR : MSR_CSTAR, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); + ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~(msr_data | EFLG_RF); #endif } else { /* legacy mode */ - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); + ops->get_msr(ctxt, MSR_STAR, &msr_data); c->eip = (u32)msr_data; ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; + u64 efer = 0; + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) return emulate_gp(ctxt, 0); @@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) setup_syscalls_segments(ctxt, ops, &cs, &ss); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (ctxt->mode) { case X86EMUL_MODE_PROT32: if ((msr_data & 0xfffc) == 0x0) @@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel &= ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; ss_sel &= ~SELECTOR_RPL_MASK; - if (ctxt->mode == X86EMUL_MODE_PROT64 - || is_long_mode(ctxt->vcpu)) { + if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { cs.d = 0; cs.l = 1; } - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); c->eip = msr_data; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); c->regs[VCPU_REGS_RSP] = msr_data; return X86EMUL_CONTINUE; @@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs.dpl = 3; ss.dpl = 3; - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (usermode) { case X86EMUL_MODE_PROT32: cs_sel = (u16)(msr_data + 16); @@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) cs_sel |= SELECTOR_RPL_MASK; ss_sel |= SELECTOR_RPL_MASK; - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); c->eip = c->regs[VCPU_REGS_RDX]; c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; @@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return ops->cpl(ctxt->vcpu) > iopl; + return ops->cpl(ctxt) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, @@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, struct desc_struct tr_seg; u32 base3; int r; - u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; + u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; unsigned mask = (1 << len) - 1; unsigned long base; - ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; if (desc_limit_scaled(&tr_seg) < 103) @@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 base |= ((u64)base3) << 32; #endif - r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); + r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) return false; - r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, - NULL); + r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); if (r != X86EMUL_CONTINUE) return false; if ((perm >> bit_idx) & mask) @@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, tss->si = c->regs[VCPU_REGS_RSI]; tss->di = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, @@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); + set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* * Now load segment descriptors. If fault happenes at this stage @@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, save_state_to_tss16(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; - tss->cr3 = ops->get_cr(3, ctxt->vcpu); + tss->cr3 = ops->get_cr(ctxt, 3); tss->eip = c->eip; tss->eflags = ctxt->eflags; tss->eax = c->regs[VCPU_REGS_RAX]; @@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->esi = c->regs[VCPU_REGS_RSI]; tss->edi = c->regs[VCPU_REGS_RDI]; - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); - tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); - tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); + tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); + tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); + tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, @@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int ret; - if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) + if (ops->set_cr(ctxt, 3, tss->cr3)) return emulate_gp(ctxt, 0); c->eip = tss->eip; ctxt->eflags = tss->eflags | 2; @@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * SDM says that segment selectors are loaded before segment * descriptors */ - ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); - ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); - ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); + set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); + set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); + set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); /* * Now load segment descriptors. If fault happenes at this stage @@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, save_state_to_tss32(ctxt, ops, &tss_seg); - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ @@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(new_tss_base, + ret = ops->write_std(ctxt, new_tss_base, &tss_seg.prev_task_link, sizeof tss_seg.prev_task_link, - ctxt->vcpu, &ctxt->exception); + &ctxt->exception); if (ret != X86EMUL_CONTINUE) /* FIXME: need to provide precise fault address */ return ret; @@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, { struct desc_struct curr_tss_desc, next_tss_desc; int ret; - u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); + u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); ulong old_tss_base = - ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); + ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); u32 desc_limit; /* FIXME: old_tss_base == ~0 ? */ @@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (reason != TASK_SWITCH_IRET) { if ((tss_selector & 3) > next_tss_desc.dpl || - ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) + ops->cpl(ctxt) > next_tss_desc.dpl) return emulate_gp(ctxt, 0); } @@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, &next_tss_desc); } - ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); - ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); - ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); + ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); + ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); if (has_error_code) { struct decode_cache *c = &ctxt->decode; @@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; c->lock_prefix = 0; c->src.val = (unsigned long) error_code; - emulate_push(ctxt, ops); + ret = em_push(ctxt); } return ret; @@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, has_error_code, error_code); - if (rc == X86EMUL_CONTINUE) { - rc = writeback(ctxt, ops); - if (rc == X86EMUL_CONTINUE) - ctxt->eip = c->eip; - } + if (rc == X86EMUL_CONTINUE) + ctxt->eip = c->eip; - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, @@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, op->addr.mem.seg = seg; } -static int em_push(struct x86_emulate_ctxt *ctxt) -{ - emulate_push(ctxt, ctxt->ops); - return X86EMUL_CONTINUE; -} - static int em_das(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ulong old_eip; int rc; - old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); @@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) memcpy(&c->eip, c->src.valptr, c->op_bytes); c->src.val = old_cs; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); + rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) return rc; c->src.val = old_eip; - emulate_push(ctxt, ctxt->ops); - rc = writeback(ctxt, ctxt->ops); - if (rc != X86EMUL_CONTINUE) - return rc; - - c->dst.type = OP_NONE; - - return X86EMUL_CONTINUE; + return em_push(ctxt); } static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) @@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); + rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); return X86EMUL_CONTINUE; } +static int em_add(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_or(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_adc(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sbb(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_and(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_sub(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_xor(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); + return X86EMUL_CONTINUE; +} + +static int em_cmp(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + static int em_imul(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { - unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); struct decode_cache *c = &ctxt->decode; u64 tsc = 0; - if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) - return emulate_gp(ctxt, 0); - ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); + ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); c->regs[VCPU_REGS_RAX] = (u32)tsc; c->regs[VCPU_REGS_RDX] = tsc >> 32; return X86EMUL_CONTINUE; @@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_movdqu(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); + return X86EMUL_CONTINUE; +} + +static int em_invlpg(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + ulong linear; + + rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->ops->invlpg(ctxt, linear); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_clts(struct x86_emulate_ctxt *ctxt) +{ + ulong cr0; + + cr0 = ctxt->ops->get_cr(ctxt, 0); + cr0 &= ~X86_CR0_TS; + ctxt->ops->set_cr(ctxt, 0, cr0); + return X86EMUL_CONTINUE; +} + +static int em_vmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + if (c->modrm_mod != 3 || c->modrm_rm != 1) + return X86EMUL_UNHANDLEABLE; + + rc = ctxt->ops->fix_hypercall(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + /* Let the processor re-execute the fixed hypercall */ + c->eip = ctxt->eip; + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_lgdt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_gdt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_vmmcall(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + rc = ctxt->ops->fix_hypercall(ctxt); + + /* Disable writeback. */ + c->dst.type = OP_NONE; + return rc; +} + +static int em_lidt(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + struct desc_ptr desc_ptr; + int rc; + + rc = read_descriptor(ctxt, c->src.addr.mem, + &desc_ptr.size, &desc_ptr.address, + c->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->set_idt(ctxt, &desc_ptr); + /* Disable writeback. */ + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static int em_smsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = 2; + c->dst.val = ctxt->ops->get_cr(ctxt, 0); + return X86EMUL_CONTINUE; +} + +static int em_lmsw(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) + | (c->src.val & 0x0f)); + c->dst.type = OP_NONE; + return X86EMUL_CONTINUE; +} + +static bool valid_cr(int nr) +{ + switch (nr) { + case 0: + case 2 ... 4: + case 8: + return true; + default: + return false; + } +} + +static int check_cr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + if (!valid_cr(c->modrm_reg)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_cr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int cr = c->modrm_reg; + u64 efer = 0; + + static u64 cr_reserved_bits[] = { + 0xffffffff00000000ULL, + 0, 0, 0, /* CR3 checked later */ + CR4_RESERVED_BITS, + 0, 0, 0, + CR8_RESERVED_BITS, + }; + + if (!valid_cr(cr)) + return emulate_ud(ctxt); + + if (new_val & cr_reserved_bits[cr]) + return emulate_gp(ctxt, 0); + + switch (cr) { + case 0: { + u64 cr4; + if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || + ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) + return emulate_gp(ctxt, 0); + + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && + !(cr4 & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + case 3: { + u64 rsvd = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + rsvd = CR3_L_MODE_RESERVED_BITS; + else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) + rsvd = CR3_PAE_RESERVED_BITS; + else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) + rsvd = CR3_NONPAE_RESERVED_BITS; + + if (new_val & rsvd) + return emulate_gp(ctxt, 0); + + break; + } + case 4: { + u64 cr4; + + cr4 = ctxt->ops->get_cr(ctxt, 4); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE)) + return emulate_gp(ctxt, 0); + + break; + } + } + + return X86EMUL_CONTINUE; +} + +static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) +{ + unsigned long dr7; + + ctxt->ops->get_dr(ctxt, 7, &dr7); + + /* Check if DR7.Global_Enable is set */ + return dr7 & (1 << 13); +} + +static int check_dr_read(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + int dr = c->modrm_reg; + u64 cr4; + + if (dr > 7) + return emulate_ud(ctxt); + + cr4 = ctxt->ops->get_cr(ctxt, 4); + if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) + return emulate_ud(ctxt); + + if (check_dr7_gd(ctxt)) + return emulate_db(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_dr_write(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + u64 new_val = c->src.val64; + int dr = c->modrm_reg; + + if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) + return emulate_gp(ctxt, 0); + + return check_dr_read(ctxt); +} + +static int check_svme(struct x86_emulate_ctxt *ctxt) +{ + u64 efer; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + + if (!(efer & EFER_SVME)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_svme_pa(struct x86_emulate_ctxt *ctxt) +{ + u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; + + /* Valid physical address? */ + if (rax & 0xffff000000000000ULL) + return emulate_gp(ctxt, 0); + + return check_svme(ctxt); +} + +static int check_rdtsc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); + + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) + return emulate_ud(ctxt); + + return X86EMUL_CONTINUE; +} + +static int check_rdpmc(struct x86_emulate_ctxt *ctxt) +{ + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); + u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; + + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || + (rcx > 3)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_in(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->dst.bytes = min(c->dst.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + +static int check_perm_out(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + + c->src.bytes = min(c->src.bytes, 4u); + if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) + return emulate_gp(ctxt, 0); + + return X86EMUL_CONTINUE; +} + #define D(_y) { .flags = (_y) } +#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } +#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } #define N D(0) +#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } -#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } +#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define II(_f, _e, _i) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } +#define IIP(_f, _e, _i, _p) \ + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ + .check_perm = (_p) } +#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } #define D2bv(_f) D((_f) | ByteOp), D(_f) +#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) -#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ - D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ - D2bv(((_f) & ~Lock) | DstAcc | SrcImm) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +static struct opcode group7_rm1[] = { + DI(SrcNone | ModRM | Priv, monitor), + DI(SrcNone | ModRM | Priv, mwait), + N, N, N, N, N, N, +}; + +static struct opcode group7_rm3[] = { + DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), + II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), + DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), + DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), + DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), +}; + +static struct opcode group7_rm7[] = { + N, + DIP(SrcNone | ModRM, rdtscp, check_rdtsc), + N, N, N, N, N, N, +}; static struct opcode group1[] = { - X7(D(Lock)), N + I(Lock, em_add), + I(Lock, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(0, em_cmp), }; static struct opcode group1A[] = { @@ -2366,16 +3073,28 @@ static struct opcode group5[] = { D(SrcMem | ModRM | Stack), N, }; +static struct opcode group6[] = { + DI(ModRM | Prot, sldt), + DI(ModRM | Prot, str), + DI(ModRM | Prot | Priv, lldt), + DI(ModRM | Prot | Priv, ltr), + N, N, N, N, +}; + static struct group_dual group7 = { { - N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), - D(SrcMem | ModRM | ByteOp | Priv | NoAccess), + DI(ModRM | Mov | DstMem | Priv, sgdt), + DI(ModRM | Mov | DstMem | Priv, sidt), + II(ModRM | SrcMem | Priv, em_lgdt, lgdt), + II(ModRM | SrcMem | Priv, em_lidt, lidt), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), + II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - D(SrcNone | ModRM | Priv | VendorSpecific), N, - N, D(SrcNone | ModRM | Priv | VendorSpecific), - D(SrcNone | ModRM | DstMem | Mov), N, - D(SrcMem16 | ModRM | Mov | Priv), N, + I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), + EXT(0, group7_rm1), + N, EXT(0, group7_rm3), + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), } }; static struct opcode group8[] = { @@ -2394,35 +3113,40 @@ static struct opcode group11[] = { I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), }; +static struct gprefix pfx_0f_6f_0f_7f = { + N, N, N, I(Sse, em_movdqu), +}; + static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - D6ALU(Lock), + I6ALU(Lock, em_add), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x08 - 0x0F */ - D6ALU(Lock), + I6ALU(Lock, em_or), D(ImplicitOps | Stack | No64), N, /* 0x10 - 0x17 */ - D6ALU(Lock), + I6ALU(Lock, em_adc), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x18 - 0x1F */ - D6ALU(Lock), + I6ALU(Lock, em_sbb), D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), /* 0x20 - 0x27 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_and), N, N, /* 0x28 - 0x2F */ - D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - D6ALU(Lock), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - D6ALU(0), N, N, + I6ALU(0, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ - X8(D(DstReg | Stack)), + X8(I(DstReg | Stack, em_pop)), /* 0x60 - 0x67 */ - D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), + I(ImplicitOps | Stack | No64, em_pusha), + I(ImplicitOps | Stack | No64, em_popa), N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , N, N, N, N, /* 0x68 - 0x6F */ @@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - D2bv(DstDI | Mov | String), /* insb, insw/insd */ - D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ + D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ + D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ @@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = { D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), /* 0x90 - 0x97 */ - X8(D(SrcAcc | DstReg)), + DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, - D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, + II(ImplicitOps | Stack, em_pushf, pushf), + II(ImplicitOps | Stack, em_popf, popf), N, N, /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - D2bv(SrcSI | DstDI | String), + I2bv(SrcSI | DstDI | String, em_cmp), /* 0xA8 - 0xAF */ D2bv(DstAcc | SrcImm), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - D2bv(SrcAcc | DstDI | String), + I2bv(SrcAcc | DstDI | String, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ @@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ N, N, N, D(ImplicitOps | Stack), - D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), + D(ImplicitOps), DI(SrcImmByte, intn), + D(ImplicitOps | No64), DI(ImplicitOps, iret), /* 0xD0 - 0xD7 */ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), N, N, N, N, @@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = { N, N, N, N, N, N, N, N, /* 0xE0 - 0xE7 */ X4(D(SrcImmByte)), - D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), + D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), /* 0xE8 - 0xEF */ D(SrcImm | Stack), D(SrcImm | ImplicitOps), D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), - D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), + D2bvIP(SrcNone | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), /* 0xF0 - 0xF7 */ - N, N, N, N, - D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), + N, DI(ImplicitOps, icebp), N, N, + DI(ImplicitOps | Priv, hlt), D(ImplicitOps), + G(ByteOp, group3), G(0, group3), /* 0xF8 - 0xFF */ D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), @@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = { static struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ - N, GD(0, &group7), N, N, - N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, - D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, + G(0, group6), GD(0, &group7), N, N, + N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, + DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, /* 0x20 - 0x2F */ - D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), - D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), + DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), + DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), + DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), + DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), N, N, N, N, N, N, N, N, N, N, N, N, /* 0x30 - 0x3F */ - D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), - D(ImplicitOps | Priv), N, + DI(ImplicitOps | Priv, wrmsr), + IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), + DI(ImplicitOps | Priv, rdmsr), + DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), N, N, N, N, N, N, N, N, N, N, @@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = { /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x70 - 0x7F */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, N, + N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), /* 0x80 - 0x8F */ X16(D(SrcImm)), /* 0x90 - 0x9F */ X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp), + DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), N, N, /* 0xA8 - 0xAF */ D(ImplicitOps | Stack), D(ImplicitOps | Stack), - N, D(DstMem | SrcReg | ModRM | BitOp | Lock), + DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), D(DstMem | SrcReg | Src2ImmByte | ModRM), D(DstMem | SrcReg | Src2CL | ModRM), D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), @@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = { #undef G #undef GD #undef I +#undef GP +#undef EXT #undef D2bv +#undef D2bvIP #undef I2bv -#undef D6ALU +#undef I6ALU static unsigned imm_size(struct decode_cache *c) { @@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; - int def_op_bytes, def_ad_bytes, dual, goffset; - struct opcode opcode, *g_mod012, *g_mod3; + int def_op_bytes, def_ad_bytes, goffset, simd_prefix; + bool op_prefix = false; + struct opcode opcode; struct operand memop = { .type = OP_NONE }; c->eip = ctxt->eip; @@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->fetch.end = c->fetch.start + insn_len; if (insn_len > 0) memcpy(c->fetch.data, insn, insn_len); - ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); switch (mode) { case X86EMUL_MODE_REAL: @@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) for (;;) { switch (c->b = insn_fetch(u8, 1, c->eip)) { case 0x66: /* operand-size override */ + op_prefix = true; /* switch between 2/4 bytes */ c->op_bytes = def_op_bytes ^ 6; break; @@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c->lock_prefix = 1; break; case 0xf2: /* REPNE/REPNZ */ - c->rep_prefix = REPNE_PREFIX; - break; case 0xf3: /* REP/REPE/REPZ */ - c->rep_prefix = REPE_PREFIX; + c->rep_prefix = c->b; break; default: goto done_prefixes; @@ -2722,29 +3463,49 @@ done_prefixes: } c->d = opcode.flags; - if (c->d & Group) { - dual = c->d & GroupDual; - c->modrm = insn_fetch(u8, 1, c->eip); - --c->eip; - - if (c->d & GroupDual) { - g_mod012 = opcode.u.gdual->mod012; - g_mod3 = opcode.u.gdual->mod3; - } else - g_mod012 = g_mod3 = opcode.u.group; - - c->d &= ~(Group | GroupDual); - - goffset = (c->modrm >> 3) & 7; + while (c->d & GroupMask) { + switch (c->d & GroupMask) { + case Group: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + opcode = opcode.u.group[goffset]; + break; + case GroupDual: + c->modrm = insn_fetch(u8, 1, c->eip); + --c->eip; + goffset = (c->modrm >> 3) & 7; + if ((c->modrm >> 6) == 3) + opcode = opcode.u.gdual->mod3[goffset]; + else + opcode = opcode.u.gdual->mod012[goffset]; + break; + case RMExt: + goffset = c->modrm & 7; + opcode = opcode.u.group[goffset]; + break; + case Prefix: + if (c->rep_prefix && op_prefix) + return X86EMUL_UNHANDLEABLE; + simd_prefix = op_prefix ? 0x66 : c->rep_prefix; + switch (simd_prefix) { + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; + } + break; + default: + return X86EMUL_UNHANDLEABLE; + } - if ((c->modrm >> 6) == 3) - opcode = g_mod3[goffset]; - else - opcode = g_mod012[goffset]; + c->d &= ~GroupMask; c->d |= opcode.flags; } c->execute = opcode.u.execute; + c->check_perm = opcode.check_perm; + c->intercept = opcode.intercept; /* Unrecognised? */ if (c->d == 0 || (c->d & Undefined)) @@ -2763,6 +3524,9 @@ done_prefixes: c->op_bytes = 4; } + if (c->d & Sse) + c->op_bytes = 16; + /* ModRM and SIB bytes. */ if (c->d & ModRM) { rc = decode_modrm(ctxt, ops, &memop); @@ -2776,7 +3540,7 @@ done_prefixes: if (!c->has_seg_override) set_seg_override(c, VCPU_SREG_DS); - memop.addr.mem.seg = seg_override(ctxt, ops, c); + memop.addr.mem.seg = seg_override(ctxt, c); if (memop.type == OP_MEM && c->ad_bytes != 8) memop.addr.mem.ea = (u32)memop.addr.mem.ea; @@ -2792,7 +3556,7 @@ done_prefixes: case SrcNone: break; case SrcReg: - decode_register_operand(&c->src, c, 0); + decode_register_operand(ctxt, &c->src, c, 0); break; case SrcMem16: memop.bytes = 2; @@ -2836,7 +3600,7 @@ done_prefixes: c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->src.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSI]); - c->src.addr.mem.seg = seg_override(ctxt, ops, c), + c->src.addr.mem.seg = seg_override(ctxt, c); c->src.val = 0; break; case SrcImmFAddr: @@ -2883,7 +3647,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ switch (c->d & DstMask) { case DstReg: - decode_register_operand(&c->dst, c, + decode_register_operand(ctxt, &c->dst, c, c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); break; case DstImmUByte: @@ -2926,7 +3690,7 @@ done_prefixes: } done: - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) @@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + if ((c->d & Sse) + && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) + || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + rc = emulate_ud(ctxt); + goto done; + } + + if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + rc = emulate_nm(ctxt); + goto done; + } + + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_PRE_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + /* Privileged instruction can be executed only in CPL=0 */ - if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { + if ((c->d & Priv) && ops->cpl(ctxt)) { rc = emulate_gp(ctxt, 0); goto done; } + /* Instruction can only be executed in protected mode */ + if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { + rc = emulate_ud(ctxt); + goto done; + } + + /* Do instruction specific permission checks */ + if (c->check_perm) { + rc = c->check_perm(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + } + + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_EXCEPT); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->rep_prefix && (c->d & String)) { /* All REP prefixes have the same first termination condition */ if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { @@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), - c->src.valptr, c->src.bytes); + rc = segmented_read(ctxt, c->src.addr.mem, + c->src.valptr, c->src.bytes); if (rc != X86EMUL_CONTINUE) goto done; c->src.orig_val64 = c->src.val64; } if (c->src2.type == OP_MEM) { - rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), - &c->src2.val, c->src2.bytes); + rc = segmented_read(ctxt, c->src2.addr.mem, + &c->src2.val, c->src2.bytes); if (rc != X86EMUL_CONTINUE) goto done; } @@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { /* optimisation - avoid slow emulated read if Mov */ - rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), + rc = segmented_read(ctxt, c->dst.addr.mem, &c->dst.val, c->dst.bytes); if (rc != X86EMUL_CONTINUE) goto done; @@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: + if (unlikely(ctxt->guest_mode) && c->intercept) { + rc = emulator_check_intercept(ctxt, c->intercept, + X86_ICPT_POST_MEMACCESS); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (c->execute) { rc = c->execute(ctxt); if (rc != X86EMUL_CONTINUE) @@ -3034,75 +3844,33 @@ special_insn: goto twobyte_insn; switch (c->b) { - case 0x00 ... 0x05: - add: /* add */ - emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); - break; case 0x06: /* push es */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); break; - case 0x08 ... 0x0d: - or: /* or */ - emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); - break; case 0x0e: /* push cs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); - break; - case 0x10 ... 0x15: - adc: /* adc */ - emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); break; case 0x16: /* push ss */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); break; - case 0x18 ... 0x1d: - sbb: /* sbb */ - emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); - break; case 0x1e: /* push ds */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); break; - case 0x20 ... 0x25: - and: /* and */ - emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); - break; - case 0x28 ... 0x2d: - sub: /* sub */ - emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); - break; - case 0x30 ... 0x35: - xor: /* xor */ - emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); - break; - case 0x38 ... 0x3d: - cmp: /* cmp */ - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); - break; case 0x40 ... 0x47: /* inc r16/r32 */ emulate_1op("inc", c->dst, ctxt->eflags); break; case 0x48 ... 0x4f: /* dec r16/r32 */ emulate_1op("dec", c->dst, ctxt->eflags); break; - case 0x58 ... 0x5f: /* pop reg */ - pop_instruction: - rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); - break; - case 0x60: /* pusha */ - rc = emulate_pusha(ctxt, ops); - break; - case 0x61: /* popa */ - rc = emulate_popa(ctxt, ops); - break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) goto cannot_emulate; @@ -3121,26 +3889,6 @@ special_insn: if (test_cc(c->b, ctxt->eflags)) jmp_rel(c, c->src.val); break; - case 0x80 ... 0x83: /* Grp1 */ - switch (c->modrm_reg) { - case 0: - goto add; - case 1: - goto or; - case 2: - goto adc; - case 3: - goto sbb; - case 4: - goto and; - case 5: - goto sub; - case 6: - goto xor; - case 7: - goto cmp; - } - break; case 0x84 ... 0x85: test: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); @@ -3162,7 +3910,7 @@ special_insn: rc = emulate_ud(ctxt); goto done; } - c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); + c->dst.val = get_segment_selector(ctxt, c->modrm_reg); break; case 0x8d: /* lea r16/r32, m */ c->dst.val = c->src.addr.mem.ea; @@ -3187,7 +3935,7 @@ special_insn: break; } case 0x8f: /* pop (sole member of Grp1a) */ - rc = emulate_grp1a(ctxt, ops); + rc = em_grp1a(ctxt); break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) @@ -3200,31 +3948,17 @@ special_insn: case 8: c->dst.val = (s32)c->dst.val; break; } break; - case 0x9c: /* pushf */ - c->src.val = (unsigned long) ctxt->eflags; - emulate_push(ctxt, ops); - break; - case 0x9d: /* popf */ - c->dst.type = OP_REG; - c->dst.addr.reg = &ctxt->eflags; - c->dst.bytes = c->op_bytes; - rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); - break; - case 0xa6 ... 0xa7: /* cmps */ - c->dst.type = OP_NONE; /* Disable writeback. */ - goto cmp; case 0xa8 ... 0xa9: /* test ax, imm */ goto test; - case 0xae ... 0xaf: /* scas */ - goto cmp; case 0xc0 ... 0xc1: - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xc3: /* ret */ c->dst.type = OP_REG; c->dst.addr.reg = &c->eip; c->dst.bytes = c->op_bytes; - goto pop_instruction; + rc = em_pop(ctxt); + break; case 0xc4: /* les */ rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); break; @@ -3252,11 +3986,11 @@ special_insn: rc = emulate_iret(ctxt, ops); break; case 0xd0 ... 0xd1: /* Grp2 */ - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xd2 ... 0xd3: /* Grp2 */ c->src.val = c->regs[VCPU_REGS_RCX]; - emulate_grp2(ctxt); + rc = em_grp2(ctxt); break; case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); @@ -3278,23 +4012,14 @@ special_insn: long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - emulate_push(ctxt, ops); + rc = em_push(ctxt); break; } case 0xe9: /* jmp rel */ goto jmp; - case 0xea: { /* jmp far */ - unsigned short sel; - jump_far: - memcpy(&sel, c->src.valptr + c->op_bytes, 2); - - if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) - goto done; - - c->eip = 0; - memcpy(&c->eip, c->src.valptr, c->op_bytes); + case 0xea: /* jmp far */ + rc = em_jmp_far(ctxt); break; - } case 0xeb: jmp: /* jmp rel short */ jmp_rel(c, c->src.val); @@ -3304,11 +4029,6 @@ special_insn: case 0xed: /* in (e/r)ax,dx */ c->src.val = c->regs[VCPU_REGS_RDX]; do_io_in: - c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, &c->dst.val)) goto done; /* IO is needed */ @@ -3317,25 +4037,19 @@ special_insn: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: - c->src.bytes = min(c->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->dst.val, - c->src.bytes)) { - rc = emulate_gp(ctxt, 0); - goto done; - } - ops->pio_out_emulated(c->src.bytes, c->dst.val, - &c->src.val, 1, ctxt->vcpu); + ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, + &c->src.val, 1); c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ - ctxt->vcpu->arch.halt_request = 1; + ctxt->ops->halt(ctxt); break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ ctxt->eflags ^= EFLG_CF; break; case 0xf6 ... 0xf7: /* Grp3 */ - rc = emulate_grp3(ctxt, ops); + rc = em_grp3(ctxt); break; case 0xf8: /* clc */ ctxt->eflags &= ~EFLG_CF; @@ -3366,13 +4080,11 @@ special_insn: ctxt->eflags |= EFLG_DF; break; case 0xfe: /* Grp4 */ - grp45: - rc = emulate_grp45(ctxt, ops); + rc = em_grp45(ctxt); break; case 0xff: /* Grp5 */ - if (c->modrm_reg == 5) - goto jump_far; - goto grp45; + rc = em_grp45(ctxt); + break; default: goto cannot_emulate; } @@ -3381,7 +4093,7 @@ special_insn: goto done; writeback: - rc = writeback(ctxt, ops); + rc = writeback(ctxt); if (rc != X86EMUL_CONTINUE) goto done; @@ -3392,7 +4104,7 @@ writeback: c->dst.type = saved_dst_type; if ((c->d & SrcMask) == SrcSI) - string_addr_inc(ctxt, seg_override(ctxt, ops, c), + string_addr_inc(ctxt, seg_override(ctxt, c), VCPU_REGS_RSI, &c->src); if ((c->d & DstMask) == DstDI) @@ -3427,115 +4139,34 @@ writeback: done: if (rc == X86EMUL_PROPAGATE_FAULT) ctxt->have_exception = true; + if (rc == X86EMUL_INTERCEPTED) + return EMULATION_INTERCEPTED; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; twobyte_insn: switch (c->b) { - case 0x01: /* lgdt, lidt, lmsw */ - switch (c->modrm_reg) { - u16 size; - unsigned long address; - - case 0: /* vmcall */ - if (c->modrm_mod != 3 || c->modrm_rm != 1) - goto cannot_emulate; - - rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - - /* Let the processor re-execute the fixed hypercall */ - c->eip = ctxt->eip; - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 2: /* lgdt */ - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - realmode_lgdt(ctxt->vcpu, size, address); - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3) { - switch (c->modrm_rm) { - case 1: - rc = kvm_fix_hypercall(ctxt->vcpu); - break; - default: - goto cannot_emulate; - } - } else { - rc = read_descriptor(ctxt, ops, c->src.addr.mem, - &size, &address, - c->op_bytes); - if (rc != X86EMUL_CONTINUE) - goto done; - realmode_lidt(ctxt->vcpu, size, address); - } - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - case 4: /* smsw */ - c->dst.bytes = 2; - c->dst.val = ops->get_cr(0, ctxt->vcpu); - break; - case 6: /* lmsw */ - ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | - (c->src.val & 0x0f), ctxt->vcpu); - c->dst.type = OP_NONE; - break; - case 5: /* not defined */ - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - case 7: /* invlpg*/ - emulate_invlpg(ctxt->vcpu, - linear(ctxt, c->src.addr.mem)); - /* Disable writeback. */ - c->dst.type = OP_NONE; - break; - default: - goto cannot_emulate; - } - break; case 0x05: /* syscall */ rc = emulate_syscall(ctxt, ops); break; case 0x06: - emulate_clts(ctxt->vcpu); + rc = em_clts(ctxt); break; case 0x09: /* wbinvd */ - kvm_emulate_wbinvd(ctxt->vcpu); + (ctxt->ops->wbinvd)(ctxt); break; case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ case 0x18: /* Grp16 (prefetch/nop) */ break; case 0x20: /* mov cr, reg */ - switch (c->modrm_reg) { - case 1: - case 5 ... 7: - case 9 ... 15: - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); + c->dst.val = ops->get_cr(ctxt, c->modrm_reg); break; case 0x21: /* mov from dr to reg */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); + ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); break; case 0x22: /* mov reg, cr */ - if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { + if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3543,16 +4174,9 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - emulate_ud(ctxt); - rc = X86EMUL_PROPAGATE_FAULT; - goto done; - } - - if (ops->set_dr(c->modrm_reg, c->src.val & + if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & ((ctxt->mode == X86EMUL_MODE_PROT64) ? - ~0ULL : ~0U), ctxt->vcpu) < 0) { + ~0ULL : ~0U)) < 0) { /* #UD condition is already handled by the code above */ emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; @@ -3565,7 +4189,7 @@ twobyte_insn: /* wrmsr */ msr_data = (u32)c->regs[VCPU_REGS_RAX] | ((u64)c->regs[VCPU_REGS_RDX] << 32); - if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { + if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3574,7 +4198,7 @@ twobyte_insn: break; case 0x32: /* rdmsr */ - if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { + if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { emulate_gp(ctxt, 0); rc = X86EMUL_PROPAGATE_FAULT; goto done; @@ -3603,7 +4227,7 @@ twobyte_insn: c->dst.val = test_cc(c->b, ctxt->eflags); break; case 0xa0: /* push fs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); break; case 0xa1: /* pop fs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); @@ -3620,7 +4244,7 @@ twobyte_insn: emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); break; case 0xa8: /* push gs */ - emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); break; case 0xa9: /* pop gs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); @@ -3727,7 +4351,7 @@ twobyte_insn: (u64) c->src.val; break; case 0xc7: /* Grp9 (cmpxchg8b) */ - rc = emulate_grp9(ctxt, ops); + rc = em_grp9(ctxt); break; default: goto cannot_emulate; @@ -3739,5 +4363,5 @@ twobyte_insn: goto writeback; cannot_emulate: - return -1; + return EMULATION_FAILED; } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 46d08ca0b48f..51a97426e791 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -33,7 +33,6 @@ struct kvm_kpit_state { }; struct kvm_pit { - unsigned long base_addresss; struct kvm_io_device dev; struct kvm_io_device speaker_dev; struct kvm *kvm; @@ -51,7 +50,6 @@ struct kvm_pit { #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 #define KVM_PIT_CHANNEL_MASK 0x3 -void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ba910d149410..53e2d084bffb 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm); void kvm_destroy_pic(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -void kvm_pic_clear_isr_ack(struct kvm *kvm); static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) { @@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); -int pit_has_pending_timer(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 22fae7593ee7..28418054b880 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) static void nonpaging_update_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, - const void *pte, unsigned long mmu_seq) + const void *pte) { WARN_ON(1); } @@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, - u64 *spte, - const void *new, unsigned long mmu_seq) + struct kvm_mmu_page *sp, u64 *spte, + const void *new) { if (sp->role.level != PT_PAGE_TABLE_LEVEL) { ++vcpu->kvm->stat.mmu_pde_zapped; @@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); + vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); } static bool need_remote_flush(u64 old, u64 new) @@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); - unsigned long mmu_seq; u64 entry, gentry, *spte; unsigned pte_size, page_offset, misaligned, quadrant, offset; int level, npte, invlpg_counter, r, flooded = 0; @@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, break; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - spin_lock(&vcpu->kvm->mmu_lock); if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) gentry = 0; @@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (gentry && !((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word)) - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, - mmu_seq); + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); if (!remote_flush && need_remote_flush(entry, *spte)) remote_flush = true; ++spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c6397795d865..6c4dc010c4cb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } -static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, - gfn_t table_gfn, unsigned index, - pt_element_t orig_pte, pt_element_t new_pte) +static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + pt_element_t __user *ptep_user, unsigned index, + pt_element_t orig_pte, pt_element_t new_pte) { + int npages; pt_element_t ret; pt_element_t *table; struct page *page; - page = gfn_to_page(kvm, table_gfn); + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); + /* Check if the user is doing something meaningless. */ + if (unlikely(npages != 1)) + return -EFAULT; table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gva_t addr, u32 access) { pt_element_t pte; + pt_element_t __user *ptep_user; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; @@ -152,6 +157,9 @@ walk: pt_access = ACC_ALL; for (;;) { + gfn_t real_gfn; + unsigned long host_addr; + index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); @@ -160,43 +168,64 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, - offset, sizeof(pte), - PFERR_USER_MASK|PFERR_WRITE_MASK)) { + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + PFERR_USER_MASK|PFERR_WRITE_MASK); + if (unlikely(real_gfn == UNMAPPED_GVA)) { + present = false; + break; + } + real_gfn = gpa_to_gfn(real_gfn); + + host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + if (unlikely(kvm_is_error_hva(host_addr))) { + present = false; + break; + } + + ptep_user = (pt_element_t __user *)((void *)host_addr + offset); + if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { present = false; break; } trace_kvm_mmu_paging_element(pte, walker->level); - if (!is_present_gpte(pte)) { + if (unlikely(!is_present_gpte(pte))) { present = false; break; } - if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { + if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, + walker->level))) { rsvd_fault = true; break; } - if (write_fault && !is_writable_pte(pte)) - if (user_fault || is_write_protection(vcpu)) - eperm = true; + if (unlikely(write_fault && !is_writable_pte(pte) + && (user_fault || is_write_protection(vcpu)))) + eperm = true; - if (user_fault && !(pte & PT_USER_MASK)) + if (unlikely(user_fault && !(pte & PT_USER_MASK))) eperm = true; #if PTTYPE == 64 - if (fetch_fault && (pte & PT64_NX_MASK)) + if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) eperm = true; #endif - if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { + if (!eperm && !rsvd_fault + && unlikely(!(pte & PT_ACCESSED_MASK))) { + int ret; trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, - index, pte, pte|PT_ACCESSED_MASK)) + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, + pte, pte|PT_ACCESSED_MASK); + if (unlikely(ret < 0)) { + present = false; + break; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_ACCESSED_MASK; } @@ -241,17 +270,21 @@ walk: --walker->level; } - if (!present || eperm || rsvd_fault) + if (unlikely(!present || eperm || rsvd_fault)) goto error; - if (write_fault && !is_dirty_gpte(pte)) { - bool ret; + if (write_fault && unlikely(!is_dirty_gpte(pte))) { + int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, - pte|PT_DIRTY_MASK); - if (ret) + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, + pte, pte|PT_DIRTY_MASK); + if (unlikely(ret < 0)) { + present = false; + goto error; + } else if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_DIRTY_MASK; walker->ptes[walker->level - 1] = pte; @@ -325,7 +358,7 @@ no_present: } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte, unsigned long mmu_seq) + u64 *spte, const void *pte) { pt_element_t gpte; unsigned pte_access; @@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, kvm_release_pfn_clean(pfn); return; } - if (mmu_notifier_retry(vcpu, mmu_seq)) - return; /* * we call mmu_set_spte() with host_writable = true because that diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6bb15d583e47..506e4fe23adc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -63,6 +63,10 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL + static bool erratum_383_found __read_mostly; static const u32 host_save_user_msrs[] = { @@ -93,14 +97,6 @@ struct nested_state { /* A VMEXIT is required but not yet emulated */ bool exit_required; - /* - * If we vmexit during an instruction emulation we need this to restore - * the l1 guest rip after the emulation - */ - unsigned long vmexit_rip; - unsigned long vmexit_rsp; - unsigned long vmexit_rax; - /* cache for intercepts of the guest */ u32 intercept_cr; u32 intercept_dr; @@ -144,8 +140,13 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; u32 apf_reason; + + u64 tsc_ratio; }; +static DEFINE_PER_CPU(u64, current_tsc_ratio); +#define TSC_RATIO_DEFAULT 0x0100000000ULL + #define MSR_INVALID 0xffffffffU static struct svm_direct_access_msrs { @@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -376,7 +378,6 @@ struct svm_cpu_data { }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); -static uint32_t svm_features; struct svm_init_data { int cpu; @@ -569,6 +570,10 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { + /* Make sure we clean up behind us */ + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + cpu_svm_disable(); } @@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + } + svm_init_erratum_383(); return 0; @@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. + */ + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + + kvm_max_guest_tsc_khz = max; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); @@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void) goto err; } - svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; @@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 __scale_tsc(u64 ratio, u64 tsc) +{ + u64 mult, frac, _tsc; + + mult = ratio >> 32; + frac = ratio & ((1ULL << 32) - 1); + + _tsc = tsc; + _tsc *= mult; + _tsc += (tsc >> 32) * frac; + _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; + + return _tsc; +} + +static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 _tsc = tsc; + + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + _tsc = __scale_tsc(svm->tsc_ratio, tsc); + + return _tsc; +} + +static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 ratio; + u64 khz; + + /* TSC scaling supported? */ + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) + return; + + /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ + if (user_tsc_khz == 0) { + vcpu->arch.virtual_tsc_khz = 0; + svm->tsc_ratio = TSC_RATIO_DEFAULT; + return; + } + + khz = user_tsc_khz; + + /* TSC scaling required - calculate ratio */ + ratio = khz << 32; + do_div(ratio, tsc_khz); + + if (ratio == 0 || ratio & TSC_RATIO_RSVD) { + WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return; + } + vcpu->arch.virtual_tsc_khz = user_tsc_khz; + svm->tsc_ratio = ratio; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } +static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = svm_scale_tsc(vcpu, native_read_tsc()); + + return target_tsc - tsc; +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; save->dr7 = 0x400; - save->rflags = 2; + kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->tsc_ratio = TSC_RATIO_DEFAULT; + err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { + __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_guest_mode(vcpu)) { - /* - * We are here because we run in nested mode, the host kvm - * intercepts cr0 writes but the l1 hypervisor does not. - * But the L1 hypervisor may intercept selective cr0 writes. - * This needs to be checked here. - */ - unsigned long old, new; - - /* Remove bits that would trigger a real cr0 write intercept */ - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; - new = cr0 & SVM_CR0_SELECTIVE_MASK; - - if (old == new) { - /* cr0 write with ts and mp unchanged */ - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { - svm->nested.vmexit_rip = kvm_rip_read(vcpu); - svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); - return; - } - } - } - #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; - nested_vmcb->save.rflags = vmcb->save.rflags; + nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; @@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - svm->vmcb->save.rflags = hsave->save.rflags; + kvm_set_rflags(&svm->vcpu, hsave->save.rflags); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); @@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) hsave->save.efer = svm->vcpu.arch.efer; hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); hsave->save.cr4 = svm->vcpu.arch.cr4; - hsave->save.rflags = vmcb->save.rflags; + hsave->save.rflags = kvm_get_rflags(&svm->vcpu); hsave->save.rip = kvm_rip_read(&svm->vcpu); hsave->save.rsp = vmcb->save.rsp; hsave->save.rax = vmcb->save.rax; @@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) copy_vmcb_control_area(hsave, vmcb); - if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; else svm->vcpu.arch.hflags &= ~HF_HIF_MASK; @@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.ds = nested_vmcb->save.ds; svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; svm->vmcb->save.idtr = nested_vmcb->save.idtr; - svm->vmcb->save.rflags = nested_vmcb->save.rflags; + kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); @@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); @@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); @@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +{ + unsigned long cr0 = svm->vcpu.arch.cr0; + bool ret = false; + u64 intercept; + + intercept = svm->nested.intercept; + + if (!is_guest_mode(&svm->vcpu) || + (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) + return false; + + cr0 &= ~SVM_CR0_SELECTIVE_MASK; + val &= ~SVM_CR0_SELECTIVE_MASK; + + if (cr0 ^ val) { + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); + } + + return ret; +} + #define CR_VALID (1ULL << 63) static int cr_interception(struct vcpu_svm *svm) @@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm) val = kvm_register_read(&svm->vcpu, reg); switch (cr) { case 0: - err = kvm_set_cr0(&svm->vcpu, val); + if (!check_selective_cr0_intercepted(svm, val)) + err = kvm_set_cr0(&svm->vcpu, val); + else + return 1; + break; case 3: err = kvm_set_cr3(&svm->vcpu, val); @@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm) return 1; } -static int cr0_write_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - int r; - - r = cr_interception(svm); - - if (svm->nested.vmexit_rip) { - kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); - kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); - kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); - svm->nested.vmexit_rip = 0; - } - - return r; -} - static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_TSC: { struct vmcb *vmcb = get_host_vmcb(svm); - *data = vmcb->control.tsc_offset + native_read_tsc(); + *data = vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, native_read_tsc()); + break; } case MSR_STAR: @@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = cr0_write_interception, + [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, [SVM_EXIT_WRITE_CR8] = cr8_write_interception, @@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_NPF] = pf_interception, }; -void dump_vmcb(struct kvm_vcpu *vcpu) +static void dump_vmcb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; pr_err("VMCB Control Area:\n"); - pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); - pr_err("cr_write: %04x\n", control->intercept_cr >> 16); - pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); - pr_err("dr_write: %04x\n", control->intercept_dr >> 16); - pr_err("exceptions: %08x\n", control->intercept_exceptions); - pr_err("intercepts: %016llx\n", control->intercept); - pr_err("pause filter count: %d\n", control->pause_filter_count); - pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); - pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); - pr_err("tsc_offset: %016llx\n", control->tsc_offset); - pr_err("asid: %d\n", control->asid); - pr_err("tlb_ctl: %d\n", control->tlb_ctl); - pr_err("int_ctl: %08x\n", control->int_ctl); - pr_err("int_vector: %08x\n", control->int_vector); - pr_err("int_state: %08x\n", control->int_state); - pr_err("exit_code: %08x\n", control->exit_code); - pr_err("exit_info1: %016llx\n", control->exit_info_1); - pr_err("exit_info2: %016llx\n", control->exit_info_2); - pr_err("exit_int_info: %08x\n", control->exit_int_info); - pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); - pr_err("nested_ctl: %lld\n", control->nested_ctl); - pr_err("nested_cr3: %016llx\n", control->nested_cr3); - pr_err("event_inj: %08x\n", control->event_inj); - pr_err("event_inj_err: %08x\n", control->event_inj_err); - pr_err("lbr_ctl: %lld\n", control->lbr_ctl); - pr_err("next_rip: %016llx\n", control->next_rip); + pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); + pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); + pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); + pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); + pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); + pr_err("%-20s%016llx\n", "intercepts:", control->intercept); + pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); + pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); + pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); + pr_err("%-20s%d\n", "asid:", control->asid); + pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); + pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); + pr_err("%-20s%08x\n", "int_vector:", control->int_vector); + pr_err("%-20s%08x\n", "int_state:", control->int_state); + pr_err("%-20s%08x\n", "exit_code:", control->exit_code); + pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); + pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); + pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); + pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); + pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); + pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); + pr_err("%-20s%08x\n", "event_inj:", control->event_inj); + pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); + pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("VMCB State Save Area:\n"); - pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", - save->es.selector, save->es.attrib, - save->es.limit, save->es.base); - pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", - save->cs.selector, save->cs.attrib, - save->cs.limit, save->cs.base); - pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", - save->ss.selector, save->ss.attrib, - save->ss.limit, save->ss.base); - pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", - save->ds.selector, save->ds.attrib, - save->ds.limit, save->ds.base); - pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", - save->fs.selector, save->fs.attrib, - save->fs.limit, save->fs.base); - pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", - save->gs.selector, save->gs.attrib, - save->gs.limit, save->gs.base); - pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->gdtr.selector, save->gdtr.attrib, - save->gdtr.limit, save->gdtr.base); - pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->ldtr.selector, save->ldtr.attrib, - save->ldtr.limit, save->ldtr.base); - pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->idtr.selector, save->idtr.attrib, - save->idtr.limit, save->idtr.base); - pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", - save->tr.selector, save->tr.attrib, - save->tr.limit, save->tr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "es:", + save->es.selector, save->es.attrib, + save->es.limit, save->es.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "cs:", + save->cs.selector, save->cs.attrib, + save->cs.limit, save->cs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ss:", + save->ss.selector, save->ss.attrib, + save->ss.limit, save->ss.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ds:", + save->ds.selector, save->ds.attrib, + save->ds.limit, save->ds.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "fs:", + save->fs.selector, save->fs.attrib, + save->fs.limit, save->fs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gs:", + save->gs.selector, save->gs.attrib, + save->gs.limit, save->gs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gdtr:", + save->gdtr.selector, save->gdtr.attrib, + save->gdtr.limit, save->gdtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ldtr:", + save->ldtr.selector, save->ldtr.attrib, + save->ldtr.limit, save->ldtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "idtr:", + save->idtr.selector, save->idtr.attrib, + save->idtr.limit, save->idtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "tr:", + save->tr.selector, save->tr.attrib, + save->tr.limit, save->tr.base); pr_err("cpl: %d efer: %016llx\n", save->cpl, save->efer); - pr_err("cr0: %016llx cr2: %016llx\n", - save->cr0, save->cr2); - pr_err("cr3: %016llx cr4: %016llx\n", - save->cr3, save->cr4); - pr_err("dr6: %016llx dr7: %016llx\n", - save->dr6, save->dr7); - pr_err("rip: %016llx rflags: %016llx\n", - save->rip, save->rflags); - pr_err("rsp: %016llx rax: %016llx\n", - save->rsp, save->rax); - pr_err("star: %016llx lstar: %016llx\n", - save->star, save->lstar); - pr_err("cstar: %016llx sfmask: %016llx\n", - save->cstar, save->sfmask); - pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", - save->kernel_gs_base, save->sysenter_cs); - pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", - save->sysenter_esp, save->sysenter_eip); - pr_err("gpat: %016llx dbgctl: %016llx\n", - save->g_pat, save->dbgctl); - pr_err("br_from: %016llx br_to: %016llx\n", - save->br_from, save->br_to); - pr_err("excp_from: %016llx excp_to: %016llx\n", - save->last_excp_from, save->last_excp_to); - + pr_err("%-15s %016llx %-13s %016llx\n", + "cr0:", save->cr0, "cr2:", save->cr2); + pr_err("%-15s %016llx %-13s %016llx\n", + "cr3:", save->cr3, "cr4:", save->cr4); + pr_err("%-15s %016llx %-13s %016llx\n", + "dr6:", save->dr6, "dr7:", save->dr7); + pr_err("%-15s %016llx %-13s %016llx\n", + "rip:", save->rip, "rflags:", save->rflags); + pr_err("%-15s %016llx %-13s %016llx\n", + "rsp:", save->rsp, "rax:", save->rax); + pr_err("%-15s %016llx %-13s %016llx\n", + "star:", save->star, "lstar:", save->lstar); + pr_err("%-15s %016llx %-13s %016llx\n", + "cstar:", save->cstar, "sfmask:", save->sfmask); + pr_err("%-15s %016llx %-13s %016llx\n", + "kernel_gs_base:", save->kernel_gs_base, + "sysenter_cs:", save->sysenter_cs); + pr_err("%-15s %016llx %-13s %016llx\n", + "sysenter_esp:", save->sysenter_esp, + "sysenter_eip:", save->sysenter_eip); + pr_err("%-15s %016llx %-13s %016llx\n", + "gpat:", save->g_pat, "dbgctl:", save->dbgctl); + pr_err("%-15s %016llx %-13s %016llx\n", + "br_from:", save->br_from, "br_to:", save->br_to); + pr_err("%-15s %016llx %-13s %016llx\n", + "excp_from:", save->last_excp_from, + "excp_to:", save->last_excp_to); } static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) @@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) return 0; - ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); if (is_guest_mode(vcpu)) return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); @@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define PRE_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_PRE_EXCEPT, } +#define POST_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_EXCEPT, } +#define POST_MEM(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_MEMACCESS, } + +static struct __x86_intercept { + u32 exit_code; + enum x86_intercept_stage stage; +} x86_intercept_map[] = { + [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), + [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), + [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), + [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), + [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), + [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), + [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), + [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), + [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), + [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), + [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), + [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), + [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), + [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), + [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), + [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), + [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), + [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), + [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), + [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), + [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), + [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), + [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), + [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), + [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), + [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), + [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), + [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), + [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), + [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), + [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), + [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), + [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), + [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), + [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), + [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), +}; + +#undef PRE_EX +#undef POST_EX +#undef POST_MEM + +static int svm_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + struct vcpu_svm *svm = to_svm(vcpu); + int vmexit, ret = X86EMUL_CONTINUE; + struct __x86_intercept icpt_info; + struct vmcb *vmcb = svm->vmcb; + + if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) + goto out; + + icpt_info = x86_intercept_map[info->intercept]; + + if (stage != icpt_info.stage) + goto out; + + switch (icpt_info.exit_code) { + case SVM_EXIT_READ_CR0: + if (info->intercept == x86_intercept_cr_read) + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_WRITE_CR0: { + unsigned long cr0, val; + u64 intercept; + + if (info->intercept == x86_intercept_cr_write) + icpt_info.exit_code += info->modrm_reg; + + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + break; + + intercept = svm->nested.intercept; + + if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) + break; + + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; + + if (info->intercept == x86_intercept_lmsw) { + cr0 &= 0xfUL; + val &= 0xfUL; + /* lmsw can't clear PE - catch this here */ + if (cr0 & X86_CR0_PE) + val |= X86_CR0_PE; + } + + if (cr0 ^ val) + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; + + break; + } + case SVM_EXIT_READ_DR0: + case SVM_EXIT_WRITE_DR0: + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_MSR: + if (info->intercept == x86_intercept_wrmsr) + vmcb->control.exit_info_1 = 1; + else + vmcb->control.exit_info_1 = 0; + break; + case SVM_EXIT_PAUSE: + /* + * We get this for NOP only, but pause + * is rep not, check this here + */ + if (info->rep_prefix != REPE_PREFIX) + goto out; + case SVM_EXIT_IOIO: { + u64 exit_info; + u32 bytes; + + exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + exit_info |= SVM_IOIO_TYPE_MASK; + bytes = info->src_bytes; + } else { + bytes = info->dst_bytes; + } + + if (info->intercept == x86_intercept_outs || + info->intercept == x86_intercept_ins) + exit_info |= SVM_IOIO_STR_MASK; + + if (info->rep_prefix) + exit_info |= SVM_IOIO_REP_MASK; + + bytes = min(bytes, 4u); + + exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; + + exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); + + vmcb->control.exit_info_1 = exit_info; + vmcb->control.exit_info_2 = info->next_rip; + + break; + } + default: + break; + } + + vmcb->control.next_rip = info->next_rip; + vmcb->control.exit_code = icpt_info.exit_code; + vmexit = nested_svm_exit_handled(svm); + + ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED + : X86EMUL_CONTINUE; + +out: + return ret; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, + .set_tsc_khz = svm_set_tsc_khz, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, + .compute_tsc_offset = svm_compute_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, + + .check_intercept = svm_check_intercept, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5b4cdcbd154c..4c3fa0f67469 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -128,8 +128,11 @@ struct vcpu_vmx { unsigned long host_rsp; int launched; u8 fail; + u8 cpl; + bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; + ulong rflags; struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; @@ -159,6 +162,10 @@ struct vcpu_vmx { u32 ar; } tr, es, ds, fs, gs; } rmode; + struct { + u32 bitmask; /* 4 bits per segment (1 bit per field) */ + struct kvm_save_segment seg[8]; + } segment_cache; int vpid; bool emulation_required; @@ -171,6 +178,15 @@ struct vcpu_vmx { bool rdtscp_enabled; }; +enum segment_cache_field { + SEG_FIELD_SEL = 0, + SEG_FIELD_BASE = 1, + SEG_FIELD_LIMIT = 2, + SEG_FIELD_AR = 3, + + SEG_FIELD_NR = 4 +}; + static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_vmx, vcpu); @@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + +static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, + unsigned field) +{ + bool ret; + u32 mask = 1 << (seg * SEG_FIELD_NR + field); + + if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { + vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); + vmx->segment_cache.bitmask = 0; + } + ret = vmx->segment_cache.bitmask & mask; + vmx->segment_cache.bitmask |= mask; + return ret; +} + +static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) +{ + u16 *p = &vmx->segment_cache.seg[seg].selector; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) + *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); + return *p; +} + +static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) +{ + ulong *p = &vmx->segment_cache.seg[seg].base; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) + *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); + return *p; +} + +static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].limit; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); + return *p; +} + +static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) +{ + u32 *p = &vmx->segment_cache.seg[seg].ar; + + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) + *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); + return *p; +} + static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; @@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags, save_rflags; - rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = to_vmx(vcpu)->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + rflags = vmcs_readl(GUEST_RFLAGS); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } + to_vmx(vcpu)->rflags = rflags; } - return rflags; + return to_vmx(vcpu)->rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; @@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, } if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) + int inc_eip = 0; + if (kvm_exception_is_soft(nr)) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void) } /* + * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ + * ioctl. In this case the call-back should update internal vmx state to make + * the changes effective. + */ +static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +{ + /* Nothing to do here */ +} + +/* * writes 'offset' into guest's timestamp counter offset register */ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) vmcs_write64(TSC_OFFSET, offset + adjustment); } +static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + return target_tsc - native_read_tsc(); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; #ifdef CONFIG_X86_64 case MSR_FS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_FS_BASE, data); break; case MSR_GS_BASE: + vmx_segment_cache_clear(vmx); vmcs_writel(GUEST_GS_BASE, data); break; case MSR_KERNEL_GS_BASE: @@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); @@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); + vmx_segment_cache_clear(vmx); + vmcs_write16(GUEST_SS_SELECTOR, 0); vmcs_write32(GUEST_SS_AR_BYTES, 0x93); @@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } + vmx_segment_cache_clear(vmx); + vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); @@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) { u32 guest_tr_ar; + vmx_segment_cache_clear(to_vmx(vcpu)); + guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { printk(KERN_DEBUG "%s: tss fixup for long mode. \n", @@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static u64 construct_eptp(unsigned long root_hpa) @@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_save_segment *save; u32 ar; @@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = save->limit; ar = save->ar; if (seg == VCPU_SREG_TR - || var->selector == vmcs_read16(sf->selector)) + || var->selector == vmx_read_guest_seg_selector(vmx, seg)) goto use_saved_rmode_seg; } - var->base = vmcs_readl(sf->base); - var->limit = vmcs_read32(sf->limit); - var->selector = vmcs_read16(sf->selector); - ar = vmcs_read32(sf->ar_bytes); + var->base = vmx_read_guest_seg_base(vmx, seg); + var->limit = vmx_read_guest_seg_limit(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + ar = vmx_read_guest_seg_ar(vmx, seg); use_saved_rmode_seg: if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; @@ -2098,27 +2205,37 @@ use_saved_rmode_seg: static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_segment s; if (to_vmx(vcpu)->rmode.vm86_active) { vmx_get_segment(vcpu, &s, seg); return s.base; } - return vmcs_readl(sf->base); + return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } -static int vmx_get_cpl(struct kvm_vcpu *vcpu) +static int __vmx_get_cpl(struct kvm_vcpu *vcpu) { if (!is_protmode(vcpu)) return 0; - if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + if (!is_long_mode(vcpu) + && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - return vmcs_read16(GUEST_CS_SELECTOR) & 3; + return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; } +static int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { + __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); + } + return to_vmx(vcpu)->cpl; +} + + static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; @@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; + vmx_segment_cache_clear(vmx); + if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { vmcs_write16(sf->selector, var->selector); vmx->rmode.tr.selector = var->selector; @@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ar |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, ar); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { - u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); + u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); *db = (ar >> 14) & 1; *l = (ar >> 13) & 1; @@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) if (ret != 0) goto out; + vmx_segment_cache_clear(vmx); + seg_setup(VCPU_SREG_CS); /* * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode @@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++vcpu->stat.irq_injections; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) + int inc_eip = 0; + if (vcpu->arch.interrupt.soft) + inc_eip = vcpu->arch.event_exit_inst_len; + if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } ++vcpu->stat.nmi_injections; + vmx->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) + if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return; } @@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { if (!cpu_has_virtual_nmis()) return to_vmx(vcpu)->soft_vnmi_blocked; + if (to_vmx(vcpu)->nmi_known_unmasked) + return false; return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; } @@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) vmx->vnmi_blocked_time = 0; } } else { + vmx->nmi_known_unmasked = !masked; if (masked) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) enum emulation_result er; vect_info = vmx->idt_vectoring_info; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + intr_info = vmx->exit_intr_info; if (is_machine_check(intr_info)) return handle_machine_check(vcpu); @@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) } error_code = 0; - rip = kvm_rip_read(vcpu); if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { @@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_run->exit_reason = KVM_EXIT_DEBUG; + rip = kvm_rip_read(vcpu); kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; kvm_run->debug.arch.exception = ex_no; break; @@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) switch (type) { case INTR_TYPE_NMI_INTR: vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + vmx_set_nmi_mask(vcpu, true); break; case INTR_TYPE_EXT_INTR: case INTR_TYPE_SOFT_INTR: @@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; + + if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY + || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) + return; + + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + exit_intr_info = vmx->exit_intr_info; /* Handle machine checks before interrupts are enabled */ - if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI - && is_machine_check(exit_intr_info))) + if (is_machine_check(exit_intr_info)) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ @@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { - u32 exit_intr_info = vmx->exit_intr_info; + u32 exit_intr_info; bool unblock_nmi; u8 vector; bool idtv_info_valid; @@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; if (cpu_has_virtual_nmis()) { + if (vmx->nmi_known_unmasked) + return; + /* + * Can't use vmx->exit_intr_info since we're not sure what + * the exit reason is. + */ + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) vector != DF_VECTOR && !idtv_info_valid) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + else + vmx->nmi_known_unmasked = + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) + & GUEST_INTR_STATE_NMI); } else if (unlikely(vmx->soft_vnmi_blocked)) vmx->vnmi_blocked_time += ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); @@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. */ - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + vmx_set_nmi_mask(&vmx->vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: vmx->vcpu.arch.event_exit_inst_len = @@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ); vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_CPL) | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_SEGMENTS) | (1 << VCPU_EXREG_CR3)); vcpu->arch.regs_dirty = 0; @@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); @@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; if (!vmx->guest_msrs) { - err = -ENOMEM; goto uninit_vcpu; } @@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_vmcs; if (vm_need_virtualize_apic_accesses(kvm)) - if (alloc_apic_access_page(kvm) != 0) + err = alloc_apic_access_page(kvm); + if (err) goto free_vmcs; if (enable_ept) { @@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { } +static int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return X86EMUL_CONTINUE; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + .set_tsc_khz = vmx_set_tsc_khz, .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, + .compute_tsc_offset = vmx_compute_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, + + .check_intercept = vmx_check_intercept, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 934b4c6b0bf9..77c9d8673dc4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -60,22 +60,12 @@ #include <asm/div64.h> #define MAX_IO_MSRS 256 -#define CR0_RESERVED_BITS \ - (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ - | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ - | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR4_RESERVED_BITS \ - (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ - | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXSAVE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) - -#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) - #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +#define emul_to_vcpu(ctxt) \ + container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) + /* EFER defaults: * - enable syscall per default because its emulated by KVM * - enable LME and LMA per default on 64 bit KVM @@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +bool kvm_has_tsc_control; +EXPORT_SYMBOL_GPL(kvm_has_tsc_control); +u32 kvm_max_guest_tsc_khz; +EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); + static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) void kvm_inject_nmi(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_NMI, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void) return ret; } -static inline u64 nsec_to_cycles(u64 nsec) +static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.virtual_tsc_khz) + return vcpu->arch.virtual_tsc_khz; + else + return __this_cpu_read(cpu_tsc_khz); +} + +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) { u64 ret; @@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec) if (kvm_tsc_changes_freq()) printk_once(KERN_WARNING "kvm: unreliable cycle conversion on adjustable rate TSC\n"); - ret = nsec * __this_cpu_read(cpu_tsc_khz); + ret = nsec * vcpu_tsc_khz(vcpu); do_div(ret, USEC_PER_SEC); return ret; } -static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) +static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, - &kvm->arch.virtual_tsc_shift, - &kvm->arch.virtual_tsc_mult); - kvm->arch.virtual_tsc_khz = this_tsc_khz; + &vcpu->arch.tsc_catchup_shift, + &vcpu->arch.tsc_catchup_mult); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) { u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, - vcpu->kvm->arch.virtual_tsc_mult, - vcpu->kvm->arch.virtual_tsc_shift); + vcpu->arch.tsc_catchup_mult, + vcpu->arch.tsc_catchup_shift); tsc += vcpu->arch.last_tsc_write; return tsc; } @@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) s64 sdiff; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = data - native_read_tsc(); + offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; sdiff = data - kvm->arch.last_tsc_write; @@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) * In that case, for a reliable TSC, we can match TSC offsets, * or make a best guest using elapsed value. */ - if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && + if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && elapsed < 5ULL * NSEC_PER_SEC) { if (!check_tsc_unstable()) { offset = kvm->arch.last_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { - u64 delta = nsec_to_cycles(elapsed); + u64 delta = nsec_to_cycles(vcpu, elapsed); offset += delta; pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } @@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); kernel_ns = get_kernel_ns(); - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); - + this_tsc_khz = vcpu_tsc_khz(v); if (unlikely(this_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); @@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_GET_TSC_KHZ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XCRS: r = cpu_has_xsave; break; + case KVM_CAP_TSC_CONTROL: + r = kvm_has_tsc_control; + break; default: r = 0; break; @@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { /* Make sure TSC doesn't go backwards */ - s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + s64 tsc_delta; + u64 tsc; + + kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); + tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : + tsc - vcpu->arch.last_guest_tsc; + if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { @@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); } static int is_efer_nx(void) @@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0xC0000001.edx */ + const u32 kvm_supported_word5_x86_features = + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | + F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | + F(PMM) | F(PMM_EN); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); @@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | + (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); entry->ebx = 0; entry->ecx = 0; @@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_supported_word6_x86_features; cpuid_mask(&entry->ecx, 6); break; + /*Add support for Centaur's CPUID instruction*/ + case 0xC0000000: + /*Just support up to 0xC0000004 now*/ + entry->eax = min(entry->eax, 0xC0000004); + break; + case 0xC0000001: + entry->edx &= kvm_supported_word5_x86_features; + cpuid_mask(&entry->edx, 5); + break; + case 0xC0000002: + case 0xC0000003: + case 0xC0000004: + /*Now nothing to do, reserved for the future*/ + break; } kvm_x86_ops->set_supported_cpuid(function, entry); @@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (nent >= cpuid->nent) goto out_free; + /* Add support for Centaur's CPUID instruction. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { + do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + + limit = cpuid_entries[nent - 1].eax; + for (func = 0xC0000001; + func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + } + do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, cpuid->nent); @@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; } + case KVM_SET_TSC_KHZ: { + u32 user_tsc_khz; + + r = -EINVAL; + if (!kvm_has_tsc_control) + break; + + user_tsc_khz = (u32)arg; + + if (user_tsc_khz >= kvm_max_guest_tsc_khz) + goto out; + + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); + + r = 0; + goto out; + } + case KVM_GET_TSC_KHZ: { + r = -EIO; + if (check_tsc_unstable()) + goto out; + + r = vcpu_tsc_khz(vcpu); + + goto out; + } default: r = -EINVAL; } @@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void) static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + handled += n; + addr += n; + len -= n; + v += n; + } while (len); + + return handled; } static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) { - if (vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) - return 0; + int handled = 0; + int n; + + do { + n = min(len, 8); + if (!(vcpu->arch.apic && + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + break; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + handled += n; + addr += n; + len -= n; + v += n; + } while (len); - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); + return handled; } static void kvm_set_segment(struct kvm_vcpu *vcpu, @@ -3703,37 +3805,43 @@ out: } /* used for instruction fetching */ -static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access | PFERR_FETCH_MASK, exception); } -static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, +static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); } -static int kvm_write_guest_virt_system(gva_t addr, void *val, +static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, + gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); void *data = val; int r = X86EMUL_CONTINUE; @@ -3761,13 +3869,15 @@ out: return r; } -static int emulator_read_emulated(unsigned long addr, +static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; + int handled; if (vcpu->mmio_read_completed) { memcpy(val, vcpu->mmio_data, bytes); @@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) + if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; @@ -3794,18 +3904,24 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); + handled = vcpu_mmio_read(vcpu, gpa, bytes, val); + + if (handled == bytes) return X86EMUL_CONTINUE; - } + + gpa += handled; + bytes -= handled; + val += handled; trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; + vcpu->mmio_index = 0; return X86EMUL_IO_NEEDED; } @@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + int handled; gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); @@ -3847,25 +3964,35 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) + handled = vcpu_mmio_write(vcpu, gpa, bytes, val); + if (handled == bytes) return X86EMUL_CONTINUE; + gpa += handled; + bytes -= handled; + val += handled; + vcpu->mmio_needed = 1; + memcpy(vcpu->mmio_data, val, bytes); vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; - memcpy(vcpu->run->mmio.data, val, bytes); + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + vcpu->mmio_index = 0; return X86EMUL_CONTINUE; } -int emulator_write_emulated(unsigned long addr, +int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { int rc, now; @@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr, (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) #endif -static int emulator_cmpxchg_emulated(unsigned long addr, +static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_exception *exception, - struct kvm_vcpu *vcpu) + struct x86_exception *exception) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; struct page *page; char *kaddr; @@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, emul_write: printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); - return emulator_write_emulated(addr, new, bytes, exception, vcpu); + return emulator_write_emulated(ctxt, addr, new, bytes, exception); } static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) @@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } -static int emulator_pio_in_emulated(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu) +static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + if (vcpu->arch.pio.count) goto data_avail; @@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, return 0; } -static int emulator_pio_out_emulated(int size, unsigned short port, - const void *val, unsigned int count, - struct kvm_vcpu *vcpu) +static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, + const void *val, unsigned int count) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + trace_kvm_pio(1, port, size, count); vcpu->arch.pio.port = port; @@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) return kvm_x86_ops->get_segment_base(vcpu, seg); } -int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) +static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) { - kvm_mmu_invlpg(vcpu, address); - return X86EMUL_CONTINUE; + kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) @@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); -int emulate_clts(struct kvm_vcpu *vcpu) +static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - kvm_x86_ops->fpu_activate(vcpu); - return X86EMUL_CONTINUE; + kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); } -int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return _kvm_get_dr(vcpu, dr, dest); + return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) +int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) { - return __kvm_set_dr(vcpu, dr, value); + return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); } static u64 mk_cr_64(u64 curr_cr, u32 new_val) @@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) return (curr_cr & ~((1ULL << 32) - 1)) | new_val; } -static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); unsigned long value; switch (cr) { @@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) return value; } -static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) +static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int res = 0; switch (cr) { @@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) return res; } -static int emulator_get_cpl(struct kvm_vcpu *vcpu) +static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) +{ + return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); +} + +static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) +{ + kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); +} + +static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - return kvm_x86_ops->get_cpl(vcpu); + kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_gdt(vcpu, dt); + kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); } -static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) { - kvm_x86_ops->get_idt(vcpu, dt); + kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); } -static unsigned long emulator_get_cached_segment_base(int seg, - struct kvm_vcpu *vcpu) +static unsigned long emulator_get_cached_segment_base( + struct x86_emulate_ctxt *ctxt, int seg) { - return get_segment_base(vcpu, seg); + return get_segment_base(emul_to_vcpu(ctxt), seg); } -static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, - int seg, struct kvm_vcpu *vcpu) +static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, + int seg) { struct kvm_segment var; - kvm_get_segment(vcpu, &var, seg); + kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); + *selector = var.selector; if (var.unusable) return false; @@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, return true; } -static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, - int seg, struct kvm_vcpu *vcpu) +static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, + int seg) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_segment var; - /* needed to preserve selector */ - kvm_get_segment(vcpu, &var, seg); - + var.selector = selector; var.base = get_desc_base(desc); #ifdef CONFIG_X86_64 var.base |= ((u64)base3) << 32; @@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, return; } -static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) +static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) { - struct kvm_segment kvm_seg; + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); +} - kvm_get_segment(vcpu, &kvm_seg, seg); - return kvm_seg.selector; +static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) +{ + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); } -static void emulator_set_segment_selector(u16 sel, int seg, - struct kvm_vcpu *vcpu) +static void emulator_halt(struct x86_emulate_ctxt *ctxt) { - struct kvm_segment kvm_seg; + emul_to_vcpu(ctxt)->arch.halt_request = 1; +} - kvm_get_segment(vcpu, &kvm_seg, seg); - kvm_seg.selector = sel; - kvm_set_segment(vcpu, &kvm_seg, seg); +static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_disable(); + kvm_load_guest_fpu(emul_to_vcpu(ctxt)); + /* + * CR0.TS may reference the host fpu state, not the guest fpu state, + * so it may be clear at this point. + */ + clts(); +} + +static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) +{ + preempt_enable(); +} + +static int emulator_intercept(struct x86_emulate_ctxt *ctxt, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } static struct x86_emulate_ops emulate_ops = { @@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = { .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, + .invlpg = emulator_invlpg, .pio_in_emulated = emulator_pio_in_emulated, .pio_out_emulated = emulator_pio_out_emulated, - .get_cached_descriptor = emulator_get_cached_descriptor, - .set_cached_descriptor = emulator_set_cached_descriptor, - .get_segment_selector = emulator_get_segment_selector, - .set_segment_selector = emulator_set_segment_selector, + .get_segment = emulator_get_segment, + .set_segment = emulator_set_segment, .get_cached_segment_base = emulator_get_cached_segment_base, .get_gdt = emulator_get_gdt, .get_idt = emulator_get_idt, + .set_gdt = emulator_set_gdt, + .set_idt = emulator_set_idt, .get_cr = emulator_get_cr, .set_cr = emulator_set_cr, .cpl = emulator_get_cpl, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, - .set_msr = kvm_set_msr, - .get_msr = kvm_get_msr, + .set_msr = emulator_set_msr, + .get_msr = emulator_get_msr, + .halt = emulator_halt, + .wbinvd = emulator_wbinvd, + .fix_hypercall = emulator_fix_hypercall, + .get_fpu = emulator_get_fpu, + .put_fpu = emulator_put_fpu, + .intercept = emulator_intercept, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int cs_db, cs_l; + /* + * TODO: fix emulate.c to use guest_read/write_register + * instead of direct ->regs accesses, can save hundred cycles + * on Intel for instructions that don't read/change RSP, for + * for example. + */ cache_all_regs(vcpu); kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : @@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ? X86EMUL_MODE_VM86 : cs_l ? X86EMUL_MODE_PROT64 : cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); memset(c, 0, sizeof(struct decode_cache)); memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; int ret; @@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.decode.op_bytes = 2; vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; - vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; + vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + + inc_eip; ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); if (ret != X86EMUL_CONTINUE) @@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) vcpu->arch.emulate_ctxt.eip = c->eip; memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); if (irq == NMI_VECTOR) vcpu->arch.nmi_pending = false; @@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, { int r; struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + bool writeback = true; kvm_clear_exception_queue(vcpu); - vcpu->arch.mmio_fault_cr2 = cr2; - /* - * TODO: fix emulate.c to use guest_read/write_register - * instead of direct ->regs accesses, can save hundred cycles - * on Intel for instructions that don't read/change RSP, for - * for example. - */ - cache_all_regs(vcpu); if (!(emulation_type & EMULTYPE_NO_DECODE)) { init_emulate_ctxt(vcpu); @@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } - /* this is needed for vmware backdor interface to work since it + /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ - memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); + } restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); + if (r == EMULATION_INTERCEPTED) + return EMULATE_DONE; + if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2)) return EMULATE_DONE; @@ -4462,21 +4642,28 @@ restart: } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; + else + writeback = false; r = EMULATE_DO_MMIO; } else if (vcpu->mmio_needed) { - if (vcpu->mmio_is_write) - vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + writeback = false; r = EMULATE_DO_MMIO; } else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; - toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); - kvm_make_request(KVM_REQ_EVENT, vcpu); - memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); - kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + if (writeback) { + toggle_interruptibility(vcpu, + vcpu->arch.emulate_ctxt.interruptibility); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + } else + vcpu->arch.emulate_regs_need_sync_to_vcpu = true; return r; } @@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction); int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); - int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); + int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, + size, port, &val, 1); /* do not return to emulator after return from userspace */ vcpu->arch.pio.count = 0; return ret; @@ -4879,8 +5067,9 @@ out: } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); -int kvm_fix_hypercall(struct kvm_vcpu *vcpu) +int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); char instruction[3]; unsigned long rip = kvm_rip_read(vcpu); @@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); -} - -void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_gdt(vcpu, &dt); -} - -void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) -{ - struct desc_ptr dt = { limit, base }; - - kvm_x86_ops->set_idt(vcpu, &dt); + return emulator_write_emulated(&vcpu->arch.emulate_ctxt, + rip, instruction, 3, NULL); } static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) @@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; + bool nmi_pending; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; @@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 1; goto out; } - if (kvm_check_request(KVM_REQ_NMI, vcpu)) - vcpu->arch.nmi_pending = true; } r = kvm_mmu_reload(vcpu); if (unlikely(r)) goto out; + /* + * An NMI can be injected between local nmi_pending read and + * vcpu->arch.nmi_pending read inside inject_pending_event(). + * But in that case, KVM_REQ_EVENT will be set, which makes + * the race described above benign. + */ + nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ - if (vcpu->arch.nmi_pending) + if (nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); @@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +static int complete_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int r; + + if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) + return 1; + + if (vcpu->mmio_needed) { + vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + memcpy(vcpu->mmio_data + vcpu->mmio_index, + run->mmio.data, 8); + vcpu->mmio_index += 8; + if (vcpu->mmio_index < vcpu->mmio_size) { + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->mmio_needed = 1; + return 0; + } + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; + } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r != EMULATE_DONE) + return 0; + return 1; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - if (vcpu->arch.pio.count || vcpu->mmio_needed) { - if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) { - r = 0; - goto out; - } - } + r = complete_mmio(vcpu); + if (r <= 0) + goto out; + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); @@ -5455,6 +5663,18 @@ out: int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { + /* + * We are here if userspace calls get_regs() in the middle of + * instruction emulation. Registers state needs to be copied + * back from emulation context to vcpu. Usrapace shouldn't do + * that usually, but some bad designed PV devices (vmware + * backdoor interface) need this to work + */ + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + } regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); @@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; + kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); @@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); return EMULATE_DONE; } @@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.pio_data = page_address(page); - if (!kvm->arch.virtual_tsc_khz) - kvm_arch_set_tsc_khz(kvm, max_tsc_khz); + kvm_init_tsc_catchup(vcpu, max_tsc_khz); r = kvm_mmu_create(vcpu); if (r < 0) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c600da830ce0..e407ed3df817 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -77,7 +77,7 @@ static inline u32 bit(int bitno) void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 1cd608973ce5..e191c096ab90 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -7,7 +7,7 @@ * kernel and insert a module (lg.ko) which allows us to run other Linux * kernels the same way we'd run processes. We call the first kernel the Host, * and the others the Guests. The program which sets up and configures Guests - * (such as the example in Documentation/lguest/lguest.c) is called the + * (such as the example in Documentation/virtual/lguest/lguest.c) is called the * Launcher. * * Secondly, we only run specially modified Guests, not normal kernels: setting @@ -913,8 +913,6 @@ static struct clocksource lguest_clock = { .rating = 200, .read = lguest_clock_read, .mask = CLOCKSOURCE_MASK(64), - .mult = 1 << 22, - .shift = 22, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -997,7 +995,7 @@ static void lguest_time_init(void) /* Set up the timer interrupt (0) to go to our simple timer routine */ irq_set_handler(0, lguest_time_irq); - clocksource_register(&lguest_clock); + clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index aa4326bfb24a..f2145cfa12a6 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,6 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/alternative-asm.h> /* * Zero a page. @@ -14,6 +15,15 @@ ENTRY(clear_page_c) CFI_ENDPROC ENDPROC(clear_page_c) +ENTRY(clear_page_c_e) + CFI_STARTPROC + movl $4096,%ecx + xorl %eax,%eax + rep stosb + ret + CFI_ENDPROC +ENDPROC(clear_page_c_e) + ENTRY(clear_page) CFI_STARTPROC xorl %eax,%eax @@ -38,21 +48,26 @@ ENTRY(clear_page) .Lclear_page_end: ENDPROC(clear_page) - /* Some CPUs run faster using the string instructions. - It is also a lot simpler. Use this when possible */ + /* + * Some CPUs support enhanced REP MOVSB/STOSB instructions. + * It is recommended to use this when possible. + * If enhanced REP MOVSB/STOSB is not available, try to use fast string. + * Otherwise, use original function. + * + */ #include <asm/cpufeature.h> .section .altinstr_replacement,"ax" 1: .byte 0xeb /* jmp <disp8> */ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ -2: +2: .byte 0xeb /* jmp <disp8> */ + .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ +3: .previous .section .altinstructions,"a" - .align 8 - .quad clear_page - .quad 1b - .word X86_FEATURE_REP_GOOD - .byte .Lclear_page_end - clear_page - .byte 2b - 1b + altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ + .Lclear_page_end-clear_page, 2b-1b + altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ + .Lclear_page_end-clear_page,3b-2b .previous diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 99e482615195..024840266ba0 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -15,23 +15,30 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/cpufeature.h> +#include <asm/alternative-asm.h> - .macro ALTERNATIVE_JUMP feature,orig,alt +/* + * By placing feature2 after feature1 in altinstructions section, we logically + * implement: + * If CPU has feature2, jmp to alt2 is used + * else if CPU has feature1, jmp to alt1 is used + * else jmp to orig is used. + */ + .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 0: .byte 0xe9 /* 32bit jump */ .long \orig-1f /* by default jump to orig */ 1: .section .altinstr_replacement,"ax" 2: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt-1b /* offset */ /* or alternatively to alt */ + .long \alt1-1b /* offset */ /* or alternatively to alt1 */ +3: .byte 0xe9 /* near jump with 32bit immediate */ + .long \alt2-1b /* offset */ /* or alternatively to alt2 */ .previous + .section .altinstructions,"a" - .align 8 - .quad 0b - .quad 2b - .word \feature /* when feature is set */ - .byte 5 - .byte 5 + altinstruction_entry 0b,2b,\feature1,5,5 + altinstruction_entry 0b,3b,\feature2,5,5 .previous .endm @@ -72,8 +79,10 @@ ENTRY(_copy_to_user) addq %rdx,%rcx jc bad_to_user cmpq TI_addr_limit(%rax),%rcx - jae bad_to_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string + ja bad_to_user + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ + copy_user_generic_unrolled,copy_user_generic_string, \ + copy_user_enhanced_fast_string CFI_ENDPROC ENDPROC(_copy_to_user) @@ -85,8 +94,10 @@ ENTRY(_copy_from_user) addq %rdx,%rcx jc bad_from_user cmpq TI_addr_limit(%rax),%rcx - jae bad_from_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string + ja bad_from_user + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ + copy_user_generic_unrolled,copy_user_generic_string, \ + copy_user_enhanced_fast_string CFI_ENDPROC ENDPROC(_copy_from_user) @@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string) .previous CFI_ENDPROC ENDPROC(copy_user_generic_string) + +/* + * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. + * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * eax uncopied bytes or 0 if successful. + */ +ENTRY(copy_user_enhanced_fast_string) + CFI_STARTPROC + andl %edx,%edx + jz 2f + movl %edx,%ecx +1: rep + movsb +2: xorl %eax,%eax + ret + + .section .fixup,"ax" +12: movl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + .section __ex_table,"a" + .align 8 + .quad 1b,12b + .previous + CFI_ENDPROC +ENDPROC(copy_user_enhanced_fast_string) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 75ef61e35e38..efbf2a0ecdea 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -4,6 +4,7 @@ #include <asm/cpufeature.h> #include <asm/dwarf2.h> +#include <asm/alternative-asm.h> /* * memcpy - Copy a memory block. @@ -37,6 +38,23 @@ .Lmemcpy_e: .previous +/* + * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than + * memcpy_c. Use memcpy_c_e when possible. + * + * This gets patched over the unrolled variant (below) via the + * alternative instructions framework: + */ + .section .altinstr_replacement, "ax", @progbits +.Lmemcpy_c_e: + movq %rdi, %rax + + movl %edx, %ecx + rep movsb + ret +.Lmemcpy_e_e: + .previous + ENTRY(__memcpy) ENTRY(memcpy) CFI_STARTPROC @@ -49,7 +67,7 @@ ENTRY(memcpy) jb .Lhandle_tail /* - * We check whether memory false dependece could occur, + * We check whether memory false dependence could occur, * then jump to corresponding copy mode. */ cmp %dil, %sil @@ -171,21 +189,22 @@ ENDPROC(memcpy) ENDPROC(__memcpy) /* - * Some CPUs run faster using the string copy instructions. - * It is also a lot simpler. Use this when possible: - */ - - .section .altinstructions, "a" - .align 8 - .quad memcpy - .quad .Lmemcpy_c - .word X86_FEATURE_REP_GOOD - - /* + * Some CPUs are adding enhanced REP MOVSB/STOSB feature + * If the feature is supported, memcpy_c_e() is the first choice. + * If enhanced rep movsb copy is not available, use fast string copy + * memcpy_c() when possible. This is faster and code is simpler than + * original memcpy(). + * Otherwise, original memcpy() is used. + * In .altinstructions section, ERMS feature is placed after REG_GOOD + * feature to implement the right patch order. + * * Replace only beginning, memcpy is used to apply alternatives, * so it is silly to overwrite itself with nops - reboot is the * only outcome... */ - .byte .Lmemcpy_e - .Lmemcpy_c - .byte .Lmemcpy_e - .Lmemcpy_c + .section .altinstructions, "a" + altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ + .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c + altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ + .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e .previous diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0ecb8433e5a8..d0ec9c2936d7 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -8,6 +8,7 @@ #define _STRING_C #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/cpufeature.h> #undef memmove @@ -24,6 +25,7 @@ */ ENTRY(memmove) CFI_STARTPROC + /* Handle more 32bytes in loop */ mov %rdi, %rax cmp $0x20, %rdx @@ -31,8 +33,13 @@ ENTRY(memmove) /* Decide forward/backward copy mode */ cmp %rdi, %rsi - jb 2f + jge .Lmemmove_begin_forward + mov %rsi, %r8 + add %rdx, %r8 + cmp %rdi, %r8 + jg 2f +.Lmemmove_begin_forward: /* * movsq instruction have many startup latency * so we handle small size by general register. @@ -78,6 +85,8 @@ ENTRY(memmove) rep movsq movq %r11, (%r10) jmp 13f +.Lmemmove_end_forward: + /* * Handle data backward by movsq. */ @@ -194,4 +203,22 @@ ENTRY(memmove) 13: retq CFI_ENDPROC + + .section .altinstr_replacement,"ax" +.Lmemmove_begin_forward_efs: + /* Forward moving data. */ + movq %rdx, %rcx + rep movsb + retq +.Lmemmove_end_forward_efs: + .previous + + .section .altinstructions,"a" + .align 8 + .quad .Lmemmove_begin_forward + .quad .Lmemmove_begin_forward_efs + .word X86_FEATURE_ERMS + .byte .Lmemmove_end_forward-.Lmemmove_begin_forward + .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs + .previous ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 09d344269652..79bd454b78a3 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -2,9 +2,13 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/cpufeature.h> +#include <asm/alternative-asm.h> /* - * ISO C memset - set a memory block to a byte value. + * ISO C memset - set a memory block to a byte value. This function uses fast + * string to get better performance than the original function. The code is + * simpler and shorter than the orignal function as well. * * rdi destination * rsi value (char) @@ -31,6 +35,28 @@ .Lmemset_e: .previous +/* + * ISO C memset - set a memory block to a byte value. This function uses + * enhanced rep stosb to override the fast string function. + * The code is simpler and shorter than the fast string function as well. + * + * rdi destination + * rsi value (char) + * rdx count (bytes) + * + * rax original destination + */ + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c_e: + movq %rdi,%r9 + movb %sil,%al + movl %edx,%ecx + rep stosb + movq %r9,%rax + ret +.Lmemset_e_e: + .previous + ENTRY(memset) ENTRY(__memset) CFI_STARTPROC @@ -112,16 +138,20 @@ ENTRY(__memset) ENDPROC(memset) ENDPROC(__memset) - /* Some CPUs run faster using the string instructions. - It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - + /* Some CPUs support enhanced REP MOVSB/STOSB feature. + * It is recommended to use this when possible. + * + * If enhanced REP MOVSB/STOSB feature is not available, use fast string + * instructions. + * + * Otherwise, use original memset function. + * + * In .altinstructions section, ERMS feature is placed after REG_GOOD + * feature to implement the right patch order. + */ .section .altinstructions,"a" - .align 8 - .quad memset - .quad .Lmemset_c - .word X86_FEATURE_REP_GOOD - .byte .Lfinal - memset - .byte .Lmemset_e - .Lmemset_c + altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ + .Lfinal-memset,.Lmemset_e-.Lmemset_c + altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ + .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e .previous diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3e608edf9958..3d11327c9ab4 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -23,8 +23,8 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o -obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o -obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o +obj-$(CONFIG_AMD_NUMA) += amdtopology.o +obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology.c index 0919c26820d4..5247d01329ca 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/nodemask.h> #include <linux/memblock.h> +#include <linux/bootmem.h> #include <asm/io.h> #include <linux/pci_ids.h> @@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void) int __init amd_numa_init(void) { - unsigned long start = PFN_PHYS(0); - unsigned long end = PFN_PHYS(max_pfn); + u64 start = PFN_PHYS(0); + u64 end = PFN_PHYS(max_pfn); unsigned numnodes; - unsigned long prevbase; + u64 prevbase; int i, j, nb; u32 nodeid, reg; unsigned int bits, cores, apicid_base; @@ -95,7 +96,7 @@ int __init amd_numa_init(void) prevbase = 0; for (i = 0; i < 8; i++) { - unsigned long base, limit; + u64 base, limit; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); @@ -107,18 +108,18 @@ int __init amd_numa_init(void) continue; } if (nodeid >= numnodes) { - pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, + pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid, base, limit); continue; } if (!limit) { - pr_info("Skipping node entry %d (base %lx)\n", + pr_info("Skipping node entry %d (base %Lx)\n", i, base); continue; } if ((base >> 8) & 3 || (limit >> 8) & 3) { - pr_err("Node %d using interleaving mode %lx/%lx\n", + pr_err("Node %d using interleaving mode %Lx/%Lx\n", nodeid, (base >> 8) & 3, (limit >> 8) & 3); return -EINVAL; } @@ -150,19 +151,19 @@ int __init amd_numa_init(void) continue; } if (limit < base) { - pr_err("Node %d bogus settings %lx-%lx.\n", + pr_err("Node %d bogus settings %Lx-%Lx.\n", nodeid, base, limit); continue; } /* Could sort here, but pun for now. Should not happen anyroads. */ if (prevbase > base) { - pr_err("Node map not sorted %lx,%lx\n", + pr_err("Node map not sorted %Lx,%Lx\n", prevbase, base); return -EINVAL; } - pr_info("Node %d MemBase %016lx Limit %016lx\n", + pr_info("Node %d MemBase %016Lx Limit %016Lx\n", nodeid, base, limit); prevbase = base; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 20e3f8702d1e..bcb394dfbb35 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -12,6 +12,7 @@ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ +#include <linux/prefetch.h> /* prefetchw */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 80088f994193..29f7c6d98179 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -678,8 +678,10 @@ static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); +#ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; +#endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; @@ -716,6 +718,7 @@ void __init paging_init(void) * NOTE: at this point the bootmem allocator is fully available. */ olpc_dt_build_devicetree(); + sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); zone_sizes_init(); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 794233587287..d865c4aeec55 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -616,7 +616,9 @@ void __init paging_init(void) unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); +#ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; +#endif max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; max_zone_pfns[ZONE_NORMAL] = max_pfn; @@ -679,14 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(arch_add_memory); -#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) -int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_vsyscall; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0369843511dc..be1ef574ce9a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -91,13 +91,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return (__force void __iomem *)phys_to_virt(phys_addr); /* - * Check if the request spans more than any BAR in the iomem resource - * tree. - */ - WARN_ONCE(iomem_map_sanity_check(phys_addr, size), - KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); - - /* * Don't allow anybody to remap normal RAM that we're using.. */ last_pfn = last_addr >> PAGE_SHIFT; @@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, ret_addr = (void __iomem *) (vaddr + offset); mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); + /* + * Check if the request spans more than any BAR in the iomem resource + * tree. + */ + WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size), + KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); + return ret_addr; err_free_area: free_vm_area(area); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 745258dfc4dc..f5510d889a22 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -1,11 +1,39 @@ /* Common code for 32 and 64-bit NUMA */ -#include <linux/topology.h> -#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/init.h> #include <linux/bootmem.h> -#include <asm/numa.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/nodemask.h> +#include <linux/sched.h> +#include <linux/topology.h> + +#include <asm/e820.h> +#include <asm/proto.h> +#include <asm/dma.h> #include <asm/acpi.h> +#include <asm/amd_nb.h> + +#include "numa_internal.h" int __initdata numa_off; +nodemask_t numa_nodes_parsed __initdata; + +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); + +static struct numa_meminfo numa_meminfo +#ifndef CONFIG_MEMORY_HOTPLUG +__initdata +#endif +; + +static int numa_distance_cnt; +static u8 *numa_distance; static __init int numa_setup(char *opt) { @@ -32,6 +60,15 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; +int __cpuinit numa_cpu_node(int cpu) +{ + int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + + if (apicid != BAD_APICID) + return __apicid_to_node[apicid]; + return NUMA_NO_NODE; +} + cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); @@ -95,6 +132,407 @@ void __init setup_node_to_cpumask_map(void) pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); } +static int __init numa_add_memblk_to(int nid, u64 start, u64 end, + struct numa_meminfo *mi) +{ + /* ignore zero length blks */ + if (start == end) + return 0; + + /* whine about and ignore invalid blks */ + if (start > end || nid < 0 || nid >= MAX_NUMNODES) { + pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", + nid, start, end); + return 0; + } + + if (mi->nr_blks >= NR_NODE_MEMBLKS) { + pr_err("NUMA: too many memblk ranges\n"); + return -EINVAL; + } + + mi->blk[mi->nr_blks].start = start; + mi->blk[mi->nr_blks].end = end; + mi->blk[mi->nr_blks].nid = nid; + mi->nr_blks++; + return 0; +} + +/** + * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo + * @idx: Index of memblk to remove + * @mi: numa_meminfo to remove memblk from + * + * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and + * decrementing @mi->nr_blks. + */ +void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) +{ + mi->nr_blks--; + memmove(&mi->blk[idx], &mi->blk[idx + 1], + (mi->nr_blks - idx) * sizeof(mi->blk[0])); +} + +/** + * numa_add_memblk - Add one numa_memblk to numa_meminfo + * @nid: NUMA node ID of the new memblk + * @start: Start address of the new memblk + * @end: End address of the new memblk + * + * Add a new memblk to the default numa_meminfo. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init numa_add_memblk(int nid, u64 start, u64 end) +{ + return numa_add_memblk_to(nid, start, end, &numa_meminfo); +} + +/* Initialize NODE_DATA for a node on the local memory */ +static void __init setup_node_data(int nid, u64 start, u64 end) +{ + const u64 nd_low = PFN_PHYS(MAX_DMA_PFN); + const u64 nd_high = PFN_PHYS(max_pfn_mapped); + const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); + bool remapped = false; + u64 nd_pa; + void *nd; + int tnid; + + /* + * Don't confuse VM with a node that doesn't have the + * minimum amount of memory: + */ + if (end && (end - start) < NODE_MIN_SIZE) + return; + + /* initialize remap allocator before aligning to ZONE_ALIGN */ + init_alloc_remap(nid, start, end); + + start = roundup(start, ZONE_ALIGN); + + printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", + nid, start, end); + + /* + * Allocate node data. Try remap allocator first, node-local + * memory and then any node. Never allocate in DMA zone. + */ + nd = alloc_remap(nid, nd_size); + if (nd) { + nd_pa = __pa(nd); + remapped = true; + } else { + nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, + nd_size, SMP_CACHE_BYTES); + if (nd_pa == MEMBLOCK_ERROR) + nd_pa = memblock_find_in_range(nd_low, nd_high, + nd_size, SMP_CACHE_BYTES); + if (nd_pa == MEMBLOCK_ERROR) { + pr_err("Cannot find %zu bytes in node %d\n", + nd_size, nid); + return; + } + memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA"); + nd = __va(nd_pa); + } + + /* report and initialize */ + printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", + nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (!remapped && tnid != nid) + printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; + NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; + + node_set_online(nid); +} + +/** + * numa_cleanup_meminfo - Cleanup a numa_meminfo + * @mi: numa_meminfo to clean up + * + * Sanitize @mi by merging and removing unncessary memblks. Also check for + * conflicts and clear unused memblks. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init numa_cleanup_meminfo(struct numa_meminfo *mi) +{ + const u64 low = 0; + const u64 high = PFN_PHYS(max_pfn); + int i, j, k; + + /* first, trim all entries */ + for (i = 0; i < mi->nr_blks; i++) { + struct numa_memblk *bi = &mi->blk[i]; + + /* make sure all blocks are inside the limits */ + bi->start = max(bi->start, low); + bi->end = min(bi->end, high); + + /* and there's no empty block */ + if (bi->start >= bi->end) + numa_remove_memblk_from(i--, mi); + } + + /* merge neighboring / overlapping entries */ + for (i = 0; i < mi->nr_blks; i++) { + struct numa_memblk *bi = &mi->blk[i]; + + for (j = i + 1; j < mi->nr_blks; j++) { + struct numa_memblk *bj = &mi->blk[j]; + u64 start, end; + + /* + * See whether there are overlapping blocks. Whine + * about but allow overlaps of the same nid. They + * will be merged below. + */ + if (bi->end > bj->start && bi->start < bj->end) { + if (bi->nid != bj->nid) { + pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", + bi->nid, bi->start, bi->end, + bj->nid, bj->start, bj->end); + return -EINVAL; + } + pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", + bi->nid, bi->start, bi->end, + bj->start, bj->end); + } + + /* + * Join together blocks on the same node, holes + * between which don't overlap with memory on other + * nodes. + */ + if (bi->nid != bj->nid) + continue; + start = min(bi->start, bj->start); + end = max(bi->end, bj->end); + for (k = 0; k < mi->nr_blks; k++) { + struct numa_memblk *bk = &mi->blk[k]; + + if (bi->nid == bk->nid) + continue; + if (start < bk->end && end > bk->start) + break; + } + if (k < mi->nr_blks) + continue; + printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", + bi->nid, bi->start, bi->end, bj->start, bj->end, + start, end); + bi->start = start; + bi->end = end; + numa_remove_memblk_from(j--, mi); + } + } + + /* clear unused ones */ + for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { + mi->blk[i].start = mi->blk[i].end = 0; + mi->blk[i].nid = NUMA_NO_NODE; + } + + return 0; +} + +/* + * Set nodes, which have memory in @mi, in *@nodemask. + */ +static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, + const struct numa_meminfo *mi) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mi->blk); i++) + if (mi->blk[i].start != mi->blk[i].end && + mi->blk[i].nid != NUMA_NO_NODE) + node_set(mi->blk[i].nid, *nodemask); +} + +/** + * numa_reset_distance - Reset NUMA distance table + * + * The current table is freed. The next numa_set_distance() call will + * create a new one. + */ +void __init numa_reset_distance(void) +{ + size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); + + /* numa_distance could be 1LU marking allocation failure, test cnt */ + if (numa_distance_cnt) + memblock_x86_free_range(__pa(numa_distance), + __pa(numa_distance) + size); + numa_distance_cnt = 0; + numa_distance = NULL; /* enable table creation */ +} + +static int __init numa_alloc_distance(void) +{ + nodemask_t nodes_parsed; + size_t size; + int i, j, cnt = 0; + u64 phys; + + /* size the new table and allocate it */ + nodes_parsed = numa_nodes_parsed; + numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); + + for_each_node_mask(i, nodes_parsed) + cnt = i; + cnt++; + size = cnt * cnt * sizeof(numa_distance[0]); + + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), + size, PAGE_SIZE); + if (phys == MEMBLOCK_ERROR) { + pr_warning("NUMA: Warning: can't allocate distance table!\n"); + /* don't retry until explicitly reset */ + numa_distance = (void *)1LU; + return -ENOMEM; + } + memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); + + numa_distance = __va(phys); + numa_distance_cnt = cnt; + + /* fill with the default distances */ + for (i = 0; i < cnt; i++) + for (j = 0; j < cnt; j++) + numa_distance[i * cnt + j] = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); + + return 0; +} + +/** + * numa_set_distance - Set NUMA distance from one NUMA to another + * @from: the 'from' node to set distance + * @to: the 'to' node to set distance + * @distance: NUMA distance + * + * Set the distance from node @from to @to to @distance. If distance table + * doesn't exist, one which is large enough to accommodate all the currently + * known nodes will be created. + * + * If such table cannot be allocated, a warning is printed and further + * calls are ignored until the distance table is reset with + * numa_reset_distance(). + * + * If @from or @to is higher than the highest known node at the time of + * table creation or @distance doesn't make sense, the call is ignored. + * This is to allow simplification of specific NUMA config implementations. + */ +void __init numa_set_distance(int from, int to, int distance) +{ + if (!numa_distance && numa_alloc_distance() < 0) + return; + + if (from >= numa_distance_cnt || to >= numa_distance_cnt) { + printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + if ((u8)distance != distance || + (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + numa_distance[from * numa_distance_cnt + to] = distance; +} + +int __node_distance(int from, int to) +{ + if (from >= numa_distance_cnt || to >= numa_distance_cnt) + return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; + return numa_distance[from * numa_distance_cnt + to]; +} +EXPORT_SYMBOL(__node_distance); + +/* + * Sanity check to catch more bad NUMA configurations (they are amazingly + * common). Make sure the nodes cover all memory. + */ +static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) +{ + u64 numaram, e820ram; + int i; + + numaram = 0; + for (i = 0; i < mi->nr_blks; i++) { + u64 s = mi->blk[i].start >> PAGE_SHIFT; + u64 e = mi->blk[i].end >> PAGE_SHIFT; + numaram += e - s; + numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); + if ((s64)numaram < 0) + numaram = 0; + } + + e820ram = max_pfn - (memblock_x86_hole_size(0, + PFN_PHYS(max_pfn)) >> PAGE_SHIFT); + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ + if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { + printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", + (numaram << PAGE_SHIFT) >> 20, + (e820ram << PAGE_SHIFT) >> 20); + return false; + } + return true; +} + +static int __init numa_register_memblks(struct numa_meminfo *mi) +{ + int i, nid; + + /* Account for nodes with cpus and no memory */ + node_possible_map = numa_nodes_parsed; + numa_nodemask_from_meminfo(&node_possible_map, mi); + if (WARN_ON(nodes_empty(node_possible_map))) + return -EINVAL; + + for (i = 0; i < mi->nr_blks; i++) + memblock_x86_register_active_regions(mi->blk[i].nid, + mi->blk[i].start >> PAGE_SHIFT, + mi->blk[i].end >> PAGE_SHIFT); + + /* for out of order entries */ + sort_node_map(); + if (!numa_meminfo_cover_memory(mi)) + return -EINVAL; + + /* Finally register nodes. */ + for_each_node_mask(nid, node_possible_map) { + u64 start = PFN_PHYS(max_pfn); + u64 end = 0; + + for (i = 0; i < mi->nr_blks; i++) { + if (nid != mi->blk[i].nid) + continue; + start = min(mi->blk[i].start, start); + end = max(mi->blk[i].end, end); + } + + if (start < end) + setup_node_data(nid, start, end); + } + + return 0; +} + /* * There are unfortunately some poorly designed mainboards around that * only connect memory to a single CPU. This breaks the 1:1 cpu->node @@ -102,7 +540,7 @@ void __init setup_node_to_cpumask_map(void) * as the number of CPUs is not known yet. We round robin the existing * nodes. */ -void __init numa_init_array(void) +static void __init numa_init_array(void) { int rr, i; @@ -117,6 +555,95 @@ void __init numa_init_array(void) } } +static int __init numa_init(int (*init_func)(void)) +{ + int i; + int ret; + + for (i = 0; i < MAX_LOCAL_APIC; i++) + set_apicid_to_node(i, NUMA_NO_NODE); + + nodes_clear(numa_nodes_parsed); + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + memset(&numa_meminfo, 0, sizeof(numa_meminfo)); + remove_all_active_ranges(); + numa_reset_distance(); + + ret = init_func(); + if (ret < 0) + return ret; + ret = numa_cleanup_meminfo(&numa_meminfo); + if (ret < 0) + return ret; + + numa_emulation(&numa_meminfo, numa_distance_cnt); + + ret = numa_register_memblks(&numa_meminfo); + if (ret < 0) + return ret; + + for (i = 0; i < nr_cpu_ids; i++) { + int nid = early_cpu_to_node(i); + + if (nid == NUMA_NO_NODE) + continue; + if (!node_online(nid)) + numa_clear_node(i); + } + numa_init_array(); + return 0; +} + +/** + * dummy_numa_init - Fallback dummy NUMA init + * + * Used if there's no underlying NUMA architecture, NUMA initialization + * fails, or NUMA is disabled on the command line. + * + * Must online at least one node and add memory blocks that cover all + * allowed memory. This function must not fail. + */ +static int __init dummy_numa_init(void) +{ + printk(KERN_INFO "%s\n", + numa_off ? "NUMA turned off" : "No NUMA configuration found"); + printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", + 0LLU, PFN_PHYS(max_pfn)); + + node_set(0, numa_nodes_parsed); + numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); + + return 0; +} + +/** + * x86_numa_init - Initialize NUMA + * + * Try each configured NUMA initialization method until one succeeds. The + * last fallback is dummy single node config encomapssing whole memory and + * never fails. + */ +void __init x86_numa_init(void) +{ + if (!numa_off) { +#ifdef CONFIG_X86_NUMAQ + if (!numa_init(numaq_numa_init)) + return; +#endif +#ifdef CONFIG_ACPI_NUMA + if (!numa_init(x86_acpi_numa_init)) + return; +#endif +#ifdef CONFIG_AMD_NUMA + if (!numa_init(amd_numa_init)) + return; +#endif + } + + numa_init(dummy_numa_init); +} + static __init int find_near_online_node(int node) { int n, val; @@ -282,3 +809,18 @@ const struct cpumask *cpumask_of_node(int node) EXPORT_SYMBOL(cpumask_of_node); #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +#ifdef CONFIG_MEMORY_HOTPLUG +int memory_add_physaddr_to_nid(u64 start) +{ + struct numa_meminfo *mi = &numa_meminfo; + int nid = mi->blk[0].nid; + int i; + + for (i = 0; i < mi->nr_blks; i++) + if (mi->blk[i].start <= start && mi->blk[i].end > start) + nid = mi->blk[i].nid; + return nid; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index bde3906420df..849a975d3fa0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -22,39 +22,11 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/mm.h> #include <linux/bootmem.h> #include <linux/memblock.h> -#include <linux/mmzone.h> -#include <linux/highmem.h> -#include <linux/initrd.h> -#include <linux/nodemask.h> #include <linux/module.h> -#include <linux/kexec.h> -#include <linux/pfn.h> -#include <linux/swap.h> -#include <linux/acpi.h> - -#include <asm/e820.h> -#include <asm/setup.h> -#include <asm/mmzone.h> -#include <asm/bios_ebda.h> -#include <asm/proto.h> - -struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_data); - -/* - * numa interface - we expect the numa architecture specific code to have - * populated the following initialisation. - * - * 1) node_online_map - the map of all nodes configured (online) in the system - * 2) node_start_pfn - the starting page frame number for a node - * 3) node_end_pfn - the ending page fram number for a node - */ -unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; -unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; +#include "numa_internal.h" #ifdef CONFIG_DISCONTIGMEM /* @@ -99,108 +71,46 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, } #endif -extern unsigned long find_max_low_pfn(void); extern unsigned long highend_pfn, highstart_pfn; #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) -unsigned long node_remap_size[MAX_NUMNODES]; static void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -static unsigned long kva_start_pfn; -static unsigned long kva_pages; - -int __cpuinit numa_cpu_node(int cpu) -{ - return apic->x86_32_numa_cpu_node(cpu); -} - -/* - * FLAT - support for basic PC memory model with discontig enabled, essentially - * a single node with all available processors in it with a flat - * memory map. - */ -int __init get_memcfg_numa_flat(void) -{ - printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); - - node_start_pfn[0] = 0; - node_end_pfn[0] = max_pfn; - memblock_x86_register_active_regions(0, 0, max_pfn); - memory_present(0, 0, max_pfn); - node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); - - /* Indicate there is one node available. */ - nodes_clear(node_online_map); - node_set_online(0); - return 1; -} - -/* - * Find the highest page frame number we have available for the node - */ -static void __init propagate_e820_map_node(int nid) -{ - if (node_end_pfn[nid] > max_pfn) - node_end_pfn[nid] = max_pfn; - /* - * if a user has given mem=XXXX, then we need to make sure - * that the node _starts_ before that, too, not just ends - */ - if (node_start_pfn[nid] > max_pfn) - node_start_pfn[nid] = max_pfn; - BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); -} - -/* - * Allocate memory for the pg_data_t for this node via a crude pre-bootmem - * method. For node zero take this from the bottom of memory, for - * subsequent nodes place them at node_remap_start_vaddr which contains - * node local data in physically node local memory. See setup_memory() - * for details. - */ -static void __init allocate_pgdat(int nid) -{ - char buf[16]; - - if (node_has_online_mem(nid) && node_remap_start_vaddr[nid]) - NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; - else { - unsigned long pgdat_phys; - pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT, - max_pfn_mapped<<PAGE_SHIFT, - sizeof(pg_data_t), - PAGE_SIZE); - NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); - memset(buf, 0, sizeof(buf)); - sprintf(buf, "NODE_DATA %d", nid); - memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); - } - printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", - nid, (unsigned long)NODE_DATA(nid)); -} - /* - * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel - * virtual address space (KVA) is reserved and portions of nodes are mapped - * using it. This is to allow node-local memory to be allocated for - * structures that would normally require ZONE_NORMAL. The memory is - * allocated with alloc_remap() and callers should be prepared to allocate - * from the bootmem allocator instead. + * Remap memory allocator */ static unsigned long node_remap_start_pfn[MAX_NUMNODES]; static void *node_remap_end_vaddr[MAX_NUMNODES]; static void *node_remap_alloc_vaddr[MAX_NUMNODES]; -static unsigned long node_remap_offset[MAX_NUMNODES]; +/** + * alloc_remap - Allocate remapped memory + * @nid: NUMA node to allocate memory from + * @size: The size of allocation + * + * Allocate @size bytes from the remap area of NUMA node @nid. The + * size of the remap area is predetermined by init_alloc_remap() and + * only the callers considered there should call this function. For + * more info, please read the comment on top of init_alloc_remap(). + * + * The caller must be ready to handle allocation failure from this + * function and fall back to regular memory allocator in such cases. + * + * CONTEXT: + * Single CPU early boot context. + * + * RETURNS: + * Pointer to the allocated memory on success, %NULL on failure. + */ void *alloc_remap(int nid, unsigned long size) { void *allocation = node_remap_alloc_vaddr[nid]; size = ALIGN(size, L1_CACHE_BYTES); - if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) return NULL; node_remap_alloc_vaddr[nid] += size; @@ -209,26 +119,6 @@ void *alloc_remap(int nid, unsigned long size) return allocation; } -static void __init remap_numa_kva(void) -{ - void *vaddr; - unsigned long pfn; - int node; - - for_each_online_node(node) { - printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); - for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { - vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); - printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", - (unsigned long)vaddr, - node_remap_start_pfn[node] + pfn); - set_pmd_pfn((ulong) vaddr, - node_remap_start_pfn[node] + pfn, - PAGE_KERNEL_LARGE); - } - } -} - #ifdef CONFIG_HIBERNATION /** * resume_map_numa_kva - add KVA mapping to the temporary page tables created @@ -240,15 +130,16 @@ void resume_map_numa_kva(pgd_t *pgd_base) int node; for_each_online_node(node) { - unsigned long start_va, start_pfn, size, pfn; + unsigned long start_va, start_pfn, nr_pages, pfn; start_va = (unsigned long)node_remap_start_vaddr[node]; start_pfn = node_remap_start_pfn[node]; - size = node_remap_size[node]; + nr_pages = (node_remap_end_vaddr[node] - + node_remap_start_vaddr[node]) >> PAGE_SHIFT; printk(KERN_DEBUG "%s: node %d\n", __func__, node); - for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { + for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); pgd_t *pgd = pgd_base + pgd_index(vaddr); pud_t *pud = pud_offset(pgd, vaddr); @@ -264,132 +155,89 @@ void resume_map_numa_kva(pgd_t *pgd_base) } #endif -static __init unsigned long calculate_numa_remap_pages(void) +/** + * init_alloc_remap - Initialize remap allocator for a NUMA node + * @nid: NUMA node to initizlie remap allocator for + * + * NUMA nodes may end up without any lowmem. As allocating pgdat and + * memmap on a different node with lowmem is inefficient, a special + * remap allocator is implemented which can be used by alloc_remap(). + * + * For each node, the amount of memory which will be necessary for + * pgdat and memmap is calculated and two memory areas of the size are + * allocated - one in the node and the other in lowmem; then, the area + * in the node is remapped to the lowmem area. + * + * As pgdat and memmap must be allocated in lowmem anyway, this + * doesn't waste lowmem address space; however, the actual lowmem + * which gets remapped over is wasted. The amount shouldn't be + * problematic on machines this feature will be used. + * + * Initialization failure isn't fatal. alloc_remap() is used + * opportunistically and the callers will fall back to other memory + * allocation mechanisms on failure. + */ +void __init init_alloc_remap(int nid, u64 start, u64 end) { - int nid; - unsigned long size, reserve_pages = 0; - - for_each_online_node(nid) { - u64 node_kva_target; - u64 node_kva_final; - - /* - * The acpi/srat node info can show hot-add memroy zones - * where memory could be added but not currently present. - */ - printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", - nid, node_start_pfn[nid], node_end_pfn[nid]); - if (node_start_pfn[nid] > max_pfn) - continue; - if (!node_end_pfn[nid]) - continue; - if (node_end_pfn[nid] > max_pfn) - node_end_pfn[nid] = max_pfn; - - /* ensure the remap includes space for the pgdat. */ - size = node_remap_size[nid] + sizeof(pg_data_t); - - /* convert size to large (pmd size) pages, rounding up */ - size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; - /* now the roundup is correct, convert to PAGE_SIZE pages */ - size = size * PTRS_PER_PTE; - - node_kva_target = round_down(node_end_pfn[nid] - size, - PTRS_PER_PTE); - node_kva_target <<= PAGE_SHIFT; - do { - node_kva_final = memblock_find_in_range(node_kva_target, - ((u64)node_end_pfn[nid])<<PAGE_SHIFT, - ((u64)size)<<PAGE_SHIFT, - LARGE_PAGE_BYTES); - node_kva_target -= LARGE_PAGE_BYTES; - } while (node_kva_final == MEMBLOCK_ERROR && - (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); - - if (node_kva_final == MEMBLOCK_ERROR) - panic("Can not get kva ram\n"); - - node_remap_size[nid] = size; - node_remap_offset[nid] = reserve_pages; - reserve_pages += size; - printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" - " node %d at %llx\n", - size, nid, node_kva_final>>PAGE_SHIFT); - - /* - * prevent kva address below max_low_pfn want it on system - * with less memory later. - * layout will be: KVA address , KVA RAM - * - * we are supposed to only record the one less then max_low_pfn - * but we could have some hole in high memory, and it will only - * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide - * to use it as free. - * So memblock_x86_reserve_range here, hope we don't run out of that array - */ - memblock_x86_reserve_range(node_kva_final, - node_kva_final+(((u64)size)<<PAGE_SHIFT), - "KVA RAM"); - - node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; - } - printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", - reserve_pages); - return reserve_pages; -} + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long end_pfn = end >> PAGE_SHIFT; + unsigned long size, pfn; + u64 node_pa, remap_pa; + void *remap_va; -static void init_remap_allocator(int nid) -{ - node_remap_start_vaddr[nid] = pfn_to_kaddr( - kva_start_pfn + node_remap_offset[nid]); - node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + - (node_remap_size[nid] * PAGE_SIZE); - node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + - ALIGN(sizeof(pg_data_t), PAGE_SIZE); - - printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, - (ulong) node_remap_start_vaddr[nid], - (ulong) node_remap_end_vaddr[nid]); + /* + * The acpi/srat node info can show hot-add memroy zones where + * memory could be added but not currently present. + */ + printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", + nid, start_pfn, end_pfn); + + /* calculate the necessary space aligned to large page size */ + size = node_memmap_size_bytes(nid, start_pfn, end_pfn); + size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); + size = ALIGN(size, LARGE_PAGE_BYTES); + + /* allocate node memory and the lowmem remap area */ + node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); + if (node_pa == MEMBLOCK_ERROR) { + pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", + size, nid); + return; + } + memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); + + remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, + max_low_pfn << PAGE_SHIFT, + size, LARGE_PAGE_BYTES); + if (remap_pa == MEMBLOCK_ERROR) { + pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", + size, nid); + memblock_x86_free_range(node_pa, node_pa + size); + return; + } + memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); + remap_va = phys_to_virt(remap_pa); + + /* perform actual remap */ + for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) + set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), + (node_pa >> PAGE_SHIFT) + pfn, + PAGE_KERNEL_LARGE); + + /* initialize remap allocator parameters */ + node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; + node_remap_start_vaddr[nid] = remap_va; + node_remap_end_vaddr[nid] = remap_va + size; + node_remap_alloc_vaddr[nid] = remap_va; + + printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", + nid, node_pa, node_pa + size, remap_va, remap_va + size); } void __init initmem_init(void) { - int nid; - long kva_target_pfn; - - /* - * When mapping a NUMA machine we allocate the node_mem_map arrays - * from node local memory. They are then mapped directly into KVA - * between zone normal and vmalloc space. Calculate the size of - * this space and use it to adjust the boundary between ZONE_NORMAL - * and ZONE_HIGHMEM. - */ - - get_memcfg_numa(); - numa_init_array(); - - kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); + x86_numa_init(); - kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); - do { - kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT, - max_low_pfn<<PAGE_SHIFT, - kva_pages<<PAGE_SHIFT, - PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; - kva_target_pfn -= PTRS_PER_PTE; - } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn); - - if (kva_start_pfn == MEMBLOCK_ERROR) - panic("Can not get kva space\n"); - - printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", - kva_start_pfn, max_low_pfn); - printk(KERN_INFO "max_pfn = %lx\n", max_pfn); - - /* avoid clash with initrd */ - memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT, - (kva_start_pfn + kva_pages)<<PAGE_SHIFT, - "KVA PG"); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > max_low_pfn) @@ -409,51 +257,9 @@ void __init initmem_init(void) printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", (ulong) pfn_to_kaddr(max_low_pfn)); - for_each_online_node(nid) { - init_remap_allocator(nid); - - allocate_pgdat(nid); - } - remap_numa_kva(); printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); - for_each_online_node(nid) - propagate_e820_map_node(nid); - - for_each_online_node(nid) { - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - NODE_DATA(nid)->node_id = nid; - } setup_bootmem_allocator(); } - -#ifdef CONFIG_MEMORY_HOTPLUG -static int paddr_to_nid(u64 addr) -{ - int nid; - unsigned long pfn = PFN_DOWN(addr); - - for_each_node(nid) - if (node_start_pfn[nid] <= pfn && - pfn < node_end_pfn[nid]) - return nid; - - return -1; -} - -/* - * This function is used to ask node id BEFORE memmap and mem_section's - * initialization (pfn_to_nid() can't be used yet). - * If _PXM is not defined on ACPI's DSDT, node id must be found by this. - */ -int memory_add_physaddr_to_nid(u64 addr) -{ - int nid = paddr_to_nid(addr); - return (nid >= 0) ? nid : 0; -} - -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 85b52fc03084..dd27f401f0a0 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -2,646 +2,13 @@ * Generic VM initialization for x86-64 NUMA setups. * Copyright 2002,2003 Andi Kleen, SuSE Labs. */ -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/init.h> #include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/mmzone.h> -#include <linux/ctype.h> -#include <linux/module.h> -#include <linux/nodemask.h> -#include <linux/sched.h> -#include <linux/acpi.h> - -#include <asm/e820.h> -#include <asm/proto.h> -#include <asm/dma.h> -#include <asm/acpi.h> -#include <asm/amd_nb.h> #include "numa_internal.h" -struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_data); - -nodemask_t numa_nodes_parsed __initdata; - -struct memnode memnode; - -static unsigned long __initdata nodemap_addr; -static unsigned long __initdata nodemap_size; - -static struct numa_meminfo numa_meminfo __initdata; - -static int numa_distance_cnt; -static u8 *numa_distance; - -/* - * Given a shift value, try to populate memnodemap[] - * Returns : - * 1 if OK - * 0 if memnodmap[] too small (of shift too small) - * -1 if node overlap or lost ram (shift too big) - */ -static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift) -{ - unsigned long addr, end; - int i, res = -1; - - memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); - for (i = 0; i < mi->nr_blks; i++) { - addr = mi->blk[i].start; - end = mi->blk[i].end; - if (addr >= end) - continue; - if ((end >> shift) >= memnodemapsize) - return 0; - do { - if (memnodemap[addr >> shift] != NUMA_NO_NODE) - return -1; - memnodemap[addr >> shift] = mi->blk[i].nid; - addr += (1UL << shift); - } while (addr < end); - res = 1; - } - return res; -} - -static int __init allocate_cachealigned_memnodemap(void) -{ - unsigned long addr; - - memnodemap = memnode.embedded_map; - if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map)) - return 0; - - addr = 0x8000; - nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); - nodemap_addr = memblock_find_in_range(addr, get_max_mapped(), - nodemap_size, L1_CACHE_BYTES); - if (nodemap_addr == MEMBLOCK_ERROR) { - printk(KERN_ERR - "NUMA: Unable to allocate Memory to Node hash map\n"); - nodemap_addr = nodemap_size = 0; - return -1; - } - memnodemap = phys_to_virt(nodemap_addr); - memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP"); - - printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", - nodemap_addr, nodemap_addr + nodemap_size); - return 0; -} - -/* - * The LSB of all start and end addresses in the node map is the value of the - * maximum possible shift. - */ -static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi) -{ - int i, nodes_used = 0; - unsigned long start, end; - unsigned long bitfield = 0, memtop = 0; - - for (i = 0; i < mi->nr_blks; i++) { - start = mi->blk[i].start; - end = mi->blk[i].end; - if (start >= end) - continue; - bitfield |= start; - nodes_used++; - if (end > memtop) - memtop = end; - } - if (nodes_used <= 1) - i = 63; - else - i = find_first_bit(&bitfield, sizeof(unsigned long)*8); - memnodemapsize = (memtop >> i)+1; - return i; -} - -static int __init compute_hash_shift(const struct numa_meminfo *mi) -{ - int shift; - - shift = extract_lsb_from_nodes(mi); - if (allocate_cachealigned_memnodemap()) - return -1; - printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", - shift); - - if (populate_memnodemap(mi, shift) != 1) { - printk(KERN_INFO "Your memory is not aligned you need to " - "rebuild your kernel with a bigger NODEMAPSIZE " - "shift=%d\n", shift); - return -1; - } - return shift; -} - -int __meminit __early_pfn_to_nid(unsigned long pfn) -{ - return phys_to_nid(pfn << PAGE_SHIFT); -} - -static void * __init early_node_mem(int nodeid, unsigned long start, - unsigned long end, unsigned long size, - unsigned long align) -{ - unsigned long mem; - - /* - * put it on high as possible - * something will go with NODE_DATA - */ - if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) - start = MAX_DMA_PFN<<PAGE_SHIFT; - if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && - end > (MAX_DMA32_PFN<<PAGE_SHIFT)) - start = MAX_DMA32_PFN<<PAGE_SHIFT; - mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align); - if (mem != MEMBLOCK_ERROR) - return __va(mem); - - /* extend the search scope */ - end = max_pfn_mapped << PAGE_SHIFT; - start = MAX_DMA_PFN << PAGE_SHIFT; - mem = memblock_find_in_range(start, end, size, align); - if (mem != MEMBLOCK_ERROR) - return __va(mem); - - printk(KERN_ERR "Cannot find %lu bytes in node %d\n", - size, nodeid); - - return NULL; -} - -static int __init numa_add_memblk_to(int nid, u64 start, u64 end, - struct numa_meminfo *mi) -{ - /* ignore zero length blks */ - if (start == end) - return 0; - - /* whine about and ignore invalid blks */ - if (start > end || nid < 0 || nid >= MAX_NUMNODES) { - pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", - nid, start, end); - return 0; - } - - if (mi->nr_blks >= NR_NODE_MEMBLKS) { - pr_err("NUMA: too many memblk ranges\n"); - return -EINVAL; - } - - mi->blk[mi->nr_blks].start = start; - mi->blk[mi->nr_blks].end = end; - mi->blk[mi->nr_blks].nid = nid; - mi->nr_blks++; - return 0; -} - -/** - * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo - * @idx: Index of memblk to remove - * @mi: numa_meminfo to remove memblk from - * - * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and - * decrementing @mi->nr_blks. - */ -void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) -{ - mi->nr_blks--; - memmove(&mi->blk[idx], &mi->blk[idx + 1], - (mi->nr_blks - idx) * sizeof(mi->blk[0])); -} - -/** - * numa_add_memblk - Add one numa_memblk to numa_meminfo - * @nid: NUMA node ID of the new memblk - * @start: Start address of the new memblk - * @end: End address of the new memblk - * - * Add a new memblk to the default numa_meminfo. - * - * RETURNS: - * 0 on success, -errno on failure. - */ -int __init numa_add_memblk(int nid, u64 start, u64 end) -{ - return numa_add_memblk_to(nid, start, end, &numa_meminfo); -} - -/* Initialize bootmem allocator for a node */ -void __init -setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) -{ - unsigned long start_pfn, last_pfn, nodedata_phys; - const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); - int nid; - - if (!end) - return; - - /* - * Don't confuse VM with a node that doesn't have the - * minimum amount of memory: - */ - if (end && (end - start) < NODE_MIN_SIZE) - return; - - start = roundup(start, ZONE_ALIGN); - - printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, - start, end); - - start_pfn = start >> PAGE_SHIFT; - last_pfn = end >> PAGE_SHIFT; - - node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, - SMP_CACHE_BYTES); - if (node_data[nodeid] == NULL) - return; - nodedata_phys = __pa(node_data[nodeid]); - memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); - printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, - nodedata_phys + pgdat_size - 1); - nid = phys_to_nid(nodedata_phys); - if (nid != nodeid) - printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); - - memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); - NODE_DATA(nodeid)->node_id = nodeid; - NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; - - node_set_online(nodeid); -} - -/** - * numa_cleanup_meminfo - Cleanup a numa_meminfo - * @mi: numa_meminfo to clean up - * - * Sanitize @mi by merging and removing unncessary memblks. Also check for - * conflicts and clear unused memblks. - * - * RETURNS: - * 0 on success, -errno on failure. - */ -int __init numa_cleanup_meminfo(struct numa_meminfo *mi) -{ - const u64 low = 0; - const u64 high = (u64)max_pfn << PAGE_SHIFT; - int i, j, k; - - for (i = 0; i < mi->nr_blks; i++) { - struct numa_memblk *bi = &mi->blk[i]; - - /* make sure all blocks are inside the limits */ - bi->start = max(bi->start, low); - bi->end = min(bi->end, high); - - /* and there's no empty block */ - if (bi->start >= bi->end) { - numa_remove_memblk_from(i--, mi); - continue; - } - - for (j = i + 1; j < mi->nr_blks; j++) { - struct numa_memblk *bj = &mi->blk[j]; - unsigned long start, end; - - /* - * See whether there are overlapping blocks. Whine - * about but allow overlaps of the same nid. They - * will be merged below. - */ - if (bi->end > bj->start && bi->start < bj->end) { - if (bi->nid != bj->nid) { - pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->nid, bj->start, bj->end); - return -EINVAL; - } - pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", - bi->nid, bi->start, bi->end, - bj->start, bj->end); - } - - /* - * Join together blocks on the same node, holes - * between which don't overlap with memory on other - * nodes. - */ - if (bi->nid != bj->nid) - continue; - start = max(min(bi->start, bj->start), low); - end = min(max(bi->end, bj->end), high); - for (k = 0; k < mi->nr_blks; k++) { - struct numa_memblk *bk = &mi->blk[k]; - - if (bi->nid == bk->nid) - continue; - if (start < bk->end && end > bk->start) - break; - } - if (k < mi->nr_blks) - continue; - printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", - bi->nid, bi->start, bi->end, bj->start, bj->end, - start, end); - bi->start = start; - bi->end = end; - numa_remove_memblk_from(j--, mi); - } - } - - for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { - mi->blk[i].start = mi->blk[i].end = 0; - mi->blk[i].nid = NUMA_NO_NODE; - } - - return 0; -} - -/* - * Set nodes, which have memory in @mi, in *@nodemask. - */ -static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, - const struct numa_meminfo *mi) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mi->blk); i++) - if (mi->blk[i].start != mi->blk[i].end && - mi->blk[i].nid != NUMA_NO_NODE) - node_set(mi->blk[i].nid, *nodemask); -} - -/** - * numa_reset_distance - Reset NUMA distance table - * - * The current table is freed. The next numa_set_distance() call will - * create a new one. - */ -void __init numa_reset_distance(void) -{ - size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); - - /* numa_distance could be 1LU marking allocation failure, test cnt */ - if (numa_distance_cnt) - memblock_x86_free_range(__pa(numa_distance), - __pa(numa_distance) + size); - numa_distance_cnt = 0; - numa_distance = NULL; /* enable table creation */ -} - -static int __init numa_alloc_distance(void) -{ - nodemask_t nodes_parsed; - size_t size; - int i, j, cnt = 0; - u64 phys; - - /* size the new table and allocate it */ - nodes_parsed = numa_nodes_parsed; - numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); - - for_each_node_mask(i, nodes_parsed) - cnt = i; - cnt++; - size = cnt * cnt * sizeof(numa_distance[0]); - - phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT, - size, PAGE_SIZE); - if (phys == MEMBLOCK_ERROR) { - pr_warning("NUMA: Warning: can't allocate distance table!\n"); - /* don't retry until explicitly reset */ - numa_distance = (void *)1LU; - return -ENOMEM; - } - memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); - - numa_distance = __va(phys); - numa_distance_cnt = cnt; - - /* fill with the default distances */ - for (i = 0; i < cnt; i++) - for (j = 0; j < cnt; j++) - numa_distance[i * cnt + j] = i == j ? - LOCAL_DISTANCE : REMOTE_DISTANCE; - printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); - - return 0; -} - -/** - * numa_set_distance - Set NUMA distance from one NUMA to another - * @from: the 'from' node to set distance - * @to: the 'to' node to set distance - * @distance: NUMA distance - * - * Set the distance from node @from to @to to @distance. If distance table - * doesn't exist, one which is large enough to accommodate all the currently - * known nodes will be created. - * - * If such table cannot be allocated, a warning is printed and further - * calls are ignored until the distance table is reset with - * numa_reset_distance(). - * - * If @from or @to is higher than the highest known node at the time of - * table creation or @distance doesn't make sense, the call is ignored. - * This is to allow simplification of specific NUMA config implementations. - */ -void __init numa_set_distance(int from, int to, int distance) -{ - if (!numa_distance && numa_alloc_distance() < 0) - return; - - if (from >= numa_distance_cnt || to >= numa_distance_cnt) { - printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", - from, to, distance); - return; - } - - if ((u8)distance != distance || - (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", - from, to, distance); - return; - } - - numa_distance[from * numa_distance_cnt + to] = distance; -} - -int __node_distance(int from, int to) -{ - if (from >= numa_distance_cnt || to >= numa_distance_cnt) - return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; - return numa_distance[from * numa_distance_cnt + to]; -} -EXPORT_SYMBOL(__node_distance); - -/* - * Sanity check to catch more bad NUMA configurations (they are amazingly - * common). Make sure the nodes cover all memory. - */ -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) -{ - unsigned long numaram, e820ram; - int i; - - numaram = 0; - for (i = 0; i < mi->nr_blks; i++) { - unsigned long s = mi->blk[i].start >> PAGE_SHIFT; - unsigned long e = mi->blk[i].end >> PAGE_SHIFT; - numaram += e - s; - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); - if ((long)numaram < 0) - numaram = 0; - } - - e820ram = max_pfn - (memblock_x86_hole_size(0, - max_pfn << PAGE_SHIFT) >> PAGE_SHIFT); - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ - if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { - printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", - (numaram << PAGE_SHIFT) >> 20, - (e820ram << PAGE_SHIFT) >> 20); - return false; - } - return true; -} - -static int __init numa_register_memblks(struct numa_meminfo *mi) -{ - int i, nid; - - /* Account for nodes with cpus and no memory */ - node_possible_map = numa_nodes_parsed; - numa_nodemask_from_meminfo(&node_possible_map, mi); - if (WARN_ON(nodes_empty(node_possible_map))) - return -EINVAL; - - memnode_shift = compute_hash_shift(mi); - if (memnode_shift < 0) { - printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n"); - return -EINVAL; - } - - for (i = 0; i < mi->nr_blks; i++) - memblock_x86_register_active_regions(mi->blk[i].nid, - mi->blk[i].start >> PAGE_SHIFT, - mi->blk[i].end >> PAGE_SHIFT); - - /* for out of order entries */ - sort_node_map(); - if (!numa_meminfo_cover_memory(mi)) - return -EINVAL; - - /* Finally register nodes. */ - for_each_node_mask(nid, node_possible_map) { - u64 start = (u64)max_pfn << PAGE_SHIFT; - u64 end = 0; - - for (i = 0; i < mi->nr_blks; i++) { - if (nid != mi->blk[i].nid) - continue; - start = min(mi->blk[i].start, start); - end = max(mi->blk[i].end, end); - } - - if (start < end) - setup_node_bootmem(nid, start, end); - } - - return 0; -} - -/** - * dummy_numma_init - Fallback dummy NUMA init - * - * Used if there's no underlying NUMA architecture, NUMA initialization - * fails, or NUMA is disabled on the command line. - * - * Must online at least one node and add memory blocks that cover all - * allowed memory. This function must not fail. - */ -static int __init dummy_numa_init(void) -{ - printk(KERN_INFO "%s\n", - numa_off ? "NUMA turned off" : "No NUMA configuration found"); - printk(KERN_INFO "Faking a node at %016lx-%016lx\n", - 0LU, max_pfn << PAGE_SHIFT); - - node_set(0, numa_nodes_parsed); - numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); - - return 0; -} - -static int __init numa_init(int (*init_func)(void)) -{ - int i; - int ret; - - for (i = 0; i < MAX_LOCAL_APIC; i++) - set_apicid_to_node(i, NUMA_NO_NODE); - - nodes_clear(numa_nodes_parsed); - nodes_clear(node_possible_map); - nodes_clear(node_online_map); - memset(&numa_meminfo, 0, sizeof(numa_meminfo)); - remove_all_active_ranges(); - numa_reset_distance(); - - ret = init_func(); - if (ret < 0) - return ret; - ret = numa_cleanup_meminfo(&numa_meminfo); - if (ret < 0) - return ret; - - numa_emulation(&numa_meminfo, numa_distance_cnt); - - ret = numa_register_memblks(&numa_meminfo); - if (ret < 0) - return ret; - - for (i = 0; i < nr_cpu_ids; i++) { - int nid = early_cpu_to_node(i); - - if (nid == NUMA_NO_NODE) - continue; - if (!node_online(nid)) - numa_clear_node(i); - } - numa_init_array(); - return 0; -} - void __init initmem_init(void) { - int ret; - - if (!numa_off) { -#ifdef CONFIG_ACPI_NUMA - ret = numa_init(x86_acpi_numa_init); - if (!ret) - return; -#endif -#ifdef CONFIG_AMD_NUMA - ret = numa_init(amd_numa_init); - if (!ret) - return; -#endif - } - - numa_init(dummy_numa_init); + x86_numa_init(); } unsigned long __init numa_free_all_bootmem(void) @@ -656,12 +23,3 @@ unsigned long __init numa_free_all_bootmem(void) return pages; } - -int __cpuinit numa_cpu_node(int cpu) -{ - int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); - - if (apicid != BAD_APICID) - return __apicid_to_node[apicid]; - return NUMA_NO_NODE; -} diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index de84cc140379..d0ed086b6247 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -5,6 +5,7 @@ #include <linux/errno.h> #include <linux/topology.h> #include <linux/memblock.h> +#include <linux/bootmem.h> #include <asm/dma.h> #include "numa_internal.h" @@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, nr_nodes = MAX_NUMNODES; } - size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes; + /* + * Calculate target node size. x86_32 freaks on __udivdi3() so do + * the division in ulong number of pages and convert back. + */ + size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); + size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); + /* * Calculate the number of big nodes that can be allocated as a result * of consolidating the remainder. @@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, */ while (nodes_weight(physnode_mask)) { for_each_node_mask(i, physnode_mask) { - u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; + u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); u64 start, limit, end; int phys_blk; @@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) { static struct numa_meminfo ei __initdata; static struct numa_meminfo pi __initdata; - const u64 max_addr = max_pfn << PAGE_SHIFT; + const u64 max_addr = PFN_PHYS(max_pfn); u8 *phys_dist = NULL; size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); int max_emu_nid, dfl_phys_nid; @@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) if (numa_dist_cnt) { u64 phys; - phys = memblock_find_in_range(0, - (u64)max_pfn_mapped << PAGE_SHIFT, + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), phys_size, PAGE_SIZE); if (phys == MEMBLOCK_ERROR) { pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h index ef2d97377d7c..7178c3afe05e 100644 --- a/arch/x86/mm/numa_internal.h +++ b/arch/x86/mm/numa_internal.h @@ -19,6 +19,14 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); int __init numa_cleanup_meminfo(struct numa_meminfo *mi); void __init numa_reset_distance(void); +void __init x86_numa_init(void); + +#ifdef CONFIG_X86_64 +static inline void init_alloc_remap(int nid, u64 start, u64 end) { } +#else +void __init init_alloc_remap(int nid, u64 start, u64 end); +#endif + #ifdef CONFIG_NUMA_EMU void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt); diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index 38e6d174c497..9f0614daea85 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -414,22 +414,17 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) unsigned char *p; struct prefix_bits prf; int i; - unsigned long rv; p = (unsigned char *)ins_addr; p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(reg_rop); i++) - if (reg_rop[i] == opcode) { - rv = REG_READ; + if (reg_rop[i] == opcode) goto do_work; - } for (i = 0; i < ARRAY_SIZE(reg_wop); i++) - if (reg_wop[i] == opcode) { - rv = REG_WRITE; + if (reg_wop[i] == opcode) goto do_work; - } printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " "0x%02x\n", opcode); @@ -474,16 +469,13 @@ unsigned long get_ins_imm_val(unsigned long ins_addr) unsigned char *p; struct prefix_bits prf; int i; - unsigned long rv; p = (unsigned char *)ins_addr; p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(imm_wop); i++) - if (imm_wop[i] == opcode) { - rv = IMM_WRITE; + if (imm_wop[i] == opcode) goto do_work; - } printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " "0x%02x\n", opcode); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat.c index 8e9d3394f6d4..81dbfdeb080d 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat.c @@ -26,8 +26,6 @@ int acpi_numa __initdata; -static struct bootnode nodes_add[MAX_NUMNODES]; - static __init int setup_node(int pxm) { return acpi_map_pxm_to_node(pxm); @@ -37,7 +35,6 @@ static __init void bad_srat(void) { printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; - memset(nodes_add, 0, sizeof(nodes_add)); } static __init inline int srat_disabled(void) @@ -131,73 +128,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pxm, apic_id, node); } -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG static inline int save_add_info(void) {return 1;} #else static inline int save_add_info(void) {return 0;} #endif -/* - * Update nodes_add[] - * This code supports one contiguous hot add area per node - */ -static void __init -update_nodes_add(int node, unsigned long start, unsigned long end) -{ - unsigned long s_pfn = start >> PAGE_SHIFT; - unsigned long e_pfn = end >> PAGE_SHIFT; - int changed = 0; - struct bootnode *nd = &nodes_add[node]; - - /* I had some trouble with strange memory hotadd regions breaking - the boot. Be very strict here and reject anything unexpected. - If you want working memory hotadd write correct SRATs. - - The node size check is a basic sanity check to guard against - mistakes */ - if ((signed long)(end - start) < NODE_MIN_SIZE) { - printk(KERN_ERR "SRAT: Hotplug area too small\n"); - return; - } - - /* This check might be a bit too strict, but I'm keeping it for now. */ - if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) { - printk(KERN_ERR - "SRAT: Hotplug area %lu -> %lu has existing memory\n", - s_pfn, e_pfn); - return; - } - - /* Looks good */ - - if (nd->start == nd->end) { - nd->start = start; - nd->end = end; - changed = 1; - } else { - if (nd->start == end) { - nd->start = start; - changed = 1; - } - if (nd->end == start) { - nd->end = end; - changed = 1; - } - if (!changed) - printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); - } - - if (changed) { - node_set(node, numa_nodes_parsed); - printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", - nd->start, nd->end); - } -} /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ void __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { - unsigned long start, end; + u64 start, end; int node, pxm; if (srat_disabled()) @@ -226,11 +167,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return; } - printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, + printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, start, end); - - if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) - update_nodes_add(node, start, end); } void __init acpi_numa_arch_fixup(void) {} @@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void) return ret; return srat_disabled() ? -EINVAL : 0; } - -#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) -int memory_add_physaddr_to_nid(u64 start) -{ - int i, ret = 0; - - for_each_node(i) - if (nodes_add[i].start <= start && nodes_add[i].end > start) - ret = i; - - return ret; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c deleted file mode 100644 index 364f36bdfad8..000000000000 --- a/arch/x86/mm/srat_32.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Some of the code in this file has been gleaned from the 64 bit - * discontigmem support code base. - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to Pat Gaughen <gone@us.ibm.com> - */ -#include <linux/mm.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/mmzone.h> -#include <linux/acpi.h> -#include <linux/nodemask.h> -#include <asm/srat.h> -#include <asm/topology.h> -#include <asm/smp.h> -#include <asm/e820.h> - -/* - * proximity macros and definitions - */ -#define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */ -#define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ -#define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) -#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) -/* bitmap length; _PXM is at most 255 */ -#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) -static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ - -#define MAX_CHUNKS_PER_NODE 3 -#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) -struct node_memory_chunk_s { - unsigned long start_pfn; - unsigned long end_pfn; - u8 pxm; // proximity domain of node - u8 nid; // which cnode contains this chunk? - u8 bank; // which mem bank on this node -}; -static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; - -static int __initdata num_memory_chunks; /* total number of memory chunks */ -static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC]; - -int acpi_numa __initdata; - -static __init void bad_srat(void) -{ - printk(KERN_ERR "SRAT: SRAT not used.\n"); - acpi_numa = -1; - num_memory_chunks = 0; -} - -static __init inline int srat_disabled(void) -{ - return numa_off || acpi_numa < 0; -} - -/* Identify CPU proximity domains */ -void __init -acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) -{ - if (srat_disabled()) - return; - if (cpu_affinity->header.length != - sizeof(struct acpi_srat_cpu_affinity)) { - bad_srat(); - return; - } - - if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) - return; /* empty entry */ - - /* mark this node as "seen" in node bitmap */ - BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); - - /* don't need to check apic_id here, because it is always 8 bits */ - apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; - - printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", - cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo); -} - -/* - * Identify memory proximity domains and hot-remove capabilities. - * Fill node memory chunk list structure. - */ -void __init -acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) -{ - unsigned long long paddr, size; - unsigned long start_pfn, end_pfn; - u8 pxm; - struct node_memory_chunk_s *p, *q, *pend; - - if (srat_disabled()) - return; - if (memory_affinity->header.length != - sizeof(struct acpi_srat_mem_affinity)) { - bad_srat(); - return; - } - - if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) - return; /* empty entry */ - - pxm = memory_affinity->proximity_domain & 0xff; - - /* mark this node as "seen" in node bitmap */ - BMAP_SET(pxm_bitmap, pxm); - - /* calculate info for memory chunk structure */ - paddr = memory_affinity->base_address; - size = memory_affinity->length; - - start_pfn = paddr >> PAGE_SHIFT; - end_pfn = (paddr + size) >> PAGE_SHIFT; - - - if (num_memory_chunks >= MAXCHUNKS) { - printk(KERN_WARNING "Too many mem chunks in SRAT." - " Ignoring %lld MBytes at %llx\n", - size/(1024*1024), paddr); - return; - } - - /* Insertion sort based on base address */ - pend = &node_memory_chunk[num_memory_chunks]; - for (p = &node_memory_chunk[0]; p < pend; p++) { - if (start_pfn < p->start_pfn) - break; - } - if (p < pend) { - for (q = pend; q >= p; q--) - *(q + 1) = *q; - } - p->start_pfn = start_pfn; - p->end_pfn = end_pfn; - p->pxm = pxm; - - num_memory_chunks++; - - printk(KERN_DEBUG "Memory range %08lx to %08lx" - " in proximity domain %02x %s\n", - start_pfn, end_pfn, - pxm, - ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? - "enabled and removable" : "enabled" ) ); -} - -/* Callback for SLIT parsing */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ -} - -void acpi_numa_arch_fixup(void) -{ -} -/* - * The SRAT table always lists ascending addresses, so can always - * assume that the first "start" address that you see is the real - * start of the node, and that the current "end" address is after - * the previous one. - */ -static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) -{ - /* - * Only add present memory as told by the e820. - * There is no guarantee from the SRAT that the memory it - * enumerates is present at boot time because it represents - * *possible* memory hotplug areas the same as normal RAM. - */ - if (memory_chunk->start_pfn >= max_pfn) { - printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", - memory_chunk->start_pfn, memory_chunk->end_pfn); - return -1; - } - if (memory_chunk->nid != nid) - return -1; - - if (!node_has_online_mem(nid)) - node_start_pfn[nid] = memory_chunk->start_pfn; - - if (node_start_pfn[nid] > memory_chunk->start_pfn) - node_start_pfn[nid] = memory_chunk->start_pfn; - - if (node_end_pfn[nid] < memory_chunk->end_pfn) - node_end_pfn[nid] = memory_chunk->end_pfn; - - return 0; -} - -int __init get_memcfg_from_srat(void) -{ - int i, j, nid; - - if (srat_disabled()) - goto out_fail; - - if (acpi_numa_init() < 0) - goto out_fail; - - if (num_memory_chunks == 0) { - printk(KERN_DEBUG - "could not find any ACPI SRAT memory areas.\n"); - goto out_fail; - } - - /* Calculate total number of nodes in system from PXM bitmap and create - * a set of sequential node IDs starting at zero. (ACPI doesn't seem - * to specify the range of _PXM values.) - */ - /* - * MCD - we no longer HAVE to number nodes sequentially. PXM domain - * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically - * 32, so we will continue numbering them in this manner until MAX_NUMNODES - * approaches MAX_PXM_DOMAINS for i386. - */ - nodes_clear(node_online_map); - for (i = 0; i < MAX_PXM_DOMAINS; i++) { - if (BMAP_TEST(pxm_bitmap, i)) { - int nid = acpi_map_pxm_to_node(i); - node_set_online(nid); - } - } - BUG_ON(num_online_nodes() == 0); - - /* set cnode id in memory chunk structure */ - for (i = 0; i < num_memory_chunks; i++) - node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); - - printk(KERN_DEBUG "pxm bitmap: "); - for (i = 0; i < sizeof(pxm_bitmap); i++) { - printk(KERN_CONT "%02x ", pxm_bitmap[i]); - } - printk(KERN_CONT "\n"); - printk(KERN_DEBUG "Number of logical nodes in system = %d\n", - num_online_nodes()); - printk(KERN_DEBUG "Number of memory chunks in system = %d\n", - num_memory_chunks); - - for (i = 0; i < MAX_LOCAL_APIC; i++) - set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i])); - - for (j = 0; j < num_memory_chunks; j++){ - struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; - printk(KERN_DEBUG - "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", - j, chunk->nid, chunk->start_pfn, chunk->end_pfn); - if (node_read_chunk(chunk->nid, chunk)) - continue; - - memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn, - min(chunk->end_pfn, max_pfn)); - } - /* for out of order entries in SRAT */ - sort_node_map(); - - for_each_online_node(nid) { - unsigned long start = node_start_pfn[nid]; - unsigned long end = min(node_end_pfn[nid], max_pfn); - - memory_present(nid, start, end); - node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); - } - return 1; -out_fail: - printk(KERN_DEBUG "failed to get NUMA memory information from SRAT" - " table\n"); - return 0; -} diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile new file mode 100644 index 000000000000..90568c33ddb0 --- /dev/null +++ b/arch/x86/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S new file mode 100644 index 000000000000..66870223f8c5 --- /dev/null +++ b/arch/x86/net/bpf_jit.S @@ -0,0 +1,140 @@ +/* bpf_jit.S : BPF JIT helper functions + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/linkage.h> +#include <asm/dwarf2.h> + +/* + * Calling convention : + * rdi : skb pointer + * esi : offset of byte(s) to fetch in skb (can be scratched) + * r8 : copy of skb->data + * r9d : hlen = skb->len - skb->data_len + */ +#define SKBDATA %r8 + +sk_load_word_ind: + .globl sk_load_word_ind + + add %ebx,%esi /* offset += X */ +# test %esi,%esi /* if (offset < 0) goto bpf_error; */ + js bpf_error + +sk_load_word: + .globl sk_load_word + + mov %r9d,%eax # hlen + sub %esi,%eax # hlen - offset + cmp $3,%eax + jle bpf_slow_path_word + mov (SKBDATA,%rsi),%eax + bswap %eax /* ntohl() */ + ret + + +sk_load_half_ind: + .globl sk_load_half_ind + + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_half: + .globl sk_load_half + + mov %r9d,%eax + sub %esi,%eax # hlen - offset + cmp $1,%eax + jle bpf_slow_path_half + movzwl (SKBDATA,%rsi),%eax + rol $8,%ax # ntohs() + ret + +sk_load_byte_ind: + .globl sk_load_byte_ind + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_byte: + .globl sk_load_byte + + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ + jle bpf_slow_path_byte + movzbl (SKBDATA,%rsi),%eax + ret + +/** + * sk_load_byte_msh - BPF_S_LDX_B_MSH helper + * + * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) + * Must preserve A accumulator (%eax) + * Inputs : %esi is the offset value, already known positive + */ +ENTRY(sk_load_byte_msh) + CFI_STARTPROC + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ + jle bpf_slow_path_byte_msh + movzbl (SKBDATA,%rsi),%ebx + and $15,%bl + shl $2,%bl + ret + CFI_ENDPROC +ENDPROC(sk_load_byte_msh) + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret + +/* rsi contains offset and can be scratched */ +#define bpf_slow_path_common(LEN) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov $LEN,%ecx; /* len */ \ + lea -12(%rbp),%rdx; \ + call skb_copy_bits; \ + test %eax,%eax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi + + +bpf_slow_path_word: + bpf_slow_path_common(4) + js bpf_error + mov -12(%rbp),%eax + bswap %eax + ret + +bpf_slow_path_half: + bpf_slow_path_common(2) + js bpf_error + mov -12(%rbp),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte: + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + ret + +bpf_slow_path_byte_msh: + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c new file mode 100644 index 000000000000..bfab3fa10edc --- /dev/null +++ b/arch/x86/net/bpf_jit_comp.c @@ -0,0 +1,654 @@ +/* bpf_jit_comp.c : BPF JIT compiler + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/moduleloader.h> +#include <asm/cacheflush.h> +#include <linux/netdevice.h> +#include <linux/filter.h> + +/* + * Conventions : + * EAX : BPF A accumulator + * EBX : BPF X accumulator + * RDI : pointer to skb (first argument given to JIT function) + * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) + * ECX,EDX,ESI : scratch registers + * r9d : skb->len - skb->data_len (headlen) + * r8 : skb->data + * -8(RBP) : saved RBX value + * -16(RBP)..-80(RBP) : BPF_MEMWORDS values + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) +{ + if (len == 1) + *ptr = bytes; + else if (len == 2) + *(u16 *)ptr = bytes; + else { + *(u32 *)ptr = bytes; + barrier(); + } + return ptr + len; +} + +#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) + +#define EMIT1(b1) EMIT(b1, 1) +#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) +#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) +#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) + +#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ +#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ + +static inline bool is_imm8(int value) +{ + return value <= 127 && value >= -128; +} + +static inline bool is_near(int offset) +{ + return offset <= 127 && offset >= -128; +} + +#define EMIT_JMP(offset) \ +do { \ + if (offset) { \ + if (is_near(offset)) \ + EMIT2(0xeb, offset); /* jmp .+off8 */ \ + else \ + EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ + } \ +} while (0) + +/* list of x86 cond jumps opcodes (. + s8) + * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) + */ +#define X86_JB 0x72 +#define X86_JAE 0x73 +#define X86_JE 0x74 +#define X86_JNE 0x75 +#define X86_JBE 0x76 +#define X86_JA 0x77 + +#define EMIT_COND_JMP(op, offset) \ +do { \ + if (is_near(offset)) \ + EMIT2(op, offset); /* jxx .+off8 */ \ + else { \ + EMIT2(0x0f, op + 0x10); \ + EMIT(offset, 4); /* jxx .+off32 */ \ + } \ +} while (0) + +#define COND_SEL(CODE, TOP, FOP) \ + case CODE: \ + t_op = TOP; \ + f_op = FOP; \ + goto cond_branch + + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ + +static inline void bpf_flush_icache(void *start, void *end) +{ + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + smp_wmb(); + flush_icache_range((unsigned long)start, (unsigned long)end); + set_fs(old_fs); +} + + +void bpf_jit_compile(struct sk_filter *fp) +{ + u8 temp[64]; + u8 *prog; + unsigned int proglen, oldproglen = 0; + int ilen, i; + int t_offset, f_offset; + u8 t_op, f_op, seen = 0, pass; + u8 *image = NULL; + u8 *func; + int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ + unsigned int cleanup_addr; /* epilogue code offset */ + unsigned int *addrs; + const struct sock_filter *filter = fp->insns; + int flen = fp->len; + + if (!bpf_jit_enable) + return; + + addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + + /* Before first pass, make a rough estimation of addrs[] + * each bpf instruction is translated to less than 64 bytes + */ + for (proglen = 0, i = 0; i < flen; i++) { + proglen += 64; + addrs[i] = proglen; + } + cleanup_addr = proglen; /* epilogue address */ + + for (pass = 0; pass < 10; pass++) { + /* no prologue/epilogue for trivial filters (RET something) */ + proglen = 0; + prog = temp; + + if (seen) { + EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ + EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ + /* note : must save %rbx in case bpf_error is hit */ + if (seen & (SEEN_XREG | SEEN_DATAREF)) + EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ + if (seen & SEEN_XREG) + CLEAR_X(); /* make sure we dont leek kernel memory */ + + /* + * If this filter needs to access skb data, + * loads r9 and r8 with : + * r9 = skb->len - skb->data_len + * r8 = skb->data + */ + if (seen & SEEN_DATAREF) { + if (offsetof(struct sk_buff, len) <= 127) + /* mov off8(%rdi),%r9d */ + EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); + else { + /* mov off32(%rdi),%r9d */ + EMIT3(0x44, 0x8b, 0x8f); + EMIT(offsetof(struct sk_buff, len), 4); + } + if (is_imm8(offsetof(struct sk_buff, data_len))) + /* sub off8(%rdi),%r9d */ + EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); + else { + EMIT3(0x44, 0x2b, 0x8f); + EMIT(offsetof(struct sk_buff, data_len), 4); + } + + if (is_imm8(offsetof(struct sk_buff, data))) + /* mov off8(%rdi),%r8 */ + EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); + else { + /* mov off32(%rdi),%r8 */ + EMIT3(0x4c, 0x8b, 0x87); + EMIT(offsetof(struct sk_buff, data), 4); + } + } + } + + switch (filter[0].code) { + case BPF_S_RET_K: + case BPF_S_LD_W_LEN: + case BPF_S_ANC_PROTOCOL: + case BPF_S_ANC_IFINDEX: + case BPF_S_ANC_MARK: + case BPF_S_ANC_RXHASH: + case BPF_S_ANC_CPU: + case BPF_S_ANC_QUEUE: + case BPF_S_LD_W_ABS: + case BPF_S_LD_H_ABS: + case BPF_S_LD_B_ABS: + /* first instruction sets A register (or is RET 'constant') */ + break; + default: + /* make sure we dont leak kernel information to user */ + CLEAR_A(); /* A = 0 */ + } + + for (i = 0; i < flen; i++) { + unsigned int K = filter[i].k; + + switch (filter[i].code) { + case BPF_S_ALU_ADD_X: /* A += X; */ + seen |= SEEN_XREG; + EMIT2(0x01, 0xd8); /* add %ebx,%eax */ + break; + case BPF_S_ALU_ADD_K: /* A += K; */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ + else + EMIT1_off32(0x05, K); /* add imm32,%eax */ + break; + case BPF_S_ALU_SUB_X: /* A -= X; */ + seen |= SEEN_XREG; + EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ + break; + case BPF_S_ALU_SUB_K: /* A -= K */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ + else + EMIT1_off32(0x2d, K); /* sub imm32,%eax */ + break; + case BPF_S_ALU_MUL_X: /* A *= X; */ + seen |= SEEN_XREG; + EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ + break; + case BPF_S_ALU_MUL_K: /* A *= K */ + if (is_imm8(K)) + EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ + else { + EMIT2(0x69, 0xc0); /* imul imm32,%eax */ + EMIT(K, 4); + } + break; + case BPF_S_ALU_DIV_X: /* A /= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 != -1) + EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); + else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ + } + EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ + break; + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ + EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ + EMIT(K, 4); + EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ + break; + case BPF_S_ALU_AND_X: + seen |= SEEN_XREG; + EMIT2(0x21, 0xd8); /* and %ebx,%eax */ + break; + case BPF_S_ALU_AND_K: + if (K >= 0xFFFFFF00) { + EMIT2(0x24, K & 0xFF); /* and imm8,%al */ + } else if (K >= 0xFFFF0000) { + EMIT2(0x66, 0x25); /* and imm16,%ax */ + EMIT2(K, 2); + } else { + EMIT1_off32(0x25, K); /* and imm32,%eax */ + } + break; + case BPF_S_ALU_OR_X: + seen |= SEEN_XREG; + EMIT2(0x09, 0xd8); /* or %ebx,%eax */ + break; + case BPF_S_ALU_OR_K: + if (is_imm8(K)) + EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ + else + EMIT1_off32(0x0d, K); /* or imm32,%eax */ + break; + case BPF_S_ALU_LSH_X: /* A <<= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ + break; + case BPF_S_ALU_LSH_K: + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe0); /* shl %eax */ + else + EMIT3(0xc1, 0xe0, K); + break; + case BPF_S_ALU_RSH_X: /* A >>= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ + break; + case BPF_S_ALU_RSH_K: /* A >>= K; */ + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe8); /* shr %eax */ + else + EMIT3(0xc1, 0xe8, K); + break; + case BPF_S_ALU_NEG: + EMIT2(0xf7, 0xd8); /* neg %eax */ + break; + case BPF_S_RET_K: + if (!K) { + if (pc_ret0 == -1) + pc_ret0 = i; + CLEAR_A(); + } else { + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + } + /* fallinto */ + case BPF_S_RET_A: + if (seen) { + if (i != flen - 1) { + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + if (seen & SEEN_XREG) + EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ + EMIT1(0xc9); /* leaveq */ + } + EMIT1(0xc3); /* ret */ + break; + case BPF_S_MISC_TAX: /* X = A */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ + break; + case BPF_S_MISC_TXA: /* A = X */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ + break; + case BPF_S_LD_IMM: /* A = K */ + if (!K) + CLEAR_A(); + else + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + break; + case BPF_S_LDX_IMM: /* X = K */ + seen |= SEEN_XREG; + if (!K) + CLEAR_X(); + else + EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ + break; + case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ + seen |= SEEN_MEM; + EMIT3(0x8b, 0x45, 0xf0 - K*4); + break; + case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x8b, 0x5d, 0xf0 - K*4); + break; + case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ + seen |= SEEN_MEM; + EMIT3(0x89, 0x45, 0xf0 - K*4); + break; + case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x89, 0x5d, 0xf0 - K*4); + break; + case BPF_S_LD_W_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_LDX_W_LEN: /* X = skb->len; */ + seen |= SEEN_XREG; + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%ebx */ + EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x9f); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + if (is_imm8(offsetof(struct sk_buff, protocol))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, protocol), 4); + } + EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ + break; + case BPF_S_ANC_IFINDEX: + if (is_imm8(offsetof(struct sk_buff, dev))) { + /* movq off8(%rdi),%rax */ + EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); + } else { + EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ + EMIT(offsetof(struct sk_buff, dev), 4); + } + EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ + EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ + EMIT(offsetof(struct net_device, ifindex), 4); + break; + case BPF_S_ANC_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + if (is_imm8(offsetof(struct sk_buff, mark))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, mark), 4); + } + break; + case BPF_S_ANC_RXHASH: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + if (is_imm8(offsetof(struct sk_buff, rxhash))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, rxhash), 4); + } + break; + case BPF_S_ANC_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, queue_mapping), 4); + } + break; + case BPF_S_ANC_CPU: +#ifdef CONFIG_SMP + EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ + EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ +#else + CLEAR_A(); +#endif + break; + case BPF_S_LD_W_ABS: + func = sk_load_word; +common_load: seen |= SEEN_DATAREF; + if ((int)K < 0) + goto out; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call */ + break; + case BPF_S_LD_H_ABS: + func = sk_load_half; + goto common_load; + case BPF_S_LD_B_ABS: + func = sk_load_byte; + goto common_load; + case BPF_S_LDX_B_MSH: + if ((int)K < 0) { + if (pc_ret0 != -1) { + EMIT_JMP(addrs[pc_ret0] - addrs[i]); + break; + } + CLEAR_A(); + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = sk_load_byte_msh - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ + break; + case BPF_S_LD_W_IND: + func = sk_load_word_ind; +common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ + break; + case BPF_S_LD_H_IND: + func = sk_load_half_ind; + goto common_load_ind; + case BPF_S_LD_B_IND: + func = sk_load_byte_ind; + goto common_load_ind; + case BPF_S_JMP_JA: + t_offset = addrs[i + K] - addrs[i]; + EMIT_JMP(t_offset); + break; + COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); + COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); + +cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; + t_offset = addrs[i + filter[i].jt] - addrs[i]; + + /* same targets, can avoid doing the test :) */ + if (filter[i].jt == filter[i].jf) { + EMIT_JMP(t_offset); + break; + } + + switch (filter[i].code) { + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JEQ_X: + seen |= SEEN_XREG; + EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ + break; + case BPF_S_JMP_JSET_X: + seen |= SEEN_XREG; + EMIT2(0x85, 0xd8); /* test %ebx,%eax */ + break; + case BPF_S_JMP_JEQ_K: + if (K == 0) { + EMIT2(0x85, 0xc0); /* test %eax,%eax */ + break; + } + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGE_K: + if (K <= 127) + EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ + else + EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ + break; + case BPF_S_JMP_JSET_K: + if (K <= 0xFF) + EMIT2(0xa8, K); /* test imm8,%al */ + else if (!(K & 0xFFFF00FF)) + EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ + else if (K <= 0xFFFF) { + EMIT2(0x66, 0xa9); /* test imm16,%ax */ + EMIT(K, 2); + } else { + EMIT1_off32(0xa9, K); /* test imm32,%eax */ + } + break; + } + if (filter[i].jt != 0) { + if (filter[i].jf) + t_offset += is_near(f_offset) ? 2 : 6; + EMIT_COND_JMP(t_op, t_offset); + if (filter[i].jf) + EMIT_JMP(f_offset); + break; + } + EMIT_COND_JMP(f_op, f_offset); + break; + default: + /* hmm, too complex filter, give up with jit compiler */ + goto out; + } + ilen = prog - temp; + if (image) { + if (unlikely(proglen + ilen > oldproglen)) { + pr_err("bpb_jit_compile fatal error\n"); + kfree(addrs); + module_free(NULL, image); + return; + } + memcpy(image + proglen, temp, ilen); + } + proglen += ilen; + addrs[i] = proglen; + prog = temp; + } + /* last bpf instruction is always a RET : + * use it to give the cleanup instruction(s) addr + */ + cleanup_addr = proglen - 1; /* ret */ + if (seen) + cleanup_addr -= 1; /* leaveq */ + if (seen & SEEN_XREG) + cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ + + if (image) { + WARN_ON(proglen != oldproglen); + break; + } + if (proglen == oldproglen) { + image = module_alloc(max_t(unsigned int, + proglen, + sizeof(struct work_struct))); + if (!image) + goto out; + } + oldproglen = proglen; + } + if (bpf_jit_enable > 1) + pr_err("flen=%d proglen=%u pass=%d image=%p\n", + flen, proglen, pass, image); + + if (image) { + if (bpf_jit_enable > 1) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, + 16, 1, image, proglen, false); + + bpf_flush_icache(image, image + proglen); + + fp->bpf_func = (void *)image; + } +out: + kfree(addrs); + return; +} + +static void jit_free_defer(struct work_struct *arg) +{ + module_free(NULL, arg); +} + +/* run from softirq, we must use a work_struct to call + * module_free() from process context + */ +void bpf_jit_free(struct sk_filter *fp) +{ + if (fp->bpf_func != sk_run_filter) { + struct work_struct *work = (struct work_struct *)fp->bpf_func; + + INIT_WORK(work, jit_free_defer); + schedule_work(work); + } +} diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..a5b64ab4cd6e 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -16,17 +16,6 @@ #include <asm/stacktrace.h> #include <linux/compat.h> -static void backtrace_warning_symbol(void *data, char *msg, - unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - static int backtrace_stack(void *data, char *name) { /* Yes, we want all stacks */ @@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, .walk_stack = print_context_stack, diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e37b407a0ee8..8214724ce54d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, (type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi"); + "msi-x" : "msi", + DOMID_SELF); if (irq < 0) goto error; dev_dbg(&dev->dev, @@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, (type == PCI_CAP_ID_MSIX) ? "pcifront-msi-x" : - "pcifront-msi"); + "pcifront-msi", + DOMID_SELF); if (irq < 0) goto free; i++; @@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) list_for_each_entry(msidesc, &dev->msi_list, list) { struct physdev_map_pirq map_irq; + domid_t domid; + + domid = ret = xen_find_device_domain_owner(dev); + /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED, + * hence check ret value for < 0. */ + if (ret < 0) + domid = DOMID_SELF; memset(&map_irq, 0, sizeof(map_irq)); - map_irq.domid = DOMID_SELF; + map_irq.domid = domid; map_irq.type = MAP_PIRQ_TYPE_MSI; map_irq.index = -1; map_irq.pirq = -1; @@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); if (ret) { - dev_warn(&dev->dev, "xen map irq failed %d\n", ret); + dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n", + ret, domid); goto out; } ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq, map_irq.index, (type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi"); + "msi-x" : "msi", + domid); if (ret < 0) goto out; } @@ -461,3 +472,78 @@ void __init xen_setup_pirqs(void) } } #endif + +#ifdef CONFIG_XEN_DOM0 +struct xen_device_domain_owner { + domid_t domain; + struct pci_dev *dev; + struct list_head list; +}; + +static DEFINE_SPINLOCK(dev_domain_list_spinlock); +static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); + +static struct xen_device_domain_owner *find_device(struct pci_dev *dev) +{ + struct xen_device_domain_owner *owner; + + list_for_each_entry(owner, &dev_domain_list, list) { + if (owner->dev == dev) + return owner; + } + return NULL; +} + +int xen_find_device_domain_owner(struct pci_dev *dev) +{ + struct xen_device_domain_owner *owner; + int domain = -ENODEV; + + spin_lock(&dev_domain_list_spinlock); + owner = find_device(dev); + if (owner) + domain = owner->domain; + spin_unlock(&dev_domain_list_spinlock); + return domain; +} +EXPORT_SYMBOL_GPL(xen_find_device_domain_owner); + +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) +{ + struct xen_device_domain_owner *owner; + + owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); + if (!owner) + return -ENODEV; + + spin_lock(&dev_domain_list_spinlock); + if (find_device(dev)) { + spin_unlock(&dev_domain_list_spinlock); + kfree(owner); + return -EEXIST; + } + owner->domain = domain; + owner->dev = dev; + list_add_tail(&owner->list, &dev_domain_list); + spin_unlock(&dev_domain_list_spinlock); + return 0; +} +EXPORT_SYMBOL_GPL(xen_register_device_domain_owner); + +int xen_unregister_device_domain_owner(struct pci_dev *dev) +{ + struct xen_device_domain_owner *owner; + + spin_lock(&dev_domain_list_spinlock); + owner = find_device(dev); + if (!owner) { + spin_unlock(&dev_domain_list_spinlock); + return -ENODEV; + } + list_del(&owner->list); + spin_unlock(&dev_domain_list_spinlock); + kfree(owner); + return 0; +} +EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner); +#endif diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 0fe27d7c6258..b30aa26a8df2 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -145,17 +145,6 @@ static void virt_efi_reset_system(int reset_type, data_size, data); } -static efi_status_t virt_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - return efi_call_virt4(set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); -} - static efi_status_t __init phys_efi_set_virtual_address_map( unsigned long memory_map_size, unsigned long descriptor_size, @@ -468,11 +457,25 @@ void __init efi_init(void) #endif } +void __init efi_set_executable(efi_memory_desc_t *md, bool executable) +{ + u64 addr, npages; + + addr = md->virt_addr; + npages = md->num_pages; + + memrange_efi_to_native(&addr, &npages); + + if (executable) + set_memory_x(addr, npages); + else + set_memory_nx(addr, npages); +} + static void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; - u64 addr, npages; /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -481,10 +484,7 @@ static void __init runtime_code_page_mkexec(void) if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; - addr = md->virt_addr; - npages = md->num_pages; - memrange_efi_to_native(&addr, &npages); - set_memory_x(addr, npages); + efi_set_executable(md, true); } } @@ -498,13 +498,42 @@ static void __init runtime_code_page_mkexec(void) */ void __init efi_enter_virtual_mode(void) { - efi_memory_desc_t *md; + efi_memory_desc_t *md, *prev_md = NULL; efi_status_t status; unsigned long size; u64 end, systab, addr, npages, end_pfn; - void *p, *va; + void *p, *va, *new_memmap = NULL; + int count = 0; efi.systab = NULL; + + /* Merge contiguous regions of the same type and attribute */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + u64 prev_size; + md = p; + + if (!prev_md) { + prev_md = md; + continue; + } + + if (prev_md->type != md->type || + prev_md->attribute != md->attribute) { + prev_md = md; + continue; + } + + prev_size = prev_md->num_pages << EFI_PAGE_SHIFT; + + if (md->phys_addr == (prev_md->phys_addr + prev_size)) { + prev_md->num_pages += md->num_pages; + md->type = EFI_RESERVED_TYPE; + md->attribute = 0; + continue; + } + prev_md = md; + } + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) @@ -541,15 +570,21 @@ void __init efi_enter_virtual_mode(void) systab += md->virt_addr - md->phys_addr; efi.systab = (efi_system_table_t *) (unsigned long) systab; } + new_memmap = krealloc(new_memmap, + (count + 1) * memmap.desc_size, + GFP_KERNEL); + memcpy(new_memmap + (count * memmap.desc_size), md, + memmap.desc_size); + count++; } BUG_ON(!efi.systab); status = phys_efi_set_virtual_address_map( - memmap.desc_size * memmap.nr_map, + memmap.desc_size * count, memmap.desc_size, memmap.desc_version, - memmap.phys_map); + (efi_memory_desc_t *)__pa(new_memmap)); if (status != EFI_SUCCESS) { printk(KERN_ALERT "Unable to switch EFI into virtual mode " @@ -572,11 +607,12 @@ void __init efi_enter_virtual_mode(void) efi.set_variable = virt_efi_set_variable; efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; efi.reset_system = virt_efi_reset_system; - efi.set_virtual_address_map = virt_efi_set_virtual_address_map; + efi.set_virtual_address_map = NULL; if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; + kfree(new_memmap); } /* diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac0621a7ac3d..2649426a7905 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -41,22 +41,7 @@ static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; -static void __init early_mapping_set_exec(unsigned long start, - unsigned long end, - int executable) -{ - unsigned long num_pages; - - start &= PMD_MASK; - end = (end + PMD_SIZE - 1) & PMD_MASK; - num_pages = (end - start) >> PAGE_SHIFT; - if (executable) - set_memory_x((unsigned long)__va(start), num_pages); - else - set_memory_nx((unsigned long)__va(start), num_pages); -} - -static void __init early_runtime_code_mapping_set_exec(int executable) +static void __init early_code_mapping_set_exec(int executable) { efi_memory_desc_t *md; void *p; @@ -67,11 +52,8 @@ static void __init early_runtime_code_mapping_set_exec(int executable) /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (md->type == EFI_RUNTIME_SERVICES_CODE) { - unsigned long end; - end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - early_mapping_set_exec(md->phys_addr, end, executable); - } + if (md->type == EFI_RUNTIME_SERVICES_CODE) + efi_set_executable(md, executable); } } @@ -79,7 +61,7 @@ void __init efi_call_phys_prelog(void) { unsigned long vaddress; - early_runtime_code_mapping_set_exec(1); + early_code_mapping_set_exec(1); local_irq_save(efi_flags); vaddress = (unsigned long)__va(0x0UL); save_pgd = *pgd_offset_k(0x0UL); @@ -95,7 +77,7 @@ void __init efi_call_phys_epilog(void) set_pgd(pgd_offset_k(0x0UL), save_pgd); __flush_tlb_all(); local_irq_restore(efi_flags); - early_runtime_code_mapping_set_exec(0); + early_code_mapping_set_exec(0); } void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, @@ -107,8 +89,10 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, return ioremap(phys_addr, size); last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) - return NULL; + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { + unsigned long top = last_map_pfn << PAGE_SHIFT; + efi_ioremap(top, size - (top - phys_addr), type); + } return (void __iomem *)__va(phys_addr); } diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 275dbc19e2cf..7000e74b3087 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -194,7 +194,7 @@ static unsigned long __init mrst_calibrate_tsc(void) return 0; } -void __init mrst_time_init(void) +static void __init mrst_time_init(void) { sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); switch (mrst_timer_options) { @@ -216,7 +216,7 @@ void __init mrst_time_init(void) apbt_time_init(); } -void __cpuinit mrst_arch_setup(void) +static void __cpuinit mrst_arch_setup(void) { if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile index c2a8cab65e5d..81c5e2165c24 100644 --- a/arch/x86/platform/olpc/Makefile +++ b/arch/x86/platform/olpc/Makefile @@ -1,4 +1,2 @@ -obj-$(CONFIG_OLPC) += olpc.o +obj-$(CONFIG_OLPC) += olpc.o olpc_ofw.o olpc_dt.o obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o -obj-$(CONFIG_OLPC) += olpc_ofw.o -obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index edaf3fe8dc5e..0060fd59ea00 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -18,6 +18,7 @@ #include <linux/io.h> #include <linux/string.h> #include <linux/platform_device.h> +#include <linux/of.h> #include <asm/geode.h> #include <asm/setup.h> @@ -187,41 +188,43 @@ err: } EXPORT_SYMBOL_GPL(olpc_ec_cmd); -static bool __init check_ofw_architecture(void) +static bool __init check_ofw_architecture(struct device_node *root) { - size_t propsize; - char olpc_arch[5]; - const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 }; - void *res[] = { &propsize }; + const char *olpc_arch; + int propsize; - if (olpc_ofw("getprop", args, res)) { - printk(KERN_ERR "ofw: getprop call failed!\n"); - return false; - } + olpc_arch = of_get_property(root, "architecture", &propsize); return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; } -static u32 __init get_board_revision(void) +static u32 __init get_board_revision(struct device_node *root) { - size_t propsize; - __be32 rev; - const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; - void *res[] = { &propsize }; - - if (olpc_ofw("getprop", args, res) || propsize != 4) { - printk(KERN_ERR "ofw: getprop call failed!\n"); - return cpu_to_be32(0); - } - return be32_to_cpu(rev); + int propsize; + const __be32 *rev; + + rev = of_get_property(root, "board-revision-int", &propsize); + if (propsize != 4) + return 0; + + return be32_to_cpu(*rev); } static bool __init platform_detect(void) { - if (!check_ofw_architecture()) + struct device_node *root = of_find_node_by_path("/"); + bool success; + + if (!root) return false; - olpc_platform_info.flags |= OLPC_F_PRESENT; - olpc_platform_info.boardrev = get_board_revision(); - return true; + + success = check_ofw_architecture(root); + if (success) { + olpc_platform_info.boardrev = get_board_revision(root); + olpc_platform_info.flags |= OLPC_F_PRESENT; + } + + of_node_put(root); + return success; } static int __init add_xo1_platform_devices(void) diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 044bda5b3174..d39f63d017d2 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -19,7 +19,9 @@ #include <linux/kernel.h> #include <linux/bootmem.h> #include <linux/of.h> +#include <linux/of_platform.h> #include <linux/of_pdt.h> +#include <asm/olpc.h> #include <asm/olpc_ofw.h> static phandle __init olpc_dt_getsibling(phandle node) @@ -180,3 +182,20 @@ void __init olpc_dt_build_devicetree(void) pr_info("PROM DT: Built device tree with %u bytes of memory.\n", prom_early_allocated); } + +/* A list of DT node/bus matches that we want to expose as platform devices */ +static struct of_device_id __initdata of_ids[] = { + { .compatible = "olpc,xo1-battery" }, + { .compatible = "olpc,xo1-dcon" }, + { .compatible = "olpc,xo1-rtc" }, + {}, +}; + +static int __init olpc_create_platform_devices(void) +{ + if (machine_is_olpc()) + return of_platform_bus_probe(NULL, of_ids, NULL); + else + return 0; +} +device_initcall(olpc_create_platform_devices); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 7cb6424317f6..c58e0ea39ef5 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, unsigned int cpu) { - int tcpu; - int uvhub; int locals = 0; int remotes = 0; int hubs = 0; + int tcpu; + int tpnode; struct bau_desc *bau_desc; struct cpumask *flush_mask; struct ptc_stats *stat; struct bau_control *bcp; struct bau_control *tbcp; + struct hub_and_pnode *hpp; /* kernel was booted 'nobau' */ if (nobau) @@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - /* cpu statistics */ for_each_cpu(tcpu, flush_mask) { - uvhub = uv_cpu_to_blade_id(tcpu); - bau_uvhub_set(uvhub, &bau_desc->distribution); - if (uvhub == bcp->uvhub) + /* + * The distribution vector is a bit map of pnodes, relative + * to the partition base pnode (and the partition base nasid + * in the header). + * Translate cpu to pnode and hub using an array stored + * in local memory. + */ + hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; + tpnode = hpp->pnode - bcp->partition_base_pnode; + bau_uvhub_set(tpnode, &bau_desc->distribution); + if (hpp->uvhub == bcp->uvhub) locals++; else remotes++; @@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) * an interrupt, but causes an error message to be returned to * the sender. */ -static void uv_enable_timeouts(void) +static void __init uv_enable_timeouts(void) { int uvhub; int nuvhubs; @@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void) } /* - * initialize the sending side's sending buffers + * Initialize the sending side's sending buffers. */ static void -uv_activation_descriptor_init(int node, int pnode) +uv_activation_descriptor_init(int node, int pnode, int base_pnode) { int i; int cpu; @@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode) n = pa >> uv_nshift; m = pa & uv_mmask; + /* the 14-bit pnode */ uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, (n << UV_DESC_BASE_PNODE_SHIFT | m)); - /* - * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each * cpu even though we only use the first one; one descriptor can * describe a broadcast to 256 uv hubs. */ @@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode) memset(bd2, 0, sizeof(struct bau_desc)); bd2->header.sw_ack_flag = 1; /* - * base_dest_nodeid is the nasid of the first uvhub - * in the partition. The bit map will indicate uvhub numbers, - * which are 0-N in a partition. Pnodes are unique system-wide. + * The base_dest_nasid set in the message header is the nasid + * of the first uvhub in the partition. The bit map will + * indicate destination pnode numbers relative to that base. + * They may not be consecutive if nasid striding is being used. */ - bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); - bd2->header.dest_subnodeid = 0x10; /* the LB */ + bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); + bd2->header.dest_subnodeid = UV_LB_SUBNODEID; bd2->header.command = UV_NET_ENDPOINT_INTD; bd2->header.int_both = 1; /* @@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode) /* * Initialization of each UV hub's structures */ -static void __init uv_init_uvhub(int uvhub, int vector) +static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) { int node; int pnode; @@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector) node = uvhub_to_first_node(uvhub); pnode = uv_blade_to_pnode(uvhub); - uv_activation_descriptor_init(node, pnode); + uv_activation_descriptor_init(node, pnode, base_pnode); uv_payload_queue_init(node, pnode); /* - * the below initialization can't be in firmware because the - * messaging IRQ will be determined by the OS + * The below initialization can't be in firmware because the + * messaging IRQ will be determined by the OS. */ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, @@ -1491,10 +1500,11 @@ calculate_destination_timeout(void) /* * initialize the bau_control structure for each cpu */ -static int __init uv_init_per_cpu(int nuvhubs) +static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) { int i; int cpu; + int tcpu; int pnode; int uvhub; int have_hmaster; @@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs) bcp = &per_cpu(bau_control, cpu); memset(bcp, 0, sizeof(struct bau_control)); pnode = uv_cpu_hub_info(cpu)->pnode; + if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { + printk(KERN_EMERG + "cpu %d pnode %d-%d beyond %d; BAU disabled\n", + cpu, pnode, base_part_pnode, + UV_DISTRIBUTION_SIZE); + return 1; + } + bcp->osnode = cpu_to_node(cpu); + bcp->partition_base_pnode = uv_partition_base_pnode; uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); bdp = &uvhub_descs[uvhub]; @@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs) bdp->pnode = pnode; /* kludge: 'assuming' one node per socket, and assuming that disabling a socket just leaves a gap in node numbers */ - socket = (cpu_to_node(cpu) & 1); + socket = bcp->osnode & 1; bdp->socket_mask |= (1 << socket); sdp = &bdp->socket[socket]; sdp->cpu_number[sdp->num_cpus] = cpu; @@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs) nextsocket: socket++; socket_mask = (socket_mask >> 1); + /* each socket gets a local array of pnodes/hubs */ + bcp = smaster; + bcp->target_hub_and_pnode = kmalloc_node( + sizeof(struct hub_and_pnode) * + num_possible_cpus(), GFP_KERNEL, bcp->osnode); + memset(bcp->target_hub_and_pnode, 0, + sizeof(struct hub_and_pnode) * + num_possible_cpus()); + for_each_present_cpu(tcpu) { + bcp->target_hub_and_pnode[tcpu].pnode = + uv_cpu_hub_info(tcpu)->pnode; + bcp->target_hub_and_pnode[tcpu].uvhub = + uv_cpu_hub_info(tcpu)->numa_blade_id; + } } } kfree(uvhub_descs); @@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void) spin_lock_init(&disable_lock); congested_cycles = microsec_2_cycles(congested_response_us); - if (uv_init_per_cpu(nuvhubs)) { - nobau = 1; - return 0; - } - uv_partition_base_pnode = 0x7fffffff; - for (uvhub = 0; uvhub < nuvhubs; uvhub++) + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { if (uv_blade_nr_possible_cpus(uvhub) && (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) uv_partition_base_pnode = uv_blade_to_pnode(uvhub); + } + + if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { + nobau = 1; + return 0; + } vector = UV_BAU_MESSAGE; for_each_possible_blade(uvhub) if (uv_blade_nr_possible_cpus(uvhub)) - uv_init_uvhub(uvhub, vector); + uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); uv_enable_timeouts(); alloc_intr_gate(vector, uv_bau_message_intr1); diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 9daf5d1af9f1..0eb90184515f 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -40,7 +40,6 @@ static struct clocksource clocksource_uv = { .rating = 400, .read = uv_read_rtc, .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -372,14 +371,11 @@ static __init int uv_rtc_setup_clock(void) if (!is_uv_system()) return -ENODEV; - clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, - clocksource_uv.shift); - /* If single blade, prefer tsc */ if (uv_num_possible_blades() == 1) clocksource_uv.rating = 250; - rc = clocksource_register(&clocksource_uv); + rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second); if (rc) printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); else diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e3c6a06cf725..dd7b88f2ec7a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -235,7 +235,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *dx &= maskedx; } -static __init void xen_init_cpuid_mask(void) +static void __init xen_init_cpuid_mask(void) { unsigned int ax, bx, cx, dx; unsigned int xsave_mask; @@ -400,7 +400,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) /* * load_gdt for early boot, when the gdt is only mapped once */ -static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) +static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; @@ -662,7 +662,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, * Version of write_gdt_entry for use at early boot-time needed to * update an entry as simply as possible. */ -static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, +static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, const void *desc, int type) { switch (type) { @@ -933,18 +933,18 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static const struct pv_info xen_info __initdata = { +static const struct pv_info xen_info __initconst = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, .name = "Xen", }; -static const struct pv_init_ops xen_init_ops __initdata = { +static const struct pv_init_ops xen_init_ops __initconst = { .patch = xen_patch, }; -static const struct pv_cpu_ops xen_cpu_ops __initdata = { +static const struct pv_cpu_ops xen_cpu_ops __initconst = { .cpuid = xen_cpuid, .set_debugreg = xen_set_debugreg, @@ -1004,7 +1004,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .end_context_switch = xen_end_context_switch, }; -static const struct pv_apic_ops xen_apic_ops __initdata = { +static const struct pv_apic_ops xen_apic_ops __initconst = { #ifdef CONFIG_X86_LOCAL_APIC .startup_ipi_hook = paravirt_nop, #endif @@ -1055,7 +1055,7 @@ int xen_panic_handler_init(void) return 0; } -static const struct machine_ops __initdata xen_machine_ops = { +static const struct machine_ops xen_machine_ops __initconst = { .restart = xen_restart, .halt = xen_machine_halt, .power_off = xen_machine_halt, @@ -1332,7 +1332,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { +static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { .notifier_call = xen_hvm_cpu_notify, }; @@ -1381,7 +1381,7 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { +const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { .name = "Xen HVM", .detect = xen_hvm_platform, .init_platform = xen_hvm_guest_init, diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 6a6fe8939645..8bbb465b6f0a 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -113,7 +113,7 @@ static void xen_halt(void) xen_safe_halt(); } -static const struct pv_irq_ops xen_irq_ops __initdata = { +static const struct pv_irq_ops xen_irq_ops __initconst = { .save_fl = PV_CALLEE_SAVE(xen_save_fl), .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0684f3c74d53..02d752460371 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1054,7 +1054,7 @@ void xen_mm_pin_all(void) * that's before we have page structures to store the bits. So do all * the book-keeping now. */ -static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, +static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, enum pt_level level) { SetPagePinned(page); @@ -1187,7 +1187,7 @@ static void drop_other_mm_ref(void *info) active_mm = percpu_read(cpu_tlbstate.active_mm); - if (active_mm == mm) + if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure @@ -1271,7 +1271,7 @@ void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -static __init void xen_pagetable_setup_start(pgd_t *base) +static void __init xen_pagetable_setup_start(pgd_t *base) { } @@ -1291,7 +1291,7 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) static void xen_post_allocator_init(void); -static __init void xen_pagetable_setup_done(pgd_t *base) +static void __init xen_pagetable_setup_done(pgd_t *base) { xen_setup_shared_info(); xen_post_allocator_init(); @@ -1488,7 +1488,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) } #ifdef CONFIG_X86_32 -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) { /* If there's an existing pte, then don't allow _PAGE_RW to be set */ if (pte_val_ma(*ptep) & _PAGE_PRESENT) @@ -1498,7 +1498,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) return pte; } #else /* CONFIG_X86_64 */ -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) { unsigned long pfn = pte_pfn(pte); @@ -1519,7 +1519,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) /* Init-time set_pte while constructing initial pagetables, which doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) { pte = mask_rw_pte(ptep, pte); @@ -1537,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) +static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) { #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ @@ -1547,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) } /* Used for pmd and pud */ -static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) +static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) { #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ @@ -1557,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static __init void xen_release_pte_init(unsigned long pfn) +static void __init xen_release_pte_init(unsigned long pfn) { pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static __init void xen_release_pmd_init(unsigned long pfn) +static void __init xen_release_pmd_init(unsigned long pfn) { make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } @@ -1689,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot) BUG(); } -static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; unsigned ident_pte; @@ -1772,7 +1772,7 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. */ -__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, +pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; @@ -1843,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); -static __init void xen_write_cr3_init(unsigned long cr3) +static void __init xen_write_cr3_init(unsigned long cr3) { unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); @@ -1880,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, +pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1986,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } -__init void xen_ident_map_ISA(void) +void __init xen_ident_map_ISA(void) { unsigned long pa; @@ -2009,7 +2009,7 @@ __init void xen_ident_map_ISA(void) xen_flush_tlb(); } -static __init void xen_post_allocator_init(void) +static void __init xen_post_allocator_init(void) { #ifdef CONFIG_XEN_DEBUG pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); @@ -2046,7 +2046,7 @@ static void xen_leave_lazy_mmu(void) preempt_enable(); } -static const struct pv_mmu_ops xen_mmu_ops __initdata = { +static const struct pv_mmu_ops xen_mmu_ops __initconst = { .read_cr2 = xen_read_cr2, .write_cr2 = xen_write_cr2, diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 141eb0de8b06..58efeb9d5440 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -522,11 +522,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn) /* Boundary cross-over for the edges: */ if (idx) { unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); + unsigned long *mid_mfn_p; p2m_init(p2m); p2m_top[topidx][mididx] = p2m; + /* For save/restore we need to MFN of the P2M saved */ + + mid_mfn_p = p2m_top_mfn_p[topidx]; + WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), + "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", + topidx, mididx); + mid_mfn_p[mididx] = virt_to_mfn(p2m); + } return idx != 0; } @@ -549,12 +558,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) { unsigned topidx = p2m_top_index(pfn); - if (p2m_top[topidx] == p2m_mid_missing) { - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); + unsigned long *mid_mfn_p; + unsigned long **mid; + + mid = p2m_top[topidx]; + mid_mfn_p = p2m_top_mfn_p[topidx]; + if (mid == p2m_mid_missing) { + mid = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_mid_init(mid); p2m_top[topidx] = mid; + + BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); + } + /* And the save/restore P2M tables.. */ + if (mid_mfn_p == p2m_mid_missing_mfn) { + mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_mid_mfn_init(mid_mfn_p); + + p2m_top_mfn_p[topidx] = mid_mfn_p; + p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); + /* Note: we don't set mid_mfn_p[midix] here, + * look in __early_alloc_p2m */ } } @@ -650,7 +676,7 @@ static unsigned long mfn_hash(unsigned long mfn) } /* Add an MFN override for a particular page */ -int m2p_add_override(unsigned long mfn, struct page *page) +int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) { unsigned long flags; unsigned long pfn; @@ -662,7 +688,6 @@ int m2p_add_override(unsigned long mfn, struct page *page) if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); ptep = lookup_address(address, &level); - if (WARN(ptep == NULL || level != PG_LEVEL_4K, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; @@ -674,18 +699,17 @@ int m2p_add_override(unsigned long mfn, struct page *page) if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) return -ENOMEM; - if (!PageHighMem(page)) + if (clear_pte && !PageHighMem(page)) /* Just zap old mapping for now */ pte_clear(&init_mm, address, ptep); - spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); return 0; } - -int m2p_remove_override(struct page *page) +EXPORT_SYMBOL_GPL(m2p_add_override); +int m2p_remove_override(struct page *page, bool clear_pte) { unsigned long flags; unsigned long mfn; @@ -713,7 +737,7 @@ int m2p_remove_override(struct page *page) spin_unlock_irqrestore(&m2p_override_lock, flags); set_phys_to_machine(pfn, page->index); - if (!PageHighMem(page)) + if (clear_pte && !PageHighMem(page)) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); /* No tlb flush necessary because the caller already @@ -721,6 +745,7 @@ int m2p_remove_override(struct page *page) return 0; } +EXPORT_SYMBOL_GPL(m2p_remove_override); struct page *m2p_find_override(unsigned long mfn) { diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 90bac0aac3a5..be1a464f6d66 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -50,7 +50,7 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; */ #define EXTRA_MEM_RATIO (10) -static __init void xen_add_extra_mem(unsigned long pages) +static void __init xen_add_extra_mem(unsigned long pages) { unsigned long pfn; @@ -166,7 +166,7 @@ static unsigned long __init xen_set_identity(const struct e820entry *list, if (last > end) continue; - if (entry->type == E820_RAM) { + if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) { if (start > start_pci) identity += set_phys_range_identity( PFN_UP(start_pci), PFN_DOWN(start)); @@ -227,7 +227,11 @@ char * __init xen_memory_setup(void) memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; +#ifdef CONFIG_X86_32 + xen_extra_mem_start = mem_end; +#else xen_extra_mem_start = max((1ULL << 32), mem_end); +#endif for (i = 0; i < memmap.nr_entries; i++) { unsigned long long end; @@ -336,7 +340,7 @@ static void __init fiddle_vdso(void) #endif } -static __cpuinit int register_callback(unsigned type, const void *func) +static int __cpuinit register_callback(unsigned type, const void *func) { struct callback_register callback = { .type = type, diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 30612441ed99..41038c01de40 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -46,18 +46,17 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { inc_irq_stat(irq_resched_count); + scheduler_ipi(); return IRQ_HANDLED; } -static __cpuinit void cpu_bringup(void) +static void __cpuinit cpu_bringup(void) { int cpu = smp_processor_id(); @@ -85,7 +84,7 @@ static __cpuinit void cpu_bringup(void) wmb(); /* make sure everything is out */ } -static __cpuinit void cpu_bringup_and_idle(void) +static void __cpuinit cpu_bringup_and_idle(void) { cpu_bringup(); cpu_idle(); @@ -242,7 +241,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) } } -static __cpuinit int +static int __cpuinit cpu_initialize_context(unsigned int cpu, struct task_struct *idle) { struct vcpu_guest_context *ctxt; @@ -486,7 +485,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static const struct smp_ops xen_smp_ops __initdata = { +static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_smp_prepare_cpus, .smp_cpus_done = xen_smp_cpus_done, diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 2e2d370a47b1..5158c505bef9 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -26,8 +26,6 @@ #include "xen-ops.h" -#define XEN_SHIFT 22 - /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 #define NS_PER_TICK (1000000000LL / HZ) @@ -211,8 +209,6 @@ static struct clocksource xen_clocksource __read_mostly = { .rating = 400, .read = xen_clocksource_get_cycles, .mask = ~0, - .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */ - .shift = XEN_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -439,16 +435,16 @@ void xen_timer_resume(void) } } -static const struct pv_time_ops xen_time_ops __initdata = { +static const struct pv_time_ops xen_time_ops __initconst = { .sched_clock = xen_clocksource_read, }; -static __init void xen_time_init(void) +static void __init xen_time_init(void) { int cpu = smp_processor_id(); struct timespec tp; - clocksource_register(&xen_clocksource); + clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { /* Successfully turned off 100Hz tick, so we have the @@ -468,7 +464,7 @@ static __init void xen_time_init(void) xen_setup_cpu_clockevents(); } -__init void xen_init_time_ops(void) +void __init xen_init_time_ops(void) { pv_time_ops = xen_time_ops; @@ -490,7 +486,7 @@ static void xen_hvm_setup_cpu_clockevents(void) xen_setup_cpu_clockevents(); } -__init void xen_hvm_init_time_ops(void) +void __init xen_hvm_init_time_ops(void) { /* vector callback is needed otherwise we cannot receive interrupts * on cpu > 0 and at this point we don't know how many cpus are diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 3112f55638c4..97dfdc8757b3 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -74,7 +74,7 @@ static inline void xen_hvm_smp_init(void) {} #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init xen_init_spinlocks(void); -__cpuinit void xen_init_lock_cpu(int cpu); +void __cpuinit xen_init_lock_cpu(int cpu); void xen_uninit_lock_cpu(int cpu); #else static inline void xen_init_spinlocks(void) |