diff options
Diffstat (limited to 'arch')
268 files changed, 4259 insertions, 2996 deletions
diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile index cd143887380a..8399bd0e68e8 100644 --- a/arch/alpha/boot/Makefile +++ b/arch/alpha/boot/Makefile @@ -14,6 +14,9 @@ targets := vmlinux.gz vmlinux \ tools/bootpzh bootloader bootpheader bootpzheader OBJSTRIP := $(obj)/tools/objstrip +HOSTCFLAGS := -Wall -I$(objtree)/usr/include +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) + # SRM bootable image. Copy to offset 512 of a partition. $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ @@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE $(call if_changed,objstrip) -LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax -LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootpheader := -static -T # -N -relax +LDFLAGS_bootpzheader := -static -T # -N -relax -OBJ_bootlx := $(obj)/head.o $(obj)/main.o -OBJ_bootph := $(obj)/head.o $(obj)/bootp.o -OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o +OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o +OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o +OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE $(call if_changed,ld) diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 3baf2d1e908d..dd6eb4a33582 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -19,7 +19,6 @@ #include "ksize.h" -extern int vsprintf(char *, const char *, va_list); extern unsigned long switch_to_osf_pal(unsigned long nr, struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, unsigned long *vptb); diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c new file mode 100644 index 000000000000..f844dae8a54a --- /dev/null +++ b/arch/alpha/boot/stdio.c @@ -0,0 +1,306 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <stdarg.h> +#include <stddef.h> + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +# define do_div(n, base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + __rem = ((unsigned long long)(n)) % __base; \ + (n) = ((unsigned long long)(n)) / __base; \ + __rem; \ +}) + + +static int skip_atoi(const char **s) +{ + int i, c; + + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, unsigned long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if ((signed long long)num < 0) { + sign = '-'; + num = - (signed long long)num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) { + tmp[i++] = digits[do_div(num, base)]; + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if ('0' <= *fmt && *fmt <= '9') + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if ('0' <= *fmt && *fmt <= '9') + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'l' && *(fmt + 1) == 'l') { + qualifier = 'q'; + fmt += 2; + } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' + || *fmt == 'Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = "<NULL>"; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'q') { + num = va_arg(args, unsigned long long); + if (flags & SIGN) + num = (signed long long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c index 367d53d031fc..dee82695f48b 100644 --- a/arch/alpha/boot/tools/objstrip.c +++ b/arch/alpha/boot/tools/objstrip.c @@ -27,6 +27,9 @@ #include <linux/param.h> #ifdef __ELF__ # include <linux/elf.h> +# define elfhdr elf64_hdr +# define elf_phdr elf64_phdr +# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) #endif /* bootfile size must be multiple of BLOCK_SIZE: */ diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index f61e1a56c378..4cb4b6d3452c 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -2,6 +2,5 @@ #define _ALPHA_TYPES_H #include <asm-generic/int-ll64.h> -#include <uapi/asm/types.h> #endif /* _ALPHA_TYPES_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index c509d306db45..a56e608db2f9 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,7 +3,7 @@ #include <uapi/asm/unistd.h> -#define NR_SYSCALLS 511 +#define NR_SYSCALLS 514 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index d214a0358100..aa33bf5aacb6 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -472,5 +472,8 @@ #define __NR_sched_setattr 508 #define __NR_sched_getattr 509 #define __NR_renameat2 510 +#define __NR_getrandom 511 +#define __NR_memfd_create 512 +#define __NR_execveat 513 #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 253cf1a87481..51267ac5729b 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -6,7 +6,6 @@ * Error handling code supporting Alpha systems */ -#include <linux/init.h> #include <linux/sched.h> #include <asm/io.h> diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 7b2be251c30f..51f2c8654253 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -19,7 +19,6 @@ #include <linux/ptrace.h> #include <linux/interrupt.h> #include <linux/random.h> -#include <linux/init.h> #include <linux/irq.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index e51f578636a5..36dc91ace83a 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, if (tv) { if (get_tv32((struct timeval *)&kts, tv)) return -EFAULT; + kts.tv_nsec *= 1000; } if (tz) { if (copy_from_user(&ktz, tz, sizeof(*tz))) return -EFAULT; } - kts.tv_nsec *= 1000; - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 1941a07b5811..84d13263ce46 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task) } /* - * Copy an alpha thread.. + * Copy architecture-specific thread state */ - int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, + unsigned long kthread_arg, struct task_struct *p) { extern void ret_from_fork(void); @@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, sizeof(struct switch_stack) + sizeof(struct pt_regs)); childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ - childstack->r10 = arg; + childstack->r10 = kthread_arg; childregs->hae = alpha_mv.hae_cache, childti->pcb.usp = 0; return 0; diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 99ac36d5de4e..2f24447fef92 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -63,7 +63,6 @@ static struct { enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, }; @@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier) return -EINVAL; } - static void send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) { @@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs) generic_smp_call_function_interrupt(); break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case IPI_CPU_STOP: halt(); @@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); } static void diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 6f01d9ad7b81..72b59511e59a 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -237,8 +237,7 @@ srmcons_init(void) return -ENODEV; } - -module_init(srmcons_init); +device_initcall(srmcons_init); /* diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index f21d61fab678..24e41bd7d3c9 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); irq = intline; - msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI); + msi_loc = dev->msi_cap; msg_ctl = 0; if (msi_loc) pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl); diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 24789713f1ea..9b62e3fd4f03 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -529,6 +529,9 @@ sys_call_table: .quad sys_sched_setattr .quad sys_sched_getattr .quad sys_renameat2 /* 510 */ + .quad sys_getrandom + .quad sys_memfd_create + .quad sys_execveat .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 9c4c189eb22f..74aceead06e9 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -14,7 +14,6 @@ #include <linux/tty.h> #include <linux/delay.h> #include <linux/module.h> -#include <linux/init.h> #include <linux/kallsyms.h> #include <linux/ratelimit.h> diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c index 18aa9b4f94f1..086a0d5445c5 100644 --- a/arch/alpha/oprofile/op_model_ev4.c +++ b/arch/alpha/oprofile/op_model_ev4.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c index c32f8a0ad925..c300f5ef3482 100644 --- a/arch/alpha/oprofile/op_model_ev5.c +++ b/arch/alpha/oprofile/op_model_ev5.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c index 1c84cc257fc7..02edf5971614 100644 --- a/arch/alpha/oprofile/op_model_ev6.c +++ b/arch/alpha/oprofile/op_model_ev6.c @@ -8,7 +8,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c index 34a57a126553..adb1744d20f3 100644 --- a/arch/alpha/oprofile/op_model_ev67.c +++ b/arch/alpha/oprofile/op_model_ev67.c @@ -9,7 +9,6 @@ */ #include <linux/oprofile.h> -#include <linux/init.h> #include <linux/smp.h> #include <asm/ptrace.h> diff --git a/arch/arc/Kconfig.debug b/arch/arc/Kconfig.debug index a7fc0da25650..ff6a4b5ce927 100644 --- a/arch/arc/Kconfig.debug +++ b/arch/arc/Kconfig.debug @@ -2,19 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config EARLY_PRINTK - bool "Early printk" if EMBEDDED - default y - help - Write kernel log output directly into the VGA buffer or to a serial - port. - - This is useful for kernel debugging when your machine crashes very - early before the console code is initialized. For normal operation - it is not recommended because it looks ugly and doesn't cooperate - with klogd/syslogd or the X server. You should normally N here, - unless you want to debug such a crash. - config 16KSTACKS bool "Use 16Kb for kernel stacks instead of 8Kb" help diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 067551b6920a..9917a45fc430 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -99,7 +99,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ atomic_ops_unlock(flags); \ } -#define ATOMIC_OP_RETURN(op, c_op) \ +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned long flags; \ diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 8c3a3e02ba92..12b2100db073 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -266,7 +266,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, * Machine specific helpers for Entire D-Cache or Per Line ops */ -static unsigned int __before_dc_op(const int op) +static inline unsigned int __before_dc_op(const int op) { unsigned int reg = reg; @@ -284,7 +284,7 @@ static unsigned int __before_dc_op(const int op) return reg; } -static void __after_dc_op(const int op, unsigned int reg) +static inline void __after_dc_op(const int op, unsigned int reg) { if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 86217db2937a..992736b5229b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \ imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \ imx25-karo-tx25.dtb \ imx25-pdk.dtb -dtb-$(CONFIG_SOC_IMX31) += \ +dtb-$(CONFIG_SOC_IMX27) += \ imx27-apf27.dtb \ imx27-apf27dev.dtb \ imx27-eukrea-mbimxsd27-baseboard.dtb \ diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts index 5c42d259fa68..901739fcb85a 100644 --- a/arch/arm/boot/dts/am335x-boneblack.dts +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -80,7 +80,3 @@ status = "okay"; }; }; - -&rtc { - system-power-controller; -}; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 87fc7a35e802..156d05efcb70 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -654,7 +654,7 @@ wlcore: wlcore@2 { compatible = "ti,wl1271"; reg = <2>; - interrupt-parent = <&gpio1>; + interrupt-parent = <&gpio0>; interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */ ref-clock-frequency = <38400000>; }; diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 8ae29c955c11..c17097d2c167 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -49,7 +49,7 @@ pinctrl-0 = <&matrix_keypad_pins>; debounce-delay-ms = <5>; - col-scan-delay-us = <1500>; + col-scan-delay-us = <5>; row-gpios = <&gpio5 5 GPIO_ACTIVE_HIGH /* Bank5, pin5 */ &gpio5 6 GPIO_ACTIVE_HIGH>; /* Bank5, pin6 */ @@ -473,7 +473,7 @@ interrupt-parent = <&gpio0>; interrupts = <31 0>; - wake-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio1 28 GPIO_ACTIVE_LOW>; touchscreen-size-x = <480>; touchscreen-size-y = <272>; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 15f198e4864d..7128fad991ac 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -18,6 +18,7 @@ aliases { rtc0 = &mcp_rtc; rtc1 = &tps659038_rtc; + rtc2 = &rtc; }; memory { @@ -83,7 +84,7 @@ gpio_fan: gpio_fan { /* Based on 5v 500mA AFB02505HHB */ compatible = "gpio-fan"; - gpios = <&tps659038_gpio 1 GPIO_ACTIVE_HIGH>; + gpios = <&tps659038_gpio 2 GPIO_ACTIVE_HIGH>; gpio-fan,speed-map = <0 0>, <13000 1>; #cooling-cells = <2>; @@ -130,8 +131,8 @@ uart3_pins_default: uart3_pins_default { pinctrl-single,pins = < - 0x248 (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_rxd.rxd */ - 0x24c (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_txd.txd */ + 0x3f8 (PIN_INPUT_SLEW | MUX_MODE2) /* uart2_ctsn.uart3_rxd */ + 0x3fc (PIN_INPUT_SLEW | MUX_MODE1) /* uart2_rtsn.uart3_txd */ >; }; @@ -455,7 +456,7 @@ mcp_rtc: rtc@6f { compatible = "microchip,mcp7941x"; reg = <0x6f>; - interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>; /* IRQ_SYS_1N */ + interrupts = <GIC_SPI 2 IRQ_TYPE_EDGE_RISING>; /* IRQ_SYS_1N */ pinctrl-names = "default"; pinctrl-0 = <&mcp79410_pins_default>; @@ -478,7 +479,7 @@ &uart3 { status = "okay"; interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, - <&dra7_pmx_core 0x248>; + <&dra7_pmx_core 0x3f8>; pinctrl-names = "default"; pinctrl-0 = <&uart3_pins_default>; diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi index c675257f2377..f076ff856d8b 100644 --- a/arch/arm/boot/dts/armada-375.dtsi +++ b/arch/arm/boot/dts/armada-375.dtsi @@ -69,7 +69,7 @@ mainpll: mainpll { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <2000000000>; + clock-frequency = <1000000000>; }; /* 25 MHz reference crystal */ refclk: oscillator { diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index ed2dd8ba4080..218a2acd36e5 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -585,7 +585,7 @@ mainpll: mainpll { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <2000000000>; + clock-frequency = <1000000000>; }; /* 25 MHz reference crystal */ diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi index 0e85fc15ceda..ecd1318109ba 100644 --- a/arch/arm/boot/dts/armada-39x.dtsi +++ b/arch/arm/boot/dts/armada-39x.dtsi @@ -502,7 +502,7 @@ mainpll: mainpll { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <2000000000>; + clock-frequency = <1000000000>; }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index e3b08fb959e5..990e8a2100f0 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -105,6 +105,10 @@ }; internal-regs { + rtc@10300 { + /* No crystal connected to the internal RTC */ + status = "disabled"; + }; serial@12000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/dove-cubox.dts b/arch/arm/boot/dts/dove-cubox.dts index aae7efc09b0b..e6fa251e17b9 100644 --- a/arch/arm/boot/dts/dove-cubox.dts +++ b/arch/arm/boot/dts/dove-cubox.dts @@ -87,6 +87,7 @@ /* connect xtal input to 25MHz reference */ clocks = <&ref25>; + clock-names = "xtal"; /* connect xtal input as source of pll0 and pll1 */ silabs,pll-source = <0 0>, <1 0>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 5332b57b4950..f03a091cd076 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -911,7 +911,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07ddc 0x4>, <0x4ae07de0 0x4>, - <0x4ae06014 0x4>, <0x4a003b20 0x8>, + <0x4ae06014 0x4>, <0x4a003b20 0xc>, <0x4ae0c158 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -944,7 +944,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07e34 0x4>, <0x4ae07e24 0x4>, - <0x4ae06010 0x4>, <0x4a0025cc 0x8>, + <0x4ae06010 0x4>, <0x4a0025cc 0xc>, <0x4a002470 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -977,7 +977,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07e30 0x4>, <0x4ae07e20 0x4>, - <0x4ae06010 0x4>, <0x4a0025e0 0x8>, + <0x4ae06010 0x4>, <0x4a0025e0 0xc>, <0x4a00246c 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -1010,7 +1010,7 @@ ti,clock-cycles = <16>; reg = <0x4ae07de4 0x4>, <0x4ae07de8 0x4>, - <0x4ae06010 0x4>, <0x4a003b08 0x8>, + <0x4ae06010 0x4>, <0x4a003b08 0xc>, <0x4ae0c154 0x4>; reg-names = "setup-address", "control-address", "int-address", "efuse-address", @@ -1203,7 +1203,7 @@ status = "disabled"; }; - rtc@48838000 { + rtc: rtc@48838000 { compatible = "ti,am3352-rtc"; reg = <0x48838000 0x100>; interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index 8de12af7c276..d6b49e5b32e9 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -9,6 +9,7 @@ #include <dt-bindings/sound/samsung-i2s.h> #include <dt-bindings/input/input.h> +#include <dt-bindings/clock/maxim,max77686.h> #include "exynos4412.dtsi" / { @@ -105,6 +106,8 @@ rtc@10070000 { status = "okay"; + clocks = <&clock CLK_RTC>, <&max77686 MAX77686_CLK_AP>; + clock-names = "rtc", "rtc_src"; }; g2d@10800000 { diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts index 173ffa479ad3..792394dd0f2a 100644 --- a/arch/arm/boot/dts/exynos4412-trats2.dts +++ b/arch/arm/boot/dts/exynos4412-trats2.dts @@ -736,7 +736,7 @@ display-timings { timing-0 { - clock-frequency = <0>; + clock-frequency = <57153600>; hactive = <720>; vactive = <1280>; hfront-porch = <5>; diff --git a/arch/arm/boot/dts/exynos5250-snow.dts b/arch/arm/boot/dts/exynos5250-snow.dts index 2657e842e5a5..1eca97ee4bd6 100644 --- a/arch/arm/boot/dts/exynos5250-snow.dts +++ b/arch/arm/boot/dts/exynos5250-snow.dts @@ -567,6 +567,7 @@ num-slots = <1>; broken-cd; cap-sdio-irq; + keep-power-in-suspend; card-detect-delay = <200>; samsung,dw-mshc-ciu-div = <3>; samsung,dw-mshc-sdr-timing = <2 3>; diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 0788d08fb43e..146e71118a72 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -711,6 +711,7 @@ num-slots = <1>; broken-cd; cap-sdio-irq; + keep-power-in-suspend; card-detect-delay = <200>; clock-frequency = <400000000>; samsung,dw-mshc-ciu-div = <1>; diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi index 5d31fc140823..2180a0152c9b 100644 --- a/arch/arm/boot/dts/exynos5420-trip-points.dtsi +++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi @@ -28,7 +28,7 @@ trips { type = "active"; }; cpu-crit-0 { - temperature = <1200000>; /* millicelsius */ + temperature = <120000>; /* millicelsius */ hysteresis = <0>; /* millicelsius */ type = "critical"; }; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index f67b23f303c3..45317538bbae 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -536,6 +536,7 @@ clock-names = "dp"; phys = <&dp_phy>; phy-names = "dp"; + power-domains = <&disp_pd>; }; mipi_phy: video-phy@10040714 { diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi index 48adfa8f4300..356e963edf11 100644 --- a/arch/arm/boot/dts/exynos5440-trip-points.dtsi +++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi @@ -18,7 +18,7 @@ trips { type = "active"; }; cpu-crit-0 { - temperature = <1050000>; /* millicelsius */ + temperature = <105000>; /* millicelsius */ hysteresis = <0>; /* millicelsius */ type = "critical"; }; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 412f41d62686..02eb8b15374f 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -674,6 +674,7 @@ num-slots = <1>; broken-cd; cap-sdio-irq; + keep-power-in-suspend; card-detect-delay = <200>; clock-frequency = <400000000>; samsung,dw-mshc-ciu-div = <1>; diff --git a/arch/arm/boot/dts/imx23-olinuxino.dts b/arch/arm/boot/dts/imx23-olinuxino.dts index 7e6eef2488e8..82045398bf1f 100644 --- a/arch/arm/boot/dts/imx23-olinuxino.dts +++ b/arch/arm/boot/dts/imx23-olinuxino.dts @@ -12,6 +12,7 @@ */ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include "imx23.dtsi" / { @@ -93,6 +94,7 @@ ahb@80080000 { usb0: usb@80080000 { + dr_mode = "host"; vbus-supply = <®_usb0_vbus>; status = "okay"; }; @@ -122,7 +124,7 @@ user { label = "green"; - gpios = <&gpio2 1 1>; + gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; }; }; }; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index e4d3aecc4ed2..677f81d9dcd5 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -428,6 +428,7 @@ pwm4: pwm@53fc8000 { compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; + #pwm-cells = <2>; reg = <0x53fc8000 0x4000>; clocks = <&clks 108>, <&clks 52>; clock-names = "ipg", "per"; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index 6951b66d1ab7..bc215e4b75fd 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -533,7 +533,7 @@ fec: ethernet@1002b000 { compatible = "fsl,imx27-fec"; - reg = <0x1002b000 0x4000>; + reg = <0x1002b000 0x1000>; interrupts = <50>; clocks = <&clks IMX27_CLK_FEC_IPG_GATE>, <&clks IMX27_CLK_FEC_AHB_GATE>; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 25e25f82fbae..4e073e854742 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -913,7 +913,7 @@ 80 81 68 69 70 71 72 73 74 75 76 77>; - interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty", + interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty", "saif0", "saif1", "i2c0", "i2c1", "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx", "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx"; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index 19cc269a08d4..1ce6133b67f5 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -31,6 +31,7 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; gpio = <&gpio4 15 0>; + enable-active-high; }; reg_usb_h1_vbus: regulator@1 { @@ -40,6 +41,7 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; gpio = <&gpio1 0 0>; + enable-active-high; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 46b2fed7c319..3b24b12651b2 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -185,7 +185,6 @@ &i2c3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c3>; - pinctrl-assert-gpios = <&gpio5 4 GPIO_ACTIVE_HIGH>; status = "okay"; max7310_a: gpio@30 { diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index 134d3f27a8ec..921de6605f07 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -110,6 +110,8 @@ nand@0,0 { reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ nand-bus-width = <16>; + gpmc,device-width = <2>; + ti,nand-ecc-opt = "sw"; gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index a29315833ecd..5c16145920ea 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -498,6 +498,8 @@ DRVDD-supply = <&vmmc2>; IOVDD-supply = <&vio>; DVDD-supply = <&vio>; + + ai3x-micbias-vg = <1>; }; tlv320aic3x_aux: tlv320aic3x@19 { @@ -509,6 +511,8 @@ DRVDD-supply = <&vmmc2>; IOVDD-supply = <&vio>; DVDD-supply = <&vio>; + + ai3x-micbias-vg = <2>; }; tsl2563: tsl2563@29 { diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index d18a90f5eca3..69a40cfc1f29 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -456,6 +456,7 @@ }; mmu_isp: mmu@480bd400 { + #iommu-cells = <0>; compatible = "ti,omap2-iommu"; reg = <0x480bd400 0x80>; interrupts = <24>; @@ -464,6 +465,7 @@ }; mmu_iva: mmu@5d000000 { + #iommu-cells = <0>; compatible = "ti,omap2-iommu"; reg = <0x5d000000 0x80>; interrupts = <28>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index efe5f737f39b..7d24ae0306b5 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -128,7 +128,7 @@ * hierarchy. */ ocp { - compatible = "ti,omap4-l3-noc", "simple-bus"; + compatible = "ti,omap5-l3-noc", "simple-bus"; #address-cells = <1>; #size-cells = <1>; ranges; diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts index 74c3212f1f11..824ddab9c3ad 100644 --- a/arch/arm/boot/dts/r8a7791-koelsch.dts +++ b/arch/arm/boot/dts/r8a7791-koelsch.dts @@ -545,7 +545,7 @@ compatible = "adi,adv7511w"; reg = <0x39>; interrupt-parent = <&gpio3>; - interrupts = <29 IRQ_TYPE_EDGE_FALLING>; + interrupts = <29 IRQ_TYPE_LEVEL_LOW>; adi,input-depth = <8>; adi,input-colorspace = "rgb"; diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index bfd3f1c734b8..2201cd5da3bb 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -1017,23 +1017,6 @@ status = "disabled"; }; - vmmci: regulator-gpio { - compatible = "regulator-gpio"; - - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <2900000>; - regulator-name = "mmci-reg"; - regulator-type = "voltage"; - - startup-delay-us = <100>; - enable-active-high; - - states = <1800000 0x1 - 2900000 0x0>; - - status = "disabled"; - }; - mcde@a0350000 { compatible = "stericsson,mcde"; reg = <0xa0350000 0x1000>, /* MCDE */ diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi index bf8f0eddc2c0..744c1e3a744d 100644 --- a/arch/arm/boot/dts/ste-href.dtsi +++ b/arch/arm/boot/dts/ste-href.dtsi @@ -111,6 +111,21 @@ pinctrl-1 = <&i2c3_sleep_mode>; }; + vmmci: regulator-gpio { + compatible = "regulator-gpio"; + + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <2900000>; + regulator-name = "mmci-reg"; + regulator-type = "voltage"; + + startup-delay-us = <100>; + enable-active-high; + + states = <1800000 0x1 + 2900000 0x0>; + }; + // External Micro SD slot sdi0_per1@80126000 { arm,primecell-periphid = <0x10480180>; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index 206826a855c0..1bc84ebdccaa 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -146,8 +146,21 @@ }; vmmci: regulator-gpio { + compatible = "regulator-gpio"; + gpios = <&gpio7 4 0x4>; enable-gpio = <&gpio6 25 0x4>; + + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <2900000>; + regulator-name = "mmci-reg"; + regulator-type = "voltage"; + + startup-delay-us = <100>; + enable-active-high; + + states = <1800000 0x1 + 2900000 0x0>; }; // External Micro SD slot diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index cf01c818b8ea..13cc7ca5e031 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -826,7 +826,7 @@ <&tegra_car TEGRA124_CLK_PLL_U>, <&tegra_car TEGRA124_CLK_USBD>; clock-names = "reg", "pll_u", "utmi-pads"; - resets = <&tegra_car 59>, <&tegra_car 22>; + resets = <&tegra_car 22>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; nvidia,hssync-start-delay = <0>; nvidia,idle-wait-delay = <17>; @@ -838,6 +838,7 @@ nvidia,hssquelch-level = <2>; nvidia,hsdiscon-level = <5>; nvidia,xcvr-hsslew = <12>; + nvidia,has-utmi-pad-registers; status = "disabled"; }; @@ -862,7 +863,7 @@ <&tegra_car TEGRA124_CLK_PLL_U>, <&tegra_car TEGRA124_CLK_USBD>; clock-names = "reg", "pll_u", "utmi-pads"; - resets = <&tegra_car 22>, <&tegra_car 22>; + resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; nvidia,hssync-start-delay = <0>; nvidia,idle-wait-delay = <17>; @@ -874,7 +875,6 @@ nvidia,hssquelch-level = <2>; nvidia,hsdiscon-level = <5>; nvidia,xcvr-hsslew = <12>; - nvidia,has-utmi-pad-registers; status = "disabled"; }; @@ -899,7 +899,7 @@ <&tegra_car TEGRA124_CLK_PLL_U>, <&tegra_car TEGRA124_CLK_USBD>; clock-names = "reg", "pll_u", "utmi-pads"; - resets = <&tegra_car 58>, <&tegra_car 22>; + resets = <&tegra_car 59>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; nvidia,hssync-start-delay = <0>; nvidia,idle-wait-delay = <17>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 7a2aeacd62c0..107395c32d82 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -191,6 +191,7 @@ compatible = "arm,cortex-a15-pmu"; interrupts = <0 68 4>, <0 69 4>; + interrupt-affinity = <&cpu0>, <&cpu1>; }; oscclk6a: oscclk6a { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts index 23662b5a5e9d..d949facba376 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts @@ -33,28 +33,28 @@ #address-cells = <1>; #size-cells = <0>; - cpu@0 { + A9_0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0>; next-level-cache = <&L2>; }; - cpu@1 { + A9_1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <1>; next-level-cache = <&L2>; }; - cpu@2 { + A9_2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <2>; next-level-cache = <&L2>; }; - cpu@3 { + A9_3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <3>; @@ -170,6 +170,7 @@ compatible = "arm,pl310-cache"; reg = <0x1e00a000 0x1000>; interrupts = <0 43 4>; + cache-unified; cache-level = <2>; arm,data-latency = <1 1 1>; arm,tag-latency = <1 1 1>; @@ -181,6 +182,8 @@ <0 61 4>, <0 62 4>, <0 63 4>; + interrupt-affinity = <&A9_0>, <&A9_1>, <&A9_2>, <&A9_3>; + }; dcc { diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index a5cd2eda3edf..9ea54b3dba09 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -193,7 +193,7 @@ }; gem0: ethernet@e000b000 { - compatible = "cdns,gem"; + compatible = "cdns,zynq-gem"; reg = <0xe000b000 0x1000>; status = "disabled"; interrupts = <0 22 4>; @@ -204,7 +204,7 @@ }; gem1: ethernet@e000c000 { - compatible = "cdns,gem"; + compatible = "cdns,zynq-gem"; reg = <0xe000c000 0x1000>; status = "disabled"; interrupts = <0 45 4>; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ab86655c1f4b..fbbb1915c6a9 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -39,11 +39,14 @@ CONFIG_ARCH_HIP04=y CONFIG_ARCH_KEYSTONE=y CONFIG_ARCH_MESON=y CONFIG_ARCH_MXC=y +CONFIG_SOC_IMX50=y CONFIG_SOC_IMX51=y CONFIG_SOC_IMX53=y CONFIG_SOC_IMX6Q=y CONFIG_SOC_IMX6SL=y +CONFIG_SOC_IMX6SX=y CONFIG_SOC_VF610=y +CONFIG_SOC_LS1021A=y CONFIG_ARCH_OMAP3=y CONFIG_ARCH_OMAP4=y CONFIG_SOC_OMAP5=y @@ -426,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_STI=y CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_ISP1760=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_STI=y CONFIG_USB_OHCI_HCD_PLATFORM=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 9ff7b54b2a83..3743ca221d40 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -393,7 +393,7 @@ CONFIG_TI_EDMA=y CONFIG_DMA_OMAP=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXTCON=m -CONFIG_EXTCON_GPIO=m +CONFIG_EXTCON_USB_GPIO=m CONFIG_EXTCON_PALMAS=m CONFIG_TI_EMIF=m CONFIG_PWM=y diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 8e3fcb924db6..2ef282f96651 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -25,7 +25,7 @@ struct dma_iommu_mapping { }; struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size); +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size); void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping); diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f8ccc21fa032..4e7f40c577e6 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -33,7 +33,9 @@ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq @ disable interrupts - ldr r1, [tsk, #TI_FLAGS] + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return tst r1, #_TIF_WORK_MASK bne fast_work_pending asm_trace_hardirqs_on diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 91c7ba182dcd..3b8c2833c537 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -303,9 +303,15 @@ static int probe_current_pmu(struct arm_pmu *pmu) static int of_pmu_irq_cfg(struct platform_device *pdev) { - int i; - int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + int i, irq; + int *irqs; + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) return -ENOMEM; @@ -317,7 +323,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev) i); if (!dn) { pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(dn), i); + of_node_full_name(pdev->dev.of_node), i); break; } diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index acd5b560b728..5f5cd562c593 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -159,6 +159,8 @@ extern void exynos_enter_aftr(void); extern struct cpuidle_exynos_data cpuidle_coupled_exynos_data; +extern void exynos_set_delayed_reset_assertion(bool enable); + extern void s5p_init_cpu(void __iomem *cpuid_addr); extern unsigned int samsung_rev(void); extern void __iomem *cpu_boot_reg_base(void); diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index bcde0dd668df..5917a30eee33 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -167,6 +167,33 @@ static void __init exynos_init_io(void) } /* + * Set or clear the USE_DELAYED_RESET_ASSERTION option. Used by smp code + * and suspend. + * + * This is necessary only on Exynos4 SoCs. When system is running + * USE_DELAYED_RESET_ASSERTION should be set so the ARM CLK clock down + * feature could properly detect global idle state when secondary CPU is + * powered down. + * + * However this should not be set when such system is going into suspend. + */ +void exynos_set_delayed_reset_assertion(bool enable) +{ + if (of_machine_is_compatible("samsung,exynos4")) { + unsigned int tmp, core_id; + + for (core_id = 0; core_id < num_possible_cpus(); core_id++) { + tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id)); + if (enable) + tmp |= S5P_USE_DELAYED_RESET_ASSERTION; + else + tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION); + pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id)); + } + } +} + +/* * Apparently, these SoCs are not able to wake-up from suspend using * the PMU. Too bad. Should they suddenly become capable of such a * feat, the matches below should be moved to suspend.c. diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index ebd135bb0995..a825bca2a2b6 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -34,30 +34,6 @@ extern void exynos4_secondary_startup(void); -/* - * Set or clear the USE_DELAYED_RESET_ASSERTION option, set on Exynos4 SoCs - * during hot-(un)plugging CPUx. - * - * The feature can be cleared safely during first boot of secondary CPU. - * - * Exynos4 SoCs require setting USE_DELAYED_RESET_ASSERTION during powering - * down a CPU so the CPU idle clock down feature could properly detect global - * idle state when CPUx is off. - */ -static void exynos_set_delayed_reset_assertion(u32 core_id, bool enable) -{ - if (soc_is_exynos4()) { - unsigned int tmp; - - tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id)); - if (enable) - tmp |= S5P_USE_DELAYED_RESET_ASSERTION; - else - tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION); - pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id)); - } -} - #ifdef CONFIG_HOTPLUG_CPU static inline void cpu_leave_lowpower(u32 core_id) { @@ -73,8 +49,6 @@ static inline void cpu_leave_lowpower(u32 core_id) : "=&r" (v) : "Ir" (CR_C), "Ir" (0x40) : "cc"); - - exynos_set_delayed_reset_assertion(core_id, false); } static inline void platform_do_lowpower(unsigned int cpu, int *spurious) @@ -87,14 +61,6 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious) /* Turn the CPU off on next WFI instruction. */ exynos_cpu_power_down(core_id); - /* - * Exynos4 SoCs require setting - * USE_DELAYED_RESET_ASSERTION so the CPU idle - * clock down feature could properly detect - * global idle state when CPUx is off. - */ - exynos_set_delayed_reset_assertion(core_id, true); - wfi(); if (pen_release == core_id) { @@ -371,9 +337,6 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) udelay(10); } - /* No harm if this is called during first boot of secondary CPU */ - exynos_set_delayed_reset_assertion(core_id, false); - /* * now the secondary core is starting up let it run its * calibrations, then wait for it to finish @@ -420,6 +383,8 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) exynos_sysram_init(); + exynos_set_delayed_reset_assertion(true); + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) scu_enable(scu_base_addr()); diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index cbe56b35aea0..a9686535f9ed 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -188,7 +188,7 @@ no_clk: args.np = np; args.args_count = 0; child_domain = of_genpd_get_from_provider(&args); - if (!child_domain) + if (IS_ERR(child_domain)) continue; if (of_parse_phandle_with_args(np, "power-domains", @@ -196,7 +196,7 @@ no_clk: continue; parent_domain = of_genpd_get_from_provider(&args); - if (!parent_domain) + if (IS_ERR(parent_domain)) continue; if (pm_genpd_add_subdomain(parent_domain, child_domain)) diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 3e6aea7f83af..c0b6dccbf7bd 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -342,6 +342,8 @@ static void exynos_pm_enter_sleep_mode(void) static void exynos_pm_prepare(void) { + exynos_set_delayed_reset_assertion(false); + /* Set wake-up mask registers */ exynos_pm_set_wakeup_mask(); @@ -482,6 +484,7 @@ early_wakeup: /* Clear SLEEP mode set in INFORM1 */ pmu_raw_writel(0x0, S5P_INFORM1); + exynos_set_delayed_reset_assertion(true); } static void exynos3250_pm_resume(void) @@ -723,8 +726,10 @@ void __init exynos_pm_init(void) return; } - if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) + if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + return; + } pm_data = (const struct exynos_pm_data *) match->data; diff --git a/arch/arm/mach-gemini/common.h b/arch/arm/mach-gemini/common.h index 38a45260a7c8..dd883698ff7e 100644 --- a/arch/arm/mach-gemini/common.h +++ b/arch/arm/mach-gemini/common.h @@ -12,6 +12,8 @@ #ifndef __GEMINI_COMMON_H__ #define __GEMINI_COMMON_H__ +#include <linux/reboot.h> + struct mtd_partition; extern void gemini_map_io(void); @@ -26,6 +28,6 @@ extern int platform_register_pflash(unsigned int size, struct mtd_partition *parts, unsigned int nr_parts); -extern void gemini_restart(char mode, const char *cmd); +extern void gemini_restart(enum reboot_mode mode, const char *cmd); #endif /* __GEMINI_COMMON_H__ */ diff --git a/arch/arm/mach-gemini/reset.c b/arch/arm/mach-gemini/reset.c index b26659759e27..21a6d6d4f9c4 100644 --- a/arch/arm/mach-gemini/reset.c +++ b/arch/arm/mach-gemini/reset.c @@ -14,7 +14,9 @@ #include <mach/hardware.h> #include <mach/global_reg.h> -void gemini_restart(char mode, const char *cmd) +#include "common.h" + +void gemini_restart(enum reboot_mode mode, const char *cmd) { __raw_writel(RESET_GLOBAL | RESET_CPU1, IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET); diff --git a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c index fb8d4a2ad48c..a5edd7d60266 100644 --- a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c +++ b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Pengutronix, Wolfram Sang <w.sang@pengutronix.de> + * Copyright (C) 2010 Pengutronix, Wolfram Sang <kernel@pengutronix.de> * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2 as published by the diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 4d60005e9277..6d0893a3828e 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void) struct device_node *np; np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc"); - if (WARN_ON(!np || - !of_find_property(np, "interrupt-controller", NULL))) - pr_warn("Outdated DT detected, system is about to crash!!!\n"); + if (WARN_ON(!np)) + return; + + if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { + pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + + /* map GPC, so that at least CPUidle and WARs keep working */ + gpc_base = of_iomap(np, 0); + } } #ifdef CONFIG_PM_GENERIC_DOMAINS @@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev) struct regulator *pu_reg; int ret; + /* bail out if DT too old and doesn't provide the necessary info */ + if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells")) + return 0; + pu_reg = devm_regulator_get_optional(&pdev->dev, "pu"); if (PTR_ERR(pu_reg) == -ENODEV) pu_reg = NULL; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 355b08936871..752969ff9de0 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -171,6 +171,12 @@ */ #define LINKS_PER_OCP_IF 2 +/* + * Address offset (in bytes) between the reset control and the reset + * status registers: 4 bytes on OMAP4 + */ +#define OMAP4_RST_CTRL_ST_OFFSET 4 + /** * struct omap_hwmod_soc_ops - fn ptrs for some SoC-specific operations * @enable_module: function to enable a module (via MODULEMODE) @@ -3016,10 +3022,12 @@ static int _omap4_deassert_hardreset(struct omap_hwmod *oh, if (ohri->st_shift) pr_err("omap_hwmod: %s: %s: hwmod data error: OMAP4 does not support st_shift\n", oh->name, ohri->name); - return omap_prm_deassert_hardreset(ohri->rst_shift, 0, + return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->rst_shift, oh->clkdm->pwrdm.ptr->prcm_partition, oh->clkdm->pwrdm.ptr->prcm_offs, - oh->prcm.omap4.rstctrl_offs, 0); + oh->prcm.omap4.rstctrl_offs, + oh->prcm.omap4.rstctrl_offs + + OMAP4_RST_CTRL_ST_OFFSET); } /** @@ -3048,27 +3056,6 @@ static int _omap4_is_hardreset_asserted(struct omap_hwmod *oh, } /** - * _am33xx_assert_hardreset - call AM33XX PRM hardreset fn with hwmod args - * @oh: struct omap_hwmod * to assert hardreset - * @ohri: hardreset line data - * - * Call am33xx_prminst_assert_hardreset() with parameters extracted - * from the hwmod @oh and the hardreset line data @ohri. Only - * intended for use as an soc_ops function pointer. Passes along the - * return value from am33xx_prminst_assert_hardreset(). XXX This - * function is scheduled for removal when the PRM code is moved into - * drivers/. - */ -static int _am33xx_assert_hardreset(struct omap_hwmod *oh, - struct omap_hwmod_rst_info *ohri) - -{ - return omap_prm_assert_hardreset(ohri->rst_shift, 0, - oh->clkdm->pwrdm.ptr->prcm_offs, - oh->prcm.omap4.rstctrl_offs); -} - -/** * _am33xx_deassert_hardreset - call AM33XX PRM hardreset fn with hwmod args * @oh: struct omap_hwmod * to deassert hardreset * @ohri: hardreset line data @@ -3083,32 +3070,13 @@ static int _am33xx_assert_hardreset(struct omap_hwmod *oh, static int _am33xx_deassert_hardreset(struct omap_hwmod *oh, struct omap_hwmod_rst_info *ohri) { - return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift, 0, + return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift, + oh->clkdm->pwrdm.ptr->prcm_partition, oh->clkdm->pwrdm.ptr->prcm_offs, oh->prcm.omap4.rstctrl_offs, oh->prcm.omap4.rstst_offs); } -/** - * _am33xx_is_hardreset_asserted - call AM33XX PRM hardreset fn with hwmod args - * @oh: struct omap_hwmod * to test hardreset - * @ohri: hardreset line data - * - * Call am33xx_prminst_is_hardreset_asserted() with parameters - * extracted from the hwmod @oh and the hardreset line data @ohri. - * Only intended for use as an soc_ops function pointer. Passes along - * the return value from am33xx_prminst_is_hardreset_asserted(). XXX - * This function is scheduled for removal when the PRM code is moved - * into drivers/. - */ -static int _am33xx_is_hardreset_asserted(struct omap_hwmod *oh, - struct omap_hwmod_rst_info *ohri) -{ - return omap_prm_is_hardreset_asserted(ohri->rst_shift, 0, - oh->clkdm->pwrdm.ptr->prcm_offs, - oh->prcm.omap4.rstctrl_offs); -} - /* Public functions */ u32 omap_hwmod_read(struct omap_hwmod *oh, u16 reg_offs) @@ -3908,21 +3876,13 @@ void __init omap_hwmod_init(void) soc_ops.init_clkdm = _init_clkdm; soc_ops.update_context_lost = _omap4_update_context_lost; soc_ops.get_context_lost = _omap4_get_context_lost; - } else if (soc_is_am43xx()) { + } else if (cpu_is_ti816x() || soc_is_am33xx() || soc_is_am43xx()) { soc_ops.enable_module = _omap4_enable_module; soc_ops.disable_module = _omap4_disable_module; soc_ops.wait_target_ready = _omap4_wait_target_ready; soc_ops.assert_hardreset = _omap4_assert_hardreset; - soc_ops.deassert_hardreset = _omap4_deassert_hardreset; - soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted; - soc_ops.init_clkdm = _init_clkdm; - } else if (cpu_is_ti816x() || soc_is_am33xx()) { - soc_ops.enable_module = _omap4_enable_module; - soc_ops.disable_module = _omap4_disable_module; - soc_ops.wait_target_ready = _omap4_wait_target_ready; - soc_ops.assert_hardreset = _am33xx_assert_hardreset; soc_ops.deassert_hardreset = _am33xx_deassert_hardreset; - soc_ops.is_hardreset_asserted = _am33xx_is_hardreset_asserted; + soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted; soc_ops.init_clkdm = _init_clkdm; } else { WARN(1, "omap_hwmod: unknown SoC type\n"); diff --git a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c index e2223148ba4d..17e8004fc20f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_43xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_43xx_data.c @@ -544,6 +544,44 @@ static struct omap_hwmod am43xx_hdq1w_hwmod = { }, }; +static struct omap_hwmod_class_sysconfig am43xx_vpfe_sysc = { + .rev_offs = 0x0, + .sysc_offs = 0x104, + .sysc_flags = SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE, + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | + MSTANDBY_FORCE | MSTANDBY_SMART | MSTANDBY_NO), + .sysc_fields = &omap_hwmod_sysc_type2, +}; + +static struct omap_hwmod_class am43xx_vpfe_hwmod_class = { + .name = "vpfe", + .sysc = &am43xx_vpfe_sysc, +}; + +static struct omap_hwmod am43xx_vpfe0_hwmod = { + .name = "vpfe0", + .class = &am43xx_vpfe_hwmod_class, + .clkdm_name = "l3s_clkdm", + .prcm = { + .omap4 = { + .modulemode = MODULEMODE_SWCTRL, + .clkctrl_offs = AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET, + }, + }, +}; + +static struct omap_hwmod am43xx_vpfe1_hwmod = { + .name = "vpfe1", + .class = &am43xx_vpfe_hwmod_class, + .clkdm_name = "l3s_clkdm", + .prcm = { + .omap4 = { + .modulemode = MODULEMODE_SWCTRL, + .clkctrl_offs = AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET, + }, + }, +}; + /* Interfaces */ static struct omap_hwmod_ocp_if am43xx_l3_main__l4_hs = { .master = &am33xx_l3_main_hwmod, @@ -825,6 +863,34 @@ static struct omap_hwmod_ocp_if am43xx_l4_ls__hdq1w = { .user = OCP_USER_MPU | OCP_USER_SDMA, }; +static struct omap_hwmod_ocp_if am43xx_l3__vpfe0 = { + .master = &am43xx_vpfe0_hwmod, + .slave = &am33xx_l3_main_hwmod, + .clk = "l3_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + +static struct omap_hwmod_ocp_if am43xx_l3__vpfe1 = { + .master = &am43xx_vpfe1_hwmod, + .slave = &am33xx_l3_main_hwmod, + .clk = "l3_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + +static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe0 = { + .master = &am33xx_l4_ls_hwmod, + .slave = &am43xx_vpfe0_hwmod, + .clk = "l4ls_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + +static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe1 = { + .master = &am33xx_l4_ls_hwmod, + .slave = &am43xx_vpfe1_hwmod, + .clk = "l4ls_gclk", + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = { &am33xx_l4_wkup__synctimer, &am43xx_l4_ls__timer8, @@ -925,6 +991,10 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = { &am43xx_l4_ls__dss_dispc, &am43xx_l4_ls__dss_rfbi, &am43xx_l4_ls__hdq1w, + &am43xx_l3__vpfe0, + &am43xx_l3__vpfe1, + &am43xx_l4_ls__vpfe0, + &am43xx_l4_ls__vpfe1, NULL, }; diff --git a/arch/arm/mach-omap2/prcm43xx.h b/arch/arm/mach-omap2/prcm43xx.h index 48df3b55057e..d0261996db6d 100644 --- a/arch/arm/mach-omap2/prcm43xx.h +++ b/arch/arm/mach-omap2/prcm43xx.h @@ -144,5 +144,6 @@ #define AM43XX_CM_PER_USBPHYOCP2SCP1_CLKCTRL_OFFSET 0x05C0 #define AM43XX_CM_PER_DSS_CLKCTRL_OFFSET 0x0a20 #define AM43XX_CM_PER_HDQ1W_CLKCTRL_OFFSET 0x04a0 - +#define AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET 0x0068 +#define AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET 0x0070 #endif diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h index cbefbd7cfdb5..661d753df584 100644 --- a/arch/arm/mach-omap2/prm-regbits-34xx.h +++ b/arch/arm/mach-omap2/prm-regbits-34xx.h @@ -112,6 +112,7 @@ #define OMAP3430_VC_CMD_ONLP_SHIFT 16 #define OMAP3430_VC_CMD_RET_SHIFT 8 #define OMAP3430_VC_CMD_OFF_SHIFT 0 +#define OMAP3430_SREN_MASK (1 << 4) #define OMAP3430_HSEN_MASK (1 << 3) #define OMAP3430_MCODE_MASK (0x7 << 0) #define OMAP3430_VALID_MASK (1 << 24) diff --git a/arch/arm/mach-omap2/prm-regbits-44xx.h b/arch/arm/mach-omap2/prm-regbits-44xx.h index b1c7a33e00e7..e794828dee55 100644 --- a/arch/arm/mach-omap2/prm-regbits-44xx.h +++ b/arch/arm/mach-omap2/prm-regbits-44xx.h @@ -35,6 +35,7 @@ #define OMAP4430_GLOBAL_WARM_SW_RST_SHIFT 1 #define OMAP4430_GLOBAL_WUEN_MASK (1 << 16) #define OMAP4430_HSMCODE_MASK (0x7 << 0) +#define OMAP4430_SRMODEEN_MASK (1 << 4) #define OMAP4430_HSMODEEN_MASK (1 << 3) #define OMAP4430_HSSCLL_SHIFT 24 #define OMAP4430_ICEPICK_RST_SHIFT 9 diff --git a/arch/arm/mach-omap2/prminst44xx.c b/arch/arm/mach-omap2/prminst44xx.c index c4859c4d3646..d0b15dbafa2e 100644 --- a/arch/arm/mach-omap2/prminst44xx.c +++ b/arch/arm/mach-omap2/prminst44xx.c @@ -87,12 +87,6 @@ u32 omap4_prminst_rmw_inst_reg_bits(u32 mask, u32 bits, u8 part, s16 inst, return v; } -/* - * Address offset (in bytes) between the reset control and the reset - * status registers: 4 bytes on OMAP4 - */ -#define OMAP4_RST_CTRL_ST_OFFSET 4 - /** * omap4_prminst_is_hardreset_asserted - read the HW reset line state of * submodules contained in the hwmod module @@ -141,11 +135,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst, * omap4_prminst_deassert_hardreset - deassert a submodule hardreset line and * wait * @shift: register bit shift corresponding to the reset line to deassert - * @st_shift: status bit offset, not used for OMAP4+ + * @st_shift: status bit offset corresponding to the reset line * @part: PRM partition * @inst: PRM instance offset * @rstctrl_offs: reset register offset - * @st_offs: reset status register offset, not used for OMAP4+ + * @rstst_offs: reset status register offset * * Some IPs like dsp, ipu or iva contain processors that require an HW * reset line to be asserted / deasserted in order to fully enable the @@ -157,11 +151,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst, * of reset, or -EBUSY if the submodule did not exit reset promptly. */ int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst, - u16 rstctrl_offs, u16 st_offs) + u16 rstctrl_offs, u16 rstst_offs) { int c; u32 mask = 1 << shift; - u16 rstst_offs = rstctrl_offs + OMAP4_RST_CTRL_ST_OFFSET; + u32 st_mask = 1 << st_shift; /* Check the current status to avoid de-asserting the line twice */ if (omap4_prminst_is_hardreset_asserted(shift, part, inst, @@ -169,13 +163,13 @@ int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst, return -EEXIST; /* Clear the reset status by writing 1 to the status bit */ - omap4_prminst_rmw_inst_reg_bits(0xffffffff, mask, part, inst, + omap4_prminst_rmw_inst_reg_bits(0xffffffff, st_mask, part, inst, rstst_offs); /* de-assert the reset control line */ omap4_prminst_rmw_inst_reg_bits(mask, 0, part, inst, rstctrl_offs); /* wait the status to be set */ - omap_test_timeout(omap4_prminst_is_hardreset_asserted(shift, part, inst, - rstst_offs), + omap_test_timeout(omap4_prminst_is_hardreset_asserted(st_shift, part, + inst, rstst_offs), MAX_MODULE_HARDRESET_WAIT, c); return (c == MAX_MODULE_HARDRESET_WAIT) ? -EBUSY : 0; diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index cef67af9e9b8..cac46d852da1 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -298,14 +298,11 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer, if (IS_ERR(src)) return PTR_ERR(src); - if (clk_get_parent(timer->fclk) != src) { - r = clk_set_parent(timer->fclk, src); - if (r < 0) { - pr_warn("%s: %s cannot set source\n", __func__, - oh->name); - clk_put(src); - return r; - } + r = clk_set_parent(timer->fclk, src); + if (r < 0) { + pr_warn("%s: %s cannot set source\n", __func__, oh->name); + clk_put(src); + return r; } clk_put(src); diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c index be9ef834fa81..076fd20d7e5a 100644 --- a/arch/arm/mach-omap2/vc.c +++ b/arch/arm/mach-omap2/vc.c @@ -316,7 +316,8 @@ static void __init omap3_vc_init_pmic_signaling(struct voltagedomain *voltdm) * idle. And we can also scale voltages to zero for off-idle. * Note that no actual voltage scaling during off-idle will * happen unless the board specific twl4030 PMIC scripts are - * loaded. + * loaded. See also omap_vc_i2c_init for comments regarding + * erratum i531. */ val = voltdm->read(OMAP3_PRM_VOLTCTRL_OFFSET); if (!(val & OMAP3430_PRM_VOLTCTRL_SEL_OFF)) { @@ -704,9 +705,16 @@ static void __init omap_vc_i2c_init(struct voltagedomain *voltdm) return; } + /* + * Note that for omap3 OMAP3430_SREN_MASK clears SREN to work around + * erratum i531 "Extra Power Consumed When Repeated Start Operation + * Mode Is Enabled on I2C Interface Dedicated for Smart Reflex (I2C4)". + * Otherwise I2C4 eventually leads into about 23mW extra power being + * consumed even during off idle using VMODE. + */ i2c_high_speed = voltdm->pmic->i2c_high_speed; if (i2c_high_speed) - voltdm->rmw(vc->common->i2c_cfg_hsen_mask, + voltdm->rmw(vc->common->i2c_cfg_clear_mask, vc->common->i2c_cfg_hsen_mask, vc->common->i2c_cfg_reg); diff --git a/arch/arm/mach-omap2/vc.h b/arch/arm/mach-omap2/vc.h index cdbdd78e755e..89b83b7ff3ec 100644 --- a/arch/arm/mach-omap2/vc.h +++ b/arch/arm/mach-omap2/vc.h @@ -34,6 +34,7 @@ struct voltagedomain; * @cmd_ret_shift: RET field shift in PRM_VC_CMD_VAL_* register * @cmd_off_shift: OFF field shift in PRM_VC_CMD_VAL_* register * @i2c_cfg_reg: I2C configuration register offset + * @i2c_cfg_clear_mask: high-speed mode bit clear mask in I2C config register * @i2c_cfg_hsen_mask: high-speed mode bit field mask in I2C config register * @i2c_mcode_mask: MCODE field mask for I2C config register * @@ -52,6 +53,7 @@ struct omap_vc_common { u8 cmd_ret_shift; u8 cmd_off_shift; u8 i2c_cfg_reg; + u8 i2c_cfg_clear_mask; u8 i2c_cfg_hsen_mask; u8 i2c_mcode_mask; }; diff --git a/arch/arm/mach-omap2/vc3xxx_data.c b/arch/arm/mach-omap2/vc3xxx_data.c index 75bc4aa22b3a..71d74c9172c1 100644 --- a/arch/arm/mach-omap2/vc3xxx_data.c +++ b/arch/arm/mach-omap2/vc3xxx_data.c @@ -40,6 +40,7 @@ static struct omap_vc_common omap3_vc_common = { .cmd_onlp_shift = OMAP3430_VC_CMD_ONLP_SHIFT, .cmd_ret_shift = OMAP3430_VC_CMD_RET_SHIFT, .cmd_off_shift = OMAP3430_VC_CMD_OFF_SHIFT, + .i2c_cfg_clear_mask = OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK, .i2c_cfg_hsen_mask = OMAP3430_HSEN_MASK, .i2c_cfg_reg = OMAP3_PRM_VC_I2C_CFG_OFFSET, .i2c_mcode_mask = OMAP3430_MCODE_MASK, diff --git a/arch/arm/mach-omap2/vc44xx_data.c b/arch/arm/mach-omap2/vc44xx_data.c index 085e5d6a04fd..2abd5fa8a697 100644 --- a/arch/arm/mach-omap2/vc44xx_data.c +++ b/arch/arm/mach-omap2/vc44xx_data.c @@ -42,6 +42,7 @@ static const struct omap_vc_common omap4_vc_common = { .cmd_ret_shift = OMAP4430_RET_SHIFT, .cmd_off_shift = OMAP4430_OFF_SHIFT, .i2c_cfg_reg = OMAP4_PRM_VC_CFG_I2C_MODE_OFFSET, + .i2c_cfg_clear_mask = OMAP4430_SRMODEEN_MASK | OMAP4430_HSMODEEN_MASK, .i2c_cfg_hsen_mask = OMAP4430_HSMODEEN_MASK, .i2c_mcode_mask = OMAP4430_HSMCODE_MASK, }; diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 8896e71586f5..f09683687963 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -691,4 +691,13 @@ config SHARPSL_PM_MAX1111 config PXA310_ULPI bool +config PXA_SYSTEMS_CPLDS + tristate "Motherboard cplds" + default ARCH_LUBBOCK || MACH_MAINSTONE + help + This driver supports the Lubbock and Mainstone multifunction chip + found on the pxa25x development platform system (Lubbock) and pxa27x + development platform system (Mainstone). This IO board supports the + interrupts handling, ethernet controller, flash chips, etc ... + endif diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index eb0bf7678a99..4087d334ecdf 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -90,4 +90,5 @@ obj-$(CONFIG_MACH_RAUMFELD_CONNECTOR) += raumfeld.o obj-$(CONFIG_MACH_RAUMFELD_SPEAKER) += raumfeld.o obj-$(CONFIG_MACH_ZIPIT2) += z2.o +obj-$(CONFIG_PXA_SYSTEMS_CPLDS) += pxa_cplds_irqs.o obj-$(CONFIG_TOSA_BT) += tosa-bt.o diff --git a/arch/arm/mach-pxa/include/mach/lubbock.h b/arch/arm/mach-pxa/include/mach/lubbock.h index 958cd6af9384..1eecf794acd2 100644 --- a/arch/arm/mach-pxa/include/mach/lubbock.h +++ b/arch/arm/mach-pxa/include/mach/lubbock.h @@ -37,7 +37,9 @@ #define LUB_GP __LUB_REG(LUBBOCK_FPGA_PHYS + 0x100) /* Board specific IRQs */ -#define LUBBOCK_IRQ(x) (IRQ_BOARD_START + (x)) +#define LUBBOCK_NR_IRQS IRQ_BOARD_START + +#define LUBBOCK_IRQ(x) (LUBBOCK_NR_IRQS + (x)) #define LUBBOCK_SD_IRQ LUBBOCK_IRQ(0) #define LUBBOCK_SA1111_IRQ LUBBOCK_IRQ(1) #define LUBBOCK_USB_IRQ LUBBOCK_IRQ(2) /* usb connect */ @@ -47,8 +49,7 @@ #define LUBBOCK_USB_DISC_IRQ LUBBOCK_IRQ(6) /* usb disconnect */ #define LUBBOCK_LAST_IRQ LUBBOCK_IRQ(6) -#define LUBBOCK_SA1111_IRQ_BASE (IRQ_BOARD_START + 16) -#define LUBBOCK_NR_IRQS (IRQ_BOARD_START + 16 + 55) +#define LUBBOCK_SA1111_IRQ_BASE (LUBBOCK_NR_IRQS + 32) #ifndef __ASSEMBLY__ extern void lubbock_set_misc_wr(unsigned int mask, unsigned int set); diff --git a/arch/arm/mach-pxa/include/mach/mainstone.h b/arch/arm/mach-pxa/include/mach/mainstone.h index 1bfc4e822a41..e82a7d31104e 100644 --- a/arch/arm/mach-pxa/include/mach/mainstone.h +++ b/arch/arm/mach-pxa/include/mach/mainstone.h @@ -120,7 +120,9 @@ #define MST_PCMCIA_PWR_VCC_50 0x4 /* voltage VCC = 5.0V */ /* board specific IRQs */ -#define MAINSTONE_IRQ(x) (IRQ_BOARD_START + (x)) +#define MAINSTONE_NR_IRQS IRQ_BOARD_START + +#define MAINSTONE_IRQ(x) (MAINSTONE_NR_IRQS + (x)) #define MAINSTONE_MMC_IRQ MAINSTONE_IRQ(0) #define MAINSTONE_USIM_IRQ MAINSTONE_IRQ(1) #define MAINSTONE_USBC_IRQ MAINSTONE_IRQ(2) @@ -136,6 +138,4 @@ #define MAINSTONE_S1_STSCHG_IRQ MAINSTONE_IRQ(14) #define MAINSTONE_S1_IRQ MAINSTONE_IRQ(15) -#define MAINSTONE_NR_IRQS (IRQ_BOARD_START + 16) - #endif diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index d8a1be619f21..4ac9ab80d24b 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -12,6 +12,7 @@ * published by the Free Software Foundation. */ #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -123,84 +124,6 @@ void lubbock_set_misc_wr(unsigned int mask, unsigned int set) } EXPORT_SYMBOL(lubbock_set_misc_wr); -static unsigned long lubbock_irq_enabled; - -static void lubbock_mask_irq(struct irq_data *d) -{ - int lubbock_irq = (d->irq - LUBBOCK_IRQ(0)); - LUB_IRQ_MASK_EN = (lubbock_irq_enabled &= ~(1 << lubbock_irq)); -} - -static void lubbock_unmask_irq(struct irq_data *d) -{ - int lubbock_irq = (d->irq - LUBBOCK_IRQ(0)); - /* the irq can be acknowledged only if deasserted, so it's done here */ - LUB_IRQ_SET_CLR &= ~(1 << lubbock_irq); - LUB_IRQ_MASK_EN = (lubbock_irq_enabled |= (1 << lubbock_irq)); -} - -static struct irq_chip lubbock_irq_chip = { - .name = "FPGA", - .irq_ack = lubbock_mask_irq, - .irq_mask = lubbock_mask_irq, - .irq_unmask = lubbock_unmask_irq, -}; - -static void lubbock_irq_handler(unsigned int irq, struct irq_desc *desc) -{ - unsigned long pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled; - do { - /* clear our parent irq */ - desc->irq_data.chip->irq_ack(&desc->irq_data); - if (likely(pending)) { - irq = LUBBOCK_IRQ(0) + __ffs(pending); - generic_handle_irq(irq); - } - pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled; - } while (pending); -} - -static void __init lubbock_init_irq(void) -{ - int irq; - - pxa25x_init_irq(); - - /* setup extra lubbock irqs */ - for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) { - irq_set_chip_and_handler(irq, &lubbock_irq_chip, - handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - } - - irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), lubbock_irq_handler); - irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING); -} - -#ifdef CONFIG_PM - -static void lubbock_irq_resume(void) -{ - LUB_IRQ_MASK_EN = lubbock_irq_enabled; -} - -static struct syscore_ops lubbock_irq_syscore_ops = { - .resume = lubbock_irq_resume, -}; - -static int __init lubbock_irq_device_init(void) -{ - if (machine_is_lubbock()) { - register_syscore_ops(&lubbock_irq_syscore_ops); - return 0; - } - return -ENODEV; -} - -device_initcall(lubbock_irq_device_init); - -#endif - static int lubbock_udc_is_connected(void) { return (LUB_MISC_RD & (1 << 9)) == 0; @@ -383,11 +306,38 @@ static struct platform_device lubbock_flash_device[2] = { }, }; +static struct resource lubbock_cplds_resources[] = { + [0] = { + .start = LUBBOCK_FPGA_PHYS + 0xc0, + .end = LUBBOCK_FPGA_PHYS + 0xe0 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = PXA_GPIO_TO_IRQ(0), + .end = PXA_GPIO_TO_IRQ(0), + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE, + }, + [2] = { + .start = LUBBOCK_IRQ(0), + .end = LUBBOCK_IRQ(6), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device lubbock_cplds_device = { + .name = "pxa_cplds_irqs", + .id = -1, + .resource = &lubbock_cplds_resources[0], + .num_resources = 3, +}; + + static struct platform_device *devices[] __initdata = { &sa1111_device, &smc91x_device, &lubbock_flash_device[0], &lubbock_flash_device[1], + &lubbock_cplds_device, }; static struct pxafb_mode_info sharp_lm8v31_mode = { @@ -648,7 +598,7 @@ MACHINE_START(LUBBOCK, "Intel DBPXA250 Development Platform (aka Lubbock)") /* Maintainer: MontaVista Software Inc. */ .map_io = lubbock_map_io, .nr_irqs = LUBBOCK_NR_IRQS, - .init_irq = lubbock_init_irq, + .init_irq = pxa25x_init_irq, .handle_irq = pxa25x_handle_irq, .init_time = pxa_timer_init, .init_machine = lubbock_init, diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 78b84c0dfc79..2c0658cf6be2 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -13,6 +13,7 @@ * published by the Free Software Foundation. */ #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/init.h> #include <linux/platform_device.h> #include <linux/syscore_ops.h> @@ -122,92 +123,6 @@ static unsigned long mainstone_pin_config[] = { GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH, }; -static unsigned long mainstone_irq_enabled; - -static void mainstone_mask_irq(struct irq_data *d) -{ - int mainstone_irq = (d->irq - MAINSTONE_IRQ(0)); - MST_INTMSKENA = (mainstone_irq_enabled &= ~(1 << mainstone_irq)); -} - -static void mainstone_unmask_irq(struct irq_data *d) -{ - int mainstone_irq = (d->irq - MAINSTONE_IRQ(0)); - /* the irq can be acknowledged only if deasserted, so it's done here */ - MST_INTSETCLR &= ~(1 << mainstone_irq); - MST_INTMSKENA = (mainstone_irq_enabled |= (1 << mainstone_irq)); -} - -static struct irq_chip mainstone_irq_chip = { - .name = "FPGA", - .irq_ack = mainstone_mask_irq, - .irq_mask = mainstone_mask_irq, - .irq_unmask = mainstone_unmask_irq, -}; - -static void mainstone_irq_handler(unsigned int irq, struct irq_desc *desc) -{ - unsigned long pending = MST_INTSETCLR & mainstone_irq_enabled; - do { - /* clear useless edge notification */ - desc->irq_data.chip->irq_ack(&desc->irq_data); - if (likely(pending)) { - irq = MAINSTONE_IRQ(0) + __ffs(pending); - generic_handle_irq(irq); - } - pending = MST_INTSETCLR & mainstone_irq_enabled; - } while (pending); -} - -static void __init mainstone_init_irq(void) -{ - int irq; - - pxa27x_init_irq(); - - /* setup extra Mainstone irqs */ - for(irq = MAINSTONE_IRQ(0); irq <= MAINSTONE_IRQ(15); irq++) { - irq_set_chip_and_handler(irq, &mainstone_irq_chip, - handle_level_irq); - if (irq == MAINSTONE_IRQ(10) || irq == MAINSTONE_IRQ(14)) - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE | IRQF_NOAUTOEN); - else - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - } - set_irq_flags(MAINSTONE_IRQ(8), 0); - set_irq_flags(MAINSTONE_IRQ(12), 0); - - MST_INTMSKENA = 0; - MST_INTSETCLR = 0; - - irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), mainstone_irq_handler); - irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING); -} - -#ifdef CONFIG_PM - -static void mainstone_irq_resume(void) -{ - MST_INTMSKENA = mainstone_irq_enabled; -} - -static struct syscore_ops mainstone_irq_syscore_ops = { - .resume = mainstone_irq_resume, -}; - -static int __init mainstone_irq_device_init(void) -{ - if (machine_is_mainstone()) - register_syscore_ops(&mainstone_irq_syscore_ops); - - return 0; -} - -device_initcall(mainstone_irq_device_init); - -#endif - - static struct resource smc91x_resources[] = { [0] = { .start = (MST_ETH_PHYS + 0x300), @@ -487,11 +402,37 @@ static struct platform_device mst_gpio_keys_device = { }, }; +static struct resource mst_cplds_resources[] = { + [0] = { + .start = MST_FPGA_PHYS + 0xc0, + .end = MST_FPGA_PHYS + 0xe0 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = PXA_GPIO_TO_IRQ(0), + .end = PXA_GPIO_TO_IRQ(0), + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE, + }, + [2] = { + .start = MAINSTONE_IRQ(0), + .end = MAINSTONE_IRQ(15), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mst_cplds_device = { + .name = "pxa_cplds_irqs", + .id = -1, + .resource = &mst_cplds_resources[0], + .num_resources = 3, +}; + static struct platform_device *platform_devices[] __initdata = { &smc91x_device, &mst_flash_device[0], &mst_flash_device[1], &mst_gpio_keys_device, + &mst_cplds_device, }; static struct pxaohci_platform_data mainstone_ohci_platform_data = { @@ -718,7 +659,7 @@ MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)") .atag_offset = 0x100, /* BLOB boot parameter setting */ .map_io = mainstone_map_io, .nr_irqs = MAINSTONE_NR_IRQS, - .init_irq = mainstone_init_irq, + .init_irq = pxa27x_init_irq, .handle_irq = pxa27x_handle_irq, .init_time = pxa_timer_init, .init_machine = mainstone_init, diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c new file mode 100644 index 000000000000..2385052b0ce1 --- /dev/null +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -0,0 +1,200 @@ +/* + * Intel Reference Systems cplds + * + * Copyright (C) 2014 Robert Jarzmik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Cplds motherboard driver, supporting lubbock and mainstone SoC board. + */ + +#include <linux/bitops.h> +#include <linux/gpio.h> +#include <linux/gpio/consumer.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/mfd/core.h> +#include <linux/module.h> +#include <linux/of_platform.h> + +#define FPGA_IRQ_MASK_EN 0x0 +#define FPGA_IRQ_SET_CLR 0x10 + +#define CPLDS_NB_IRQ 32 + +struct cplds { + void __iomem *base; + int irq; + unsigned int irq_mask; + struct gpio_desc *gpio0; + struct irq_domain *irqdomain; +}; + +static irqreturn_t cplds_irq_handler(int in_irq, void *d) +{ + struct cplds *fpga = d; + unsigned long pending; + unsigned int bit; + + pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask; + for_each_set_bit(bit, &pending, CPLDS_NB_IRQ) + generic_handle_irq(irq_find_mapping(fpga->irqdomain, bit)); + + return IRQ_HANDLED; +} + +static void cplds_irq_mask_ack(struct irq_data *d) +{ + struct cplds *fpga = irq_data_get_irq_chip_data(d); + unsigned int cplds_irq = irqd_to_hwirq(d); + unsigned int set, bit = BIT(cplds_irq); + + fpga->irq_mask &= ~bit; + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); + set = readl(fpga->base + FPGA_IRQ_SET_CLR); + writel(set & ~bit, fpga->base + FPGA_IRQ_SET_CLR); +} + +static void cplds_irq_unmask(struct irq_data *d) +{ + struct cplds *fpga = irq_data_get_irq_chip_data(d); + unsigned int cplds_irq = irqd_to_hwirq(d); + unsigned int bit = BIT(cplds_irq); + + fpga->irq_mask |= bit; + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); +} + +static struct irq_chip cplds_irq_chip = { + .name = "pxa_cplds", + .irq_mask_ack = cplds_irq_mask_ack, + .irq_unmask = cplds_irq_unmask, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE, +}; + +static int cplds_irq_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct cplds *fpga = d->host_data; + + irq_set_chip_and_handler(irq, &cplds_irq_chip, handle_level_irq); + irq_set_chip_data(irq, fpga); + + return 0; +} + +static const struct irq_domain_ops cplds_irq_domain_ops = { + .xlate = irq_domain_xlate_twocell, + .map = cplds_irq_domain_map, +}; + +static int cplds_resume(struct platform_device *pdev) +{ + struct cplds *fpga = platform_get_drvdata(pdev); + + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); + + return 0; +} + +static int cplds_probe(struct platform_device *pdev) +{ + struct resource *res; + struct cplds *fpga; + int ret; + int base_irq; + unsigned long irqflags = 0; + + fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL); + if (!fpga) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (res) { + fpga->irq = (unsigned int)res->start; + irqflags = res->flags; + } + if (!fpga->irq) + return -ENODEV; + + base_irq = platform_get_irq(pdev, 1); + if (base_irq < 0) + base_irq = 0; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + fpga->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(fpga->base)) + return PTR_ERR(fpga->base); + + platform_set_drvdata(pdev, fpga); + + writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN); + writel(0, fpga->base + FPGA_IRQ_SET_CLR); + + ret = devm_request_irq(&pdev->dev, fpga->irq, cplds_irq_handler, + irqflags, dev_name(&pdev->dev), fpga); + if (ret == -ENOSYS) + return -EPROBE_DEFER; + + if (ret) { + dev_err(&pdev->dev, "couldn't request main irq%d: %d\n", + fpga->irq, ret); + return ret; + } + + irq_set_irq_wake(fpga->irq, 1); + fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node, + CPLDS_NB_IRQ, + &cplds_irq_domain_ops, fpga); + if (!fpga->irqdomain) + return -ENODEV; + + if (base_irq) { + ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0, + CPLDS_NB_IRQ); + if (ret) { + dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n", + base_irq, base_irq + CPLDS_NB_IRQ); + return ret; + } + } + + return 0; +} + +static int cplds_remove(struct platform_device *pdev) +{ + struct cplds *fpga = platform_get_drvdata(pdev); + + irq_set_chip_and_handler(fpga->irq, NULL, NULL); + + return 0; +} + +static const struct of_device_id cplds_id_table[] = { + { .compatible = "intel,lubbock-cplds-irqs", }, + { .compatible = "intel,mainstone-cplds-irqs", }, + { } +}; +MODULE_DEVICE_TABLE(of, cplds_id_table); + +static struct platform_driver cplds_driver = { + .driver = { + .name = "pxa_cplds_irqs", + .of_match_table = of_match_ptr(cplds_id_table), + }, + .probe = cplds_probe, + .remove = cplds_remove, + .resume = cplds_resume, +}; + +module_platform_driver(cplds_driver); + +MODULE_DESCRIPTION("PXA Cplds interrupts driver"); +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); +MODULE_LICENSE("GPL"); diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c index b07d88602073..b0dcbe28f78c 100644 --- a/arch/arm/mach-rockchip/pm.c +++ b/arch/arm/mach-rockchip/pm.c @@ -83,6 +83,13 @@ static void rk3288_slp_mode_set(int level) SGRF_PCLK_WDT_GATE | SGRF_FAST_BOOT_EN | SGRF_PCLK_WDT_GATE_WRITE | SGRF_FAST_BOOT_EN_WRITE); + /* + * The dapswjdp can not auto reset before resume, that cause it may + * access some illegal address during resume. Let's disable it before + * suspend, and the MASKROM will enable it back. + */ + regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0, SGRF_DAPDEVICEEN_WRITE); + /* booting address of resuming system is from this register value */ regmap_write(sgrf_regmap, RK3288_SGRF_FAST_BOOT_ADDR, rk3288_bootram_phy); diff --git a/arch/arm/mach-rockchip/pm.h b/arch/arm/mach-rockchip/pm.h index 03ff31d8282d..3e8d39c0c3d5 100644 --- a/arch/arm/mach-rockchip/pm.h +++ b/arch/arm/mach-rockchip/pm.h @@ -55,6 +55,10 @@ static inline void rockchip_suspend_init(void) #define SGRF_FAST_BOOT_EN BIT(8) #define SGRF_FAST_BOOT_EN_WRITE BIT(24) +#define RK3288_SGRF_CPU_CON0 (0x40) +#define SGRF_DAPDEVICEEN BIT(0) +#define SGRF_DAPDEVICEEN_WRITE BIT(16) + #define RK3288_CRU_MODE_CON 0x50 #define RK3288_CRU_SEL0_CON 0x60 #define RK3288_CRU_SEL1_CON 0x64 diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c index d360ec044b66..b6cf3b449428 100644 --- a/arch/arm/mach-rockchip/rockchip.c +++ b/arch/arm/mach-rockchip/rockchip.c @@ -30,11 +30,30 @@ #include "pm.h" #define RK3288_GRF_SOC_CON0 0x244 +#define RK3288_TIMER6_7_PHYS 0xff810000 static void __init rockchip_timer_init(void) { if (of_machine_is_compatible("rockchip,rk3288")) { struct regmap *grf; + void __iomem *reg_base; + + /* + * Most/all uboot versions for rk3288 don't enable timer7 + * which is needed for the architected timer to work. + * So make sure it is running during early boot. + */ + reg_base = ioremap(RK3288_TIMER6_7_PHYS, SZ_16K); + if (reg_base) { + writel(0, reg_base + 0x30); + writel(0xffffffff, reg_base + 0x20); + writel(0xffffffff, reg_base + 0x24); + writel(1, reg_base + 0x30); + dsb(); + iounmap(reg_base); + } else { + pr_err("rockchip: could not map timer7 registers\n"); + } /* * Disable auto jtag/sdmmc switching that causes issues diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 09c5fe3d30c2..7e7583ddd607 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1878,7 +1878,7 @@ struct dma_map_ops iommu_coherent_ops = { * arm_iommu_attach_device function. */ struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size) { unsigned int bits = size >> PAGE_SHIFT; unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); @@ -1886,6 +1886,10 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) int extensions = 1; int err = -ENOMEM; + /* currently only 32-bit DMA address space is supported */ + if (size > DMA_BIT_MASK(32) + 1) + return ERR_PTR(-ERANGE); + if (!bitmap_size) return ERR_PTR(-EINVAL); @@ -2057,13 +2061,6 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, if (!iommu) return false; - /* - * currently arm_iommu_create_mapping() takes a max of size_t - * for size param. So check this limit for now. - */ - if (size > SIZE_MAX) - return false; - mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); if (IS_ERR(mapping)) { pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4e6ef896c619..7186382672b5 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void) } /* - * Find the first non-section-aligned page, and point + * Find the first non-pmd-aligned page, and point * memblock_limit at it. This relies on rounding the - * limit down to be section-aligned, which happens at - * the end of this function. + * limit down to be pmd-aligned, which happens at the + * end of this function. * * With this algorithm, the start or end of almost any - * bank can be non-section-aligned. The only exception - * is that the start of the bank 0 must be section- + * bank can be non-pmd-aligned. The only exception is + * that the start of the bank 0 must be section- * aligned, since otherwise memory would need to be * allocated when mapping the start of bank 0, which * occurs before any free memory is mapped. */ if (!memblock_limit) { - if (!IS_ALIGNED(block_start, SECTION_SIZE)) + if (!IS_ALIGNED(block_start, PMD_SIZE)) memblock_limit = block_start; - else if (!IS_ALIGNED(block_end, SECTION_SIZE)) + else if (!IS_ALIGNED(block_end, PMD_SIZE)) memblock_limit = arm_lowmem_limit; } @@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void) high_memory = __va(arm_lowmem_limit - 1) + 1; /* - * Round the memblock limit down to a section size. This + * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the - * last full section, which should be mapped. + * last full pmd, which should be mapped. */ if (memblock_limit) - memblock_limit = round_down(memblock_limit, SECTION_SIZE); + memblock_limit = round_down(memblock_limit, PMD_SIZE); if (!memblock_limit) memblock_limit = arm_lowmem_limit; diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index aa0519eed698..774ef1323554 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -22,8 +22,6 @@ * * These are the low level assembler for performing cache and TLB * functions on the arm1020. - * - * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt */ #include <linux/linkage.h> #include <linux/init.h> diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index bff4c7f70fd6..ae3c27b71594 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -22,8 +22,6 @@ * * These are the low level assembler for performing cache and TLB * functions on the arm1020e. - * - * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt */ #include <linux/linkage.h> #include <linux/init.h> diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ede8c54ab4aa..32a47cc19076 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -441,9 +441,6 @@ ENTRY(cpu_arm925_set_pte_ext) .type __arm925_setup, #function __arm925_setup: mov r0, #0 -#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE) - orr r0,r0,#1 << 7 -#endif /* Transparent on, D-cache clean & flush mode. See NOTE2 above */ orr r0,r0,#1 << 1 @ transparent mode on diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index e494d6d6acbe..92e08bf37aad 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -602,7 +602,6 @@ __\name\()_proc_info: PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ initfn __feroceon_setup, __\name\()_proc_info - .long __feroceon_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e1268f905026..e0e23582c8b4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -54,6 +54,7 @@ #define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) #define FLAG_NEED_X_RESET (1 << 0) +#define FLAG_IMM_OVERFLOW (1 << 1) struct jit_ctx { const struct bpf_prog *skf; @@ -293,6 +294,15 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx) /* PC in ARM mode == address of the instruction + 8 */ imm = offset - (8 + ctx->idx * 4); + if (imm & ~0xfff) { + /* + * literal pool is too far, signal it into flags. we + * can only detect it on the second pass unfortunately. + */ + ctx->flags |= FLAG_IMM_OVERFLOW; + return 0; + } + return imm; } @@ -449,10 +459,21 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) return; } #endif - if (rm != ARM_R0) - emit(ARM_MOV_R(ARM_R0, rm), ctx); + + /* + * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 + * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into + * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm + * before using it as a source for ARM_R1. + * + * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is + * ARM_R5 (r_X) so there is no particular register overlap + * issues. + */ if (rn != ARM_R1) emit(ARM_MOV_R(ARM_R1, rn), ctx); + if (rm != ARM_R0) + emit(ARM_MOV_R(ARM_R0, rm), ctx); ctx->seen |= SEEN_CALL; emit_mov_i(ARM_R3, (u32)jit_udiv, ctx); @@ -855,6 +876,14 @@ b_epilogue: default: return -1; } + + if (ctx->flags & FLAG_IMM_OVERFLOW) + /* + * this instruction generated an overflow when + * trying to access the literal pool, so + * delegate this filter to the kernel interpreter. + */ + return -1; } /* compute offsets only during the first pass */ @@ -917,7 +946,14 @@ void bpf_jit_compile(struct bpf_prog *fp) ctx.idx = 0; build_prologue(&ctx); - build_body(&ctx); + if (build_body(&ctx) < 0) { +#if __LINUX_ARM_ARCH__ < 7 + if (ctx.imm_count) + kfree(ctx.imms); +#endif + bpf_jit_binary_free(header); + goto out; + } build_epilogue(&ctx); flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx)); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 224081ccc92f..7d0f07020c80 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { } void xen_arch_post_suspend(int suspend_cancelled) { } void xen_timer_resume(void) { } void xen_arch_resume(void) { } +void xen_arch_suspend(void) { } /* In the hypervisor.S file. */ diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index c138b95a8356..351c95bda89e 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -21,6 +21,20 @@ clock-output-names = "juno_mb:clk25mhz"; }; + v2m_refclk1mhz: refclk1mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "juno_mb:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "juno_mb:refclk32khz"; + }; + motherboard { compatible = "arm,vexpress,v2p-p1", "simple-bus"; #address-cells = <2>; /* SMB chipselect number and offset */ @@ -66,6 +80,15 @@ #size-cells = <1>; ranges = <0 3 0 0x200000>; + v2m_sysctl: sysctl@020000 { + compatible = "arm,sp810", "arm,primecell"; + reg = <0x020000 0x1000>; + clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&mb_clk24mhz>; + clock-names = "refclk", "timclk", "apb_pclk"; + #clock-cells = <1>; + clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3"; + }; + mmci@050000 { compatible = "arm,pl180", "arm,primecell"; reg = <0x050000 0x1000>; @@ -106,16 +129,16 @@ compatible = "arm,sp804", "arm,primecell"; reg = <0x110000 0x10000>; interrupts = <9>; - clocks = <&mb_clk24mhz>, <&soc_smc50mhz>; - clock-names = "timclken1", "apb_pclk"; + clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&mb_clk24mhz>; + clock-names = "timclken1", "timclken2", "apb_pclk"; }; v2m_timer23: timer@120000 { compatible = "arm,sp804", "arm,primecell"; reg = <0x120000 0x10000>; interrupts = <9>; - clocks = <&mb_clk24mhz>, <&soc_smc50mhz>; - clock-names = "timclken1", "apb_pclk"; + clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&mb_clk24mhz>; + clock-names = "timclken1", "timclken2", "apb_pclk"; }; rtc@170000 { diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c index 9499199924ae..6a37c3c6b11d 100644 --- a/arch/arm64/crypto/crc32-arm64.c +++ b/arch/arm64/crypto/crc32-arm64.c @@ -147,13 +147,21 @@ static int chksum_final(struct shash_desc *desc, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + put_unaligned_le32(ctx->crc, out); + return 0; +} + +static int chksumc_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + put_unaligned_le32(~ctx->crc, out); return 0; } static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) { - put_unaligned_le32(~crc32_arm64_le_hw(crc, data, len), out); + put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out); return 0; } @@ -199,6 +207,14 @@ static int crc32_cra_init(struct crypto_tfm *tfm) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + mctx->key = 0; + return 0; +} + +static int crc32c_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + mctx->key = ~0; return 0; } @@ -229,7 +245,7 @@ static struct shash_alg crc32c_alg = { .setkey = chksum_setkey, .init = chksum_init, .update = chksumc_update, - .final = chksum_final, + .final = chksumc_final, .finup = chksumc_finup, .digest = chksumc_digest, .descsize = sizeof(struct chksum_desc_ctx), @@ -241,7 +257,7 @@ static struct shash_alg crc32c_alg = { .cra_alignmask = 0, .cra_ctxsize = sizeof(struct chksum_ctx), .cra_module = THIS_MODULE, - .cra_init = crc32_cra_init, + .cra_init = crc32c_cra_init, } }; diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 114e7cc5de8c..aefda9868627 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -74,6 +74,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, static int sha1_ce_final(struct shash_desc *desc, u8 *out) { + struct sha1_ce_state *sctx = shash_desc_ctx(desc); + + sctx->finalize = 0; kernel_neon_begin_partial(16); sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); kernel_neon_end(); diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 1340e44c048b..7cd587564a41 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -75,6 +75,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, static int sha256_ce_final(struct shash_desc *desc, u8 *out) { + struct sha256_ce_state *sctx = shash_desc_ctx(desc); + + sctx->finalize = 0; kernel_neon_begin_partial(28); sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); kernel_neon_end(); diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 21033bba9390..28f8365edc4c 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,7 +24,6 @@ #include <asm/cacheflush.h> #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <asm/insn.h> #include <linux/stop_machine.h> extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; @@ -34,48 +33,6 @@ struct alt_region { struct alt_instr *end; }; -/* - * Decode the imm field of a b/bl instruction, and return the byte - * offset as a signed value (so it can be used when computing a new - * branch target). - */ -static s32 get_branch_offset(u32 insn) -{ - s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn); - - /* sign-extend the immediate before turning it into a byte offset */ - return (imm << 6) >> 4; -} - -static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr) -{ - u32 insn; - - aarch64_insn_read(altinsnptr, &insn); - - /* Stop the world on instructions we don't support... */ - BUG_ON(aarch64_insn_is_cbz(insn)); - BUG_ON(aarch64_insn_is_cbnz(insn)); - BUG_ON(aarch64_insn_is_bcond(insn)); - /* ... and there is probably more. */ - - if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) { - enum aarch64_insn_branch_type type; - unsigned long target; - - if (aarch64_insn_is_b(insn)) - type = AARCH64_INSN_BRANCH_NOLINK; - else - type = AARCH64_INSN_BRANCH_LINK; - - target = (unsigned long)altinsnptr + get_branch_offset(insn); - insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr, - target, type); - } - - return insn; -} - static int __apply_alternatives(void *alt_region) { struct alt_instr *alt; @@ -83,9 +40,6 @@ static int __apply_alternatives(void *alt_region) u8 *origptr, *replptr; for (alt = region->begin; alt < region->end; alt++) { - u32 insn; - int i; - if (!cpus_have_cap(alt->cpufeature)) continue; @@ -95,12 +49,7 @@ static int __apply_alternatives(void *alt_region) origptr = (u8 *)&alt->orig_offset + alt->orig_offset; replptr = (u8 *)&alt->alt_offset + alt->alt_offset; - - for (i = 0; i < alt->alt_len; i += sizeof(insn)) { - insn = get_alt_insn(origptr + i, replptr + i); - aarch64_insn_write(origptr + i, insn); - } - + memcpy(origptr, replptr, alt->alt_len); flush_icache_range((uintptr_t)origptr, (uintptr_t)(origptr + alt->alt_len)); } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 23f25acf43a9..cce18c85d2e8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1315,15 +1315,15 @@ static int armpmu_device_probe(struct platform_device *pdev) if (!cpu_pmu) return -ENODEV; - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) return 0; + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + for (i = 0; i < pdev->num_resources; ++i) { struct device_node *dn; int cpu; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 74c256744b25..f3d6221cd5bd 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -328,10 +328,12 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; +#ifdef CONFIG_SPARSEMEM_VMEMMAP address_markers[VMEMMAP_START_NR].start_address = (unsigned long)virt_to_page(PAGE_OFFSET); address_markers[VMEMMAP_END_NR].start_address = (unsigned long)virt_to_page(high_memory); +#endif pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index edba042b2325..dc6a4842683a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -487,7 +487,7 @@ emit_cond_jmp: return -EINVAL; } - imm64 = (u64)insn1.imm << 32 | imm; + imm64 = (u64)insn1.imm << 32 | (u32)imm; emit_a64_mov_i64(dst, imm64, ctx); return 1; diff --git a/arch/ia64/include/asm/irq_remapping.h b/arch/ia64/include/asm/irq_remapping.h index e3b3556e2e1b..a8687b1d8906 100644 --- a/arch/ia64/include/asm/irq_remapping.h +++ b/arch/ia64/include/asm/irq_remapping.h @@ -1,6 +1,4 @@ #ifndef __IA64_INTR_REMAPPING_H #define __IA64_INTR_REMAPPING_H #define irq_remapping_enabled 0 -#define dmar_alloc_hwirq create_irq -#define dmar_free_hwirq destroy_irq #endif diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 9dd7464f8c17..d70bf15c690a 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -165,7 +165,7 @@ static struct irq_chip dmar_msi_type = { .irq_retrigger = ia64_msi_retrigger_irq, }; -static int +static void msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { struct irq_cfg *cfg = irq_cfg + irq; @@ -186,21 +186,29 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) MSI_DATA_LEVEL_ASSERT | MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(cfg->vector); - return 0; } -int arch_setup_dmar_msi(unsigned int irq) +int dmar_alloc_hwirq(int id, int node, void *arg) { - int ret; + int irq; struct msi_msg msg; - ret = msi_compose_msg(NULL, irq, &msg); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; + irq = create_irq(); + if (irq > 0) { + irq_set_handler_data(irq, arg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, + handle_edge_irq, "edge"); + msi_compose_msg(NULL, irq, &msg); + dmar_msi_write(irq, &msg); + } + + return irq; +} + +void dmar_free_hwirq(int irq) +{ + irq_set_handler_data(irq, NULL); + destroy_irq(irq); } #endif /* CONFIG_INTEL_IOMMU */ diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index d4e162d35b34..7cc3be9fa7c6 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_controller *controller = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, controller->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_controller *controller = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, controller->companion); + } return 0; } diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index ce7aea34fdf4..c18ddc74ef9a 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -45,7 +45,7 @@ static volatile unsigned long flushcache_cpumask = 0; /* * For flush_tlb_others() */ -static volatile cpumask_t flush_cpumask; +static cpumask_t flush_cpumask; static struct mm_struct *flush_mm; static struct vm_area_struct *flush_vma; static volatile unsigned long flush_va; @@ -415,7 +415,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, */ send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0); - while (!cpumask_empty((cpumask_t*)&flush_cpumask)) { + while (!cpumask_empty(&flush_cpumask)) { /* nothing. lockup detection does not belong here */ mb(); } @@ -468,7 +468,7 @@ void smp_invalidate_interrupt(void) __flush_tlb_page(va); } } - cpumask_clear_cpu(cpu_id, (cpumask_t*)&flush_cpumask); + cpumask_clear_cpu(cpu_id, &flush_cpumask); } /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/ diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 5200f649dd4e..ae2dd59050f7 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -277,7 +277,7 @@ LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ - sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/") + sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') ifdef CONFIG_64BIT CHECKFLAGS += -m64 endif diff --git a/arch/mips/ath79/prom.c b/arch/mips/ath79/prom.c index e1fe63051136..597899ad5438 100644 --- a/arch/mips/ath79/prom.c +++ b/arch/mips/ath79/prom.c @@ -1,6 +1,7 @@ /* * Atheros AR71XX/AR724X/AR913X specific prom routines * + * Copyright (C) 2015 Laurent Fasnacht <l@libres.ch> * Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org> * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org> * @@ -25,12 +26,14 @@ void __init prom_init(void) { fw_init_cmdline(); +#ifdef CONFIG_BLK_DEV_INITRD /* Read the initrd address from the firmware environment */ initrd_start = fw_getenvl("initrd_start"); if (initrd_start) { initrd_start = KSEG0ADDR(initrd_start); initrd_end = initrd_start + fw_getenvl("initrd_size"); } +#endif } void __init prom_free_prom_memory(void) diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 002680648dcb..b2a577ebce0b 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m CONFIG_USB_C67X00_HCD=m CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_ISP1760_HCD=m +CONFIG_USB_ISP1760=m CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=m CONFIG_USB_R8A66597_HCD=m diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index a594d8ed9698..f19e890b99d2 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -304,7 +304,7 @@ do { \ \ current->thread.abi = &mips_abi; \ \ - current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \ + current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \ } while (0) #endif /* CONFIG_32BIT */ @@ -366,7 +366,7 @@ do { \ else \ current->thread.abi = &mips_abi; \ \ - current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31; \ + current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \ \ p = personality(current->personality); \ if (p != PER_LINUX32 && p != PER_LINUX) \ diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index bb02fac9b4fa..2b25d1ba1ea0 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -45,7 +45,7 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_DUMP 0x8 #define SMP_ASK_C0COUNT 0x10 -extern volatile cpumask_t cpu_callin_map; +extern cpumask_t cpu_callin_map; /* Mask of CPUs which are currently definitely operating coherently */ extern cpumask_t cpu_coherent_mask; diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index be4899f3c393..4a4d9e067c89 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -76,14 +76,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, /* Lets see if this is an O32 ELF */ if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) { - /* FR = 1 for N32 */ - if (ehdr32->e_flags & EF_MIPS_ABI2) - state->overall_fp_mode = FP_FR1; - else - /* Set a good default FPU mode for O32 */ - state->overall_fp_mode = cpu_has_mips_r6 ? - FP_FRE : FP_FR0; - if (ehdr32->e_flags & EF_MIPS_FP64) { /* * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it @@ -104,9 +96,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, (char *)&abiflags, sizeof(abiflags)); } else { - /* FR=1 is really the only option for 64-bit */ - state->overall_fp_mode = FP_FR1; - if (phdr64->p_type != PT_MIPS_ABIFLAGS) return 0; if (phdr64->p_filesz < sizeof(abiflags)) @@ -137,6 +126,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, struct elf32_hdr *ehdr = _ehdr; struct mode_req prog_req, interp_req; int fp_abi, interp_fp_abi, abi0, abi1, max_abi; + bool is_mips64; if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT)) return 0; @@ -152,10 +142,22 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, abi0 = abi1 = fp_abi; } - /* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */ - max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) && - (!(ehdr->e_flags & EF_MIPS_ABI2))) ? - MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT; + is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) || + (ehdr->e_flags & EF_MIPS_ABI2); + + if (is_mips64) { + /* MIPS64 code always uses FR=1, thus the default is easy */ + state->overall_fp_mode = FP_FR1; + + /* Disallow access to the various FPXX & FP64 ABIs */ + max_abi = MIPS_ABI_FP_SOFT; + } else { + /* Default to a mode capable of running code expecting FR=0 */ + state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0; + + /* Allow all ABIs we know about */ + max_abi = MIPS_ABI_FP_64A; + } if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) || (abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN)) diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index d2bfbc2e8995..51f57d841662 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -29,7 +29,7 @@ int kgdb_early_setup; #endif -static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; +static DECLARE_BITMAP(irq_map, NR_IRQS); int allocate_irqno(void) { diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index d544e774eea6..e933a309f2ea 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -176,7 +176,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) __get_user(value, data + 64); fcr31 = child->thread.fpu.fcr31; - mask = current_cpu_data.fpu_msk31; + mask = boot_cpu_data.fpu_msk31; child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask); /* FIR may not be written. */ diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index fd528d7ea278..336708ae5c5b 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = { static void bmips_wr_vec(unsigned long dst, char *start, char *end) { memcpy((void *)dst, start, end - start); - dma_cache_wback((unsigned long)start, end - start); + dma_cache_wback(dst, end - start); local_flush_icache_range(dst, dst + (end - start)); instruction_hazard(); } diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 7e011f95bb8e..4251d390b5b6 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -92,7 +92,7 @@ static void __init cps_smp_setup(void) #ifdef CONFIG_MIPS_MT_FPAFF /* If we have an FPU, enroll ourselves in the FPU-full mask */ if (cpu_has_fpu) - cpu_set(0, mt_fpu_cpumask); + cpumask_set_cpu(0, &mt_fpu_cpumask); #endif /* CONFIG_MIPS_MT_FPAFF */ } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 193ace7955fb..faa46ebd9dda 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -43,7 +43,7 @@ #include <asm/time.h> #include <asm/setup.h> -volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ +cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ EXPORT_SYMBOL(__cpu_number_map); @@ -218,8 +218,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) /* * Trust is futile. We should really have timeouts ... */ - while (!cpumask_test_cpu(cpu, &cpu_callin_map)) + while (!cpumask_test_cpu(cpu, &cpu_callin_map)) { udelay(100); + schedule(); + } synchronise_count_master(cpu); return 0; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index ba32e48d4697..d2d1c1933bc9 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -269,7 +269,6 @@ static void __show_regs(const struct pt_regs *regs) */ printk("epc : %0*lx %pS\n", field, regs->cp0_epc, (void *) regs->cp0_epc); - printk(" %s\n", print_tainted()); printk("ra : %0*lx %pS\n", field, regs->regs[31], (void *) regs->regs[31]); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 6230f376a44e..4b50c5787e25 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -2389,7 +2389,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, { unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; enum emulation_result er = EMULATE_DONE; - unsigned long curr_pc; if (run->mmio.len > sizeof(*gpr)) { kvm_err("Bad MMIO length: %d", run->mmio.len); @@ -2397,11 +2396,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, goto done; } - /* - * Update PC and hold onto current PC in case there is - * an error and we want to rollback the PC - */ - curr_pc = vcpu->arch.pc; er = update_pc(vcpu, vcpu->arch.pending_load_cause); if (er == EMULATE_FAIL) return er; diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 7d12c0dded3d..77e64942f004 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm) FEXPORT(__strnlen_\func\()_nocheck_asm) move v0, a0 PTR_ADDU a1, a0 # stop pointer -1: beq v0, a1, 1f # limit reached? +1: +#ifdef CONFIG_CPU_DADDI_WORKAROUNDS + .set noat + li AT, 1 +#endif + beq v0, a1, 1f # limit reached? .ifeqs "\func", "kernel" EX(lb, t0, (v0), .Lfault\@) .else @@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm) .endif .set noreorder bnez t0, 1b -1: PTR_ADDIU v0, 1 +1: +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS + PTR_ADDIU v0, 1 +#else + PTR_ADDU v0, AT + .set at +#endif .set reorder PTR_SUBU v0, a0 jr ra diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index d31c537ace1d..22b9b2cb9219 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -889,7 +889,7 @@ static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case FPCREG_RID: - value = current_cpu_data.fpu_id; + value = boot_cpu_data.fpu_id; break; default: @@ -921,7 +921,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); /* Preserve read-only bits. */ - mask = current_cpu_data.fpu_msk31; + mask = boot_cpu_data.fpu_msk31; fcr31 = (value & ~mask) | (fcr31 & mask); break; diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index a27a088e6f9f..08318ecb803a 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -495,7 +495,7 @@ static void r4k_tlb_configure(void) if (cpu_has_rixi) { /* - * Enable the no read, no exec bits, and enable large virtual + * Enable the no read, no exec bits, and enable large physical * address. */ #ifdef CONFIG_64BIT diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c index 0134db2ad0a8..5a2a82148d8d 100644 --- a/arch/mips/sgi-ip32/ip32-platform.c +++ b/arch/mips/sgi-ip32/ip32-platform.c @@ -130,9 +130,9 @@ struct platform_device ip32_rtc_device = { .resource = ip32_rtc_resources, }; -+static int __init sgio2_rtc_devinit(void) +static __init int sgio2_rtc_devinit(void) { return platform_device_register(&ip32_rtc_device); } -device_initcall(sgio2_cmos_devinit); +device_initcall(sgio2_rtc_devinit); diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 3391d061eccc..78c9fd32c554 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -348,6 +348,10 @@ struct pt_regs; /* forward declaration... */ #define ELF_HWCAP 0 +#define STACK_RND_MASK (is_32bit_task() ? \ + 0x7ff >> (PAGE_SHIFT - 12) : \ + 0x3ffff >> (PAGE_SHIFT - 12)) + struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *); #define arch_randomize_brk arch_randomize_brk diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 8a488c22a99f..809905a811ed 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -181,9 +181,12 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) return 1; } +/* + * Copy architecture-specific thread state + */ int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) + unsigned long kthread_arg, struct task_struct *p) { struct pt_regs *cregs = &(p->thread.regs); void *stack = task_stack_page(p); @@ -195,11 +198,10 @@ copy_thread(unsigned long clone_flags, unsigned long usp, extern void * const child_return; if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ memset(cregs, 0, sizeof(struct pt_regs)); if (!usp) /* idle thread */ return 0; - - /* kernel thread */ /* Must exit via ret_from_kernel_thread in order * to call schedule_tail() */ @@ -215,7 +217,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, #else cregs->gr[26] = usp; #endif - cregs->gr[25] = arg; + cregs->gr[25] = kthread_arg; } else { /* user thread */ /* usp must be word aligned. This also prevents users from diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index e1ffea2f9a0b..5aba01ac457f 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -77,6 +77,9 @@ static unsigned long mmap_upper_limit(void) if (stack_base > STACK_SIZE_MAX) stack_base = STACK_SIZE_MAX; + /* Add space for stack randomization. */ + stack_base += (STACK_RND_MASK << PAGE_SHIFT); + return PAGE_ALIGN(STACK_TOP - stack_base); } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 15c99b649b04..b2eb4686bd8f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __this_cpu_inc_return(mce_nest_count); + int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* @@ -184,7 +184,7 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count); + index = __this_cpu_inc_return(mce_queue_count) - 1; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { __this_cpu_dec(mce_queue_count); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f096e72262f4..1db685104ffc 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -213,6 +213,7 @@ SECTIONS *(.opd) } + . = ALIGN(256); .got : AT(ADDR(.got) - LOAD_OFFSET) { __toc_start = .; #ifndef CONFIG_RELOCATABLE diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48d3c5d2ecc9..df81caab7383 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vnext; int i; int srcu_idx; @@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0ce968b00b7c..3385e3d0506e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { - pte_t *ptep; - struct page *page; + pte_t *ptep, pte; unsigned shift; unsigned long mask, flags; + struct page *page = ERR_PTR(-EINVAL); + + local_irq_save(flags); + ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); + if (!ptep) + goto no_page; + pte = READ_ONCE(*ptep); /* + * Verify it is a huge page else bail. * Transparent hugepages are handled by generic code. We can skip them * here. */ - local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); + if (!shift || pmd_trans_huge(__pmd(pte_val(pte)))) + goto no_page; - /* Verify it is a huge page else bail. */ - if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) { - local_irq_restore(flags); - return ERR_PTR(-EINVAL); + if (!pte_present(pte)) { + page = NULL; + goto no_page; } mask = (1UL << shift) - 1; - page = pte_page(*ptep); + page = pte_page(pte); if (page) page += (address & mask) / PAGE_SIZE; +no_page: local_irq_restore(flags); return page; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 59daa5eeec25..6bfadf1aa5cb 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm, * hash fault look at them. */ memset(pgtable, 0, PTE_FRAG_SIZE); + /* + * Serialize against find_linux_pte_or_hugepte which does lock-less + * lookup in page tables with local interrupts disabled. For huge pages + * it casts pmd_t to pte_t. Since format of pte_t is different from + * pmd_t we want to prevent transit from pmd pointing to page table + * to pmd pointing to huge page (and back) while interrupts are disabled. + * We clear pmd to possibly replace it with page table pointer in + * different code paths. So make sure we wait for the parallel + * find_linux_pte_or_hugepage to finish. + */ + kick_all_cpus_sync(); return old_pmd; } diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 7940dc90e80b..b258110da952 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -16,11 +16,12 @@ #define GHASH_DIGEST_SIZE 16 struct ghash_ctx { - u8 icv[16]; - u8 key[16]; + u8 key[GHASH_BLOCK_SIZE]; }; struct ghash_desc_ctx { + u8 icv[GHASH_BLOCK_SIZE]; + u8 key[GHASH_BLOCK_SIZE]; u8 buffer[GHASH_BLOCK_SIZE]; u32 bytes; }; @@ -28,8 +29,10 @@ struct ghash_desc_ctx { static int ghash_init(struct shash_desc *desc) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); memset(dctx, 0, sizeof(*dctx)); + memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); return 0; } @@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm, } memcpy(ctx->key, key, GHASH_BLOCK_SIZE); - memset(ctx->icv, 0, GHASH_BLOCK_SIZE); return 0; } @@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); unsigned int n; u8 *buf = dctx->buffer; int ret; @@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc, src += n; if (!dctx->bytes) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; @@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc, n = srclen & ~(GHASH_BLOCK_SIZE - 1); if (n) { - ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n); if (ret != n) return -EIO; src += n; @@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc, return 0; } -static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +static int ghash_flush(struct ghash_desc_ctx *dctx) { u8 *buf = dctx->buffer; int ret; @@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) memset(pos, 0, dctx->bytes); - ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; + + dctx->bytes = 0; } - dctx->bytes = 0; return 0; } static int ghash_final(struct shash_desc *desc, u8 *dst) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); int ret; - ret = ghash_flush(ctx, dctx); + ret = ghash_flush(dctx); if (!ret) - memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); return ret; } diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 1f374b39a4ec..9d5192c94963 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes) /* fill page with urandom bytes */ get_random_bytes(pg, PAGE_SIZE); /* exor page with stckf values */ - for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) { + for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) { u64 *p = ((u64 *)pg) + n; *p ^= get_tod_clock_fast(); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fc642399b489..ef24a212eeb7 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; } -static inline int pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 7690dc8e1ab5..20c146d1251a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -443,8 +443,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) /* * Compile one eBPF instruction into s390x code + * + * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of + * stack space for the large switch statement. */ -static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) +static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) { struct bpf_insn *insn = &fp->insnsi[i]; int jmp_off, last, insn_count = 1; @@ -588,8 +591,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */ + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -602,10 +605,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4_IMM(0xa7090000, REG_W0, 0); /* lgr %w1,%dst */ EMIT4(0xb9040000, REG_W1, dst_reg); - /* llgfr %dst,%src (u32 cast) */ - EMIT4(0xb9160000, dst_reg, src_reg); /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, dst_reg); + EMIT4(0xb9870000, REG_W0, src_reg); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; @@ -632,8 +633,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9160000, dst_reg, rc_reg); break; } - case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */ { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -649,7 +650,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) EMIT4(0xb9040000, REG_W1, dst_reg); /* dlg %w0,<d(imm)>(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64((u32) imm)); + EMIT_CONST_U64(imm)); /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6e424d185d0..a6cfdabb6054 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,7 +24,8 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - int core_id; + unsigned short sock_id; + unsigned short core_id; int proc_id; } cpuinfo_sparc; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index dc165ebdf05a..2a52c91d2c8a 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) " sllx %1, 32, %1\n" " or %0, %1, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sethi %%hi(%4), %0\n" + " .word 662b\n" + " or %1, %%ulo(%4), %1\n" + " or %0, %%lo(%4), %0\n" + " .word 663b\n" + " sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .previous\n" : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V), + "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | + _PAGE_CP_4V | _PAGE_E_4V | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); @@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) " andn %0, %4, %0\n" " or %0, %5, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " andn %0, %6, %0\n" + " or %0, %5, %0\n" + " .previous\n" : "=r" (val) : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), - "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); + "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V), + "i" (_PAGE_CP_4V)); return __pgprot(val); } diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index ed8f071132e4..d1761df5cca6 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #ifdef CONFIG_SMP #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_map[NR_CPUS]; static inline const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index 6fd4436d32f0..ec9c04de3664 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry { }; extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, __sun4v_2insn_patch_end; +extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch, + __sun_m7_2insn_patch_end; #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 07cc49e541f4..0f679421b468 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *, + struct sun4v_2insn_patch_entry *); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 94e392bdee7d..814fb1729b12 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev) err = -ENOMEM; goto err1; } - memset(grpci2priv, 0, sizeof(*grpci2priv)); priv->regs = regs; priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */ priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT; diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 26c80e18d7b1..6f80936e0eea 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) } } -static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) +static void find_back_node_value(struct mdesc_handle *hp, u64 node, + char *srch_val, + void (*func)(struct mdesc_handle *, u64, int), + u64 val, int depth) { - u64 a; - - mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { - u64 t = mdesc_arc_target(hp, a); - const char *name; - const u64 *id; + u64 arc; - name = mdesc_node_name(hp, t); - if (!strcmp(name, "cpu")) { - id = mdesc_get_property(hp, t, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } else { - u64 j; + /* Since we have an estimate of recursion depth, do a sanity check. */ + if (depth == 0) + return; - mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { - u64 n = mdesc_arc_target(hp, j); - const char *n_name; + mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, arc); + const char *name = mdesc_node_name(hp, n); - n_name = mdesc_node_name(hp, n); - if (strcmp(n_name, "cpu")) - continue; + if (!strcmp(srch_val, name)) + (*func)(hp, n, val); - id = mdesc_get_property(hp, n, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } - } + find_back_node_value(hp, n, srch_val, func, val, depth-1); } } +static void __mark_core_id(struct mdesc_handle *hp, u64 node, + int core_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).core_id = core_id; +} + +static void __mark_sock_id(struct mdesc_handle *hp, u64 node, + int sock_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = sock_id; +} + +static void mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); +} + +static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, + int sock_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); +} + static void set_core_ids(struct mdesc_handle *hp) { int idx; u64 mp; idx = 1; + + /* Identify unique cores by looking for cpus backpointed to by + * level 1 instruction caches. + */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *level; const char *type; @@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp) continue; mark_core_ids(hp, mp, idx); + idx++; + } +} + +static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +{ + u64 mp; + int idx = 1; + int fnd = 0; + + /* Identify unique sockets by looking for cpus backpointed to by + * shared level n caches. + */ + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *cur_lvl; + + cur_lvl = mdesc_get_property(hp, mp, "level", NULL); + if (*cur_lvl != level) + continue; + + mark_sock_ids(hp, mp, idx); + idx++; + fnd = 1; + } + return fnd; +} + +static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp) +{ + int idx = 1; + mdesc_for_each_node_by_name(hp, mp, "socket") { + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 t = mdesc_arc_target(hp, a); + const char *name; + const u64 *id; + + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) + continue; + + id = mdesc_get_property(hp, t, "id", NULL); + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = idx; + } idx++; } } +static void set_sock_ids(struct mdesc_handle *hp) +{ + u64 mp; + + /* If machine description exposes sockets data use it. + * Otherwise fallback to use shared L3 or L2 caches. + */ + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); + if (mp != MDESC_NODE_NULL) + return set_sock_ids_by_socket(hp, mp); + + if (!set_sock_ids_by_cache(hp, 3)) + set_sock_ids_by_cache(hp, 2); +} + static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) { u64 a; @@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name) continue; mark_proc_ids(hp, mp, idx); - idx++; } } @@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask) set_core_ids(hp); set_proc_ids(hp); + set_sock_ids(hp); mdesc_release(hp); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 6f7251fd2eab..c928bc64b4ba 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1002,6 +1002,38 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); #ifdef CONFIG_SYSFS + +#define SLOT_NAME_SIZE 11 /* Max decimal digits + null in u32 */ + +static void pcie_bus_slot_names(struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + char name[SLOT_NAME_SIZE]; + struct pci_slot *pci_slot; + const u32 *slot_num; + int len; + + slot_num = of_get_property(pdev->dev.of_node, + "physical-slot#", &len); + + if (slot_num == NULL || len != 4) + continue; + + snprintf(name, sizeof(name), "%u", slot_num[0]); + pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL); + + if (IS_ERR(pci_slot)) + pr_err("PCI: pci_create_slot returned %ld.\n", + PTR_ERR(pci_slot)); + } + + list_for_each_entry(bus, &pbus->children, node) + pcie_bus_slot_names(bus); +} + static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) { const struct pci_slot_names { @@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void) while ((pbus = pci_find_next_bus(pbus)) != NULL) { struct device_node *node; + struct pci_dev *pdev; + + pdev = list_first_entry(&pbus->devices, struct pci_dev, + bus_list); - if (pbus->self) { - /* PCI->PCI bridge */ - node = pbus->self->dev.of_node; + if (pdev && pci_is_pcie(pdev)) { + pcie_bus_slot_names(pbus); } else { - struct pci_pbm_info *pbm = pbus->sysdata; - /* Host PCI controller */ - node = pbm->op->dev.of_node; - } + if (pbus->self) { + + /* PCI->PCI bridge */ + node = pbus->self->dev.of_node; + + } else { + struct pci_pbm_info *pbm = pbus->sysdata; - pci_bus_slot_names(node, pbus); + /* Host PCI controller */ + node = pbm->op->dev.of_node; + } + + pci_bus_slot_names(node, pbus); + } } return 0; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index c38d19fc27ba..f7b261749383 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start, + struct sun4v_2insn_patch_entry *end) +{ + while (start < end) { + unsigned long addr = start->addr; + + *(unsigned int *) (addr + 0) = start->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = start->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + start++; + } +} + static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -267,6 +285,9 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); + if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7) + sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, + &__sun_m7_2insn_patch_end); sun4v_hvapi_init(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 61139d9924ca..19cd08d18672 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); +EXPORT_SYMBOL(cpu_core_sib_map); static cpumask_t smp_commenced_mask; @@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void) } } + for_each_present_cpu(i) { + unsigned int j; + + for_each_present_cpu(j) { + if (cpu_data(i).sock_id == cpu_data(j).sock_id) + cpumask_set_cpu(j, &cpu_core_sib_map[i]); + } + } + for_each_present_cpu(i) { unsigned int j; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 09243057cb0b..f1a2f688b28a 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -138,6 +138,11 @@ SECTIONS *(.pause_3insn_patch) __pause_3insn_patch_end = .; } + .sun_m7_2insn_patch : { + __sun_m7_2insn_patch = .; + *(.sun_m7_2insn_patch) + __sun_m7_2insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ca0d6ba5ec8..559cb744112c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -54,6 +54,7 @@ #include "init_64.h" unsigned long kern_linear_pte_xor[4] __read_mostly; +static unsigned long page_cache4v_flag; /* A bitmap, two bits for every 256MB of physical memory. These two * bits determine what page size we use for kernel linear @@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void) static void __init sun4v_linear_pte_xor_finalize(void) { + unsigned long pagecv_flag; + + /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead + * enables MCD error. Do not set bit 9 on M7 processor. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + pagecv_flag = 0x00; + break; + default: + pagecv_flag = _PAGE_CV_4V; + break; + } #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; @@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; @@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; @@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void) return available; } +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + /* We need to exclude reserved regions. This exclusion will include * vmlinux and initrd. To be more precise the initrd size could be used to * compute a new lower limit because it is freed later during initialization. @@ -2034,6 +2055,25 @@ void __init paging_init(void) memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); #endif + /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde + * bit on M7 processor. This is a conflicting usage of the same + * bit. Enabling TTE.cv on M7 would turn on Memory Corruption + * Detection error on all pages and this will lead to problems + * later. Kernel does not run with MCD enabled and hence rest + * of the required steps to fully configure memory corruption + * detection are not taken. We need to ensure TTE.mcde is not + * set on M7 processor. Compute the value of cacheability + * flag for use later taking this into consideration. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + page_cache4v_flag = _PAGE_CP_4V; + break; + default: + page_cache4v_flag = _PAGE_CACHE_4V; + break; + } + if (tlb_type == hypervisor) sun4v_pgprot_init(); else @@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) -#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) -#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) -#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) -#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) -#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) - pgprot_t PAGE_KERNEL __read_mostly; EXPORT_SYMBOL(PAGE_KERNEL); @@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_P_4U | _PAGE_W_4U); if (tlb_type == hypervisor) pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V); pte_base |= _PAGE_PMD_HUGE; @@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void) int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | - _PAGE_CACHE_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | __ACCESS_BITS_4V | __DIRTY_BITS_4V | _PAGE_EXEC_4V); PAGE_KERNEL_LOCKED = PAGE_KERNEL; _PAGE_IE = _PAGE_IE_4V; _PAGE_E = _PAGE_E_4V; - _PAGE_CACHE = _PAGE_CACHE_4V; + _PAGE_CACHE = page_cache4v_flag; #ifdef CONFIG_DEBUG_PAGEALLOC kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; @@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void) kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ PAGE_OFFSET; #endif - kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V | + _PAGE_W_4V); for (i = 1; i < 4; i++) kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; @@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void) _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); - page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; - page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); - page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); - page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); page_exec_bit = _PAGE_EXEC_4V; @@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr) _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); if (tlb_type == hypervisor) val = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | _PAGE_EXEC_4V | _PAGE_W_4V); return val | paddr; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c3333e5be5d7..e68408facf7a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,7 +100,7 @@ config X86 select IRQ_FORCED_THREADING select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE) + select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE select ARCH_HAS_SG_CHAIN select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -341,7 +341,7 @@ config X86_FEATURE_NAMES config X86_X2APIC bool "Support x2apic" - depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP + depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) ---help--- This enables x2apic support on CPUs that have this feature. @@ -441,6 +441,7 @@ config X86_UV depends on X86_EXTENDED_PLATFORM depends on NUMA depends on X86_X2APIC + depends on PCI ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. @@ -466,7 +467,6 @@ config X86_INTEL_CE select X86_REBOOTFIXUPS select OF select OF_EARLY_FLATTREE - select IRQ_DOMAIN ---help--- Select for the Intel CE media processor (CE4100) SOC. This option compiles in support for the CE4100 SOC for settop @@ -915,12 +915,12 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI - select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + select IRQ_DOMAIN_HIERARCHY + select PCI_MSI_IRQ_DOMAIN if PCI_MSI config X86_IO_APIC def_bool y depends on X86_LOCAL_APIC || X86_UP_IOAPIC - select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS bool "Reroute for broken boot IRQs" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 72484a645f05..a5973f851750 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -332,4 +332,15 @@ config X86_DEBUG_STATIC_CPU_HAS If unsure, say N. +config PUNIT_ATOM_DEBUG + tristate "ATOM Punit debug driver" + select DEBUG_FS + select IOSF_MBI + ---help--- + This is a debug driver, which gets the power states + of all Punit North Complex devices. The power states of + each device is exposed as part of the debugfs interface. + The current power state can be read from + /sys/kernel/debug/punit_atom/dev_power_state + endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2fda005bb334..57996ee840dd 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -77,6 +77,12 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 + # Align jump targets to 1 byte, not the default 16 bytes: + KBUILD_CFLAGS += -falign-jumps=1 + + # Pack loops tightly as well: + KBUILD_CFLAGS += -falign-loops=1 + # Don't autogenerate traditional x87 instructions KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) @@ -84,6 +90,9 @@ else # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) + # Use -mskip-rax-setup if supported. + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 72bf2680f819..63450a596800 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32) swapgs sysretl ENDPROC(native_usergs_sysret32) - -ENTRY(native_irq_enable_sysexit) - swapgs - sti - sysexit -ENDPROC(native_irq_enable_sysexit) #endif /* @@ -119,7 +113,7 @@ ENTRY(ia32_sysenter_target) * it is too small to ever cause noticeable irq latency. */ SWAPGS_UNSAFE_STACK - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ @@ -142,7 +136,7 @@ ENTRY(ia32_sysenter_target) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -169,8 +163,6 @@ sysenter_flags_fixed: testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys sysenter_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -179,8 +171,11 @@ sysenter_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ sysenter_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -247,9 +242,7 @@ sysexit_from_sys_call: movl %ebx,%esi /* 2nd arg: 1st syscall arg */ movl %eax,%edi /* 1st arg: syscall number */ call __audit_syscall_entry - movl RAX(%rsp),%eax /* reload syscall number */ - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + movl ORIG_RAX(%rsp),%eax /* reload syscall number */ movl %ebx,%edi /* reload 1st syscall arg */ movl RCX(%rsp),%esi /* reload 2nd syscall arg */ movl RDX(%rsp),%edx /* reload 3rd syscall arg */ @@ -300,13 +293,10 @@ sysenter_tracesys: #endif SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -356,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ @@ -376,7 +366,7 @@ ENTRY(ia32_cstar_target) pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rbp /* pt_regs->cx */ movl %ebp,%ecx - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -392,8 +382,6 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpq $IA32_NR_syscalls-1,%rax - ja ia32_badsys cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -402,8 +390,11 @@ cstar_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ cstar_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -457,14 +448,11 @@ cstar_tracesys: xchgl %r9d,%ebp SAVE_EXTRA_REGS CLEAR_RREGS r9 - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS xchgl %ebp,%r9d - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -523,7 +511,7 @@ ENTRY(ia32_syscall) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -531,8 +519,6 @@ ENTRY(ia32_syscall) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz ia32_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys ia32_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -540,9 +526,12 @@ ia32_do_call: xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX(%rsp) +1: ia32_ret_from_sys_call: CLEAR_RREGS jmp int_ret_from_sys_call @@ -550,23 +539,14 @@ ia32_ret_from_sys_call: ia32_tracesys: SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call + CFI_ENDPROC END(ia32_syscall) -ia32_badsys: - movq $0,ORIG_RAX(%rsp) - movq $-ENOSYS,%rax - jmp ia32_sysret - - CFI_ENDPROC - .macro PTREGSCALL label, func ALIGN GLOBAL(\label) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index bdf02eeee765..e7636bac7372 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -18,6 +18,12 @@ .endm #endif +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ .macro altinstruction_entry orig alt feature orig_len alt_len pad_len .long \orig - . .long \alt - . @@ -27,6 +33,12 @@ .byte \pad_len .endm +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ .macro ALTERNATIVE oldinstr, newinstr, feature 140: \oldinstr @@ -55,6 +67,12 @@ */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 140: \oldinstr diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 976b86a325e5..c8393634ca0c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -644,6 +644,12 @@ static inline void entering_ack_irq(void) entering_irq(); } +static inline void ipi_entering_ack_irq(void) +{ + ack_APIC_irq(); + irq_enter(); +} + static inline void exiting_irq(void) { irq_exit(); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7730c1c5c83a..189679aba703 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -63,6 +63,31 @@ _ASM_ALIGN ; \ _ASM_PTR (entry); \ .popsection + +.macro ALIGN_DESTINATION + /* check for bad alignment of destination */ + movl %edi,%ecx + andl $7,%ecx + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + _ASM_EXTABLE(100b,103b) + _ASM_EXTABLE(101b,103b) + .endm + #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e5cd123fdfb..e9168955c42f 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -22,7 +22,7 @@ * * Atomically reads the value of @v. */ -static inline int atomic_read(const atomic_t *v) +static __always_inline int atomic_read(const atomic_t *v) { return ACCESS_ONCE((v)->counter); } @@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v) * * Atomically sets the value of @v to @i. */ -static inline void atomic_set(atomic_t *v, int i) +static __always_inline void atomic_set(atomic_t *v, int i) { v->counter = i; } @@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i) * * Atomically adds @i to @v. */ -static inline void atomic_add(int i, atomic_t *v) +static __always_inline void atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v) * * Atomically subtracts @i from @v. */ -static inline void atomic_sub(int i, atomic_t *v) +static __always_inline void atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline int atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -static inline void atomic_inc(atomic_t *v) +static __always_inline void atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); @@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic_dec(atomic_t *v) +static __always_inline void atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); @@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic_dec_and_test(atomic_t *v) +static __always_inline int atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -126,7 +126,7 @@ static inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic_inc_and_test(atomic_t *v) +static __always_inline int atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline int atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } @@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } @@ -164,7 +164,7 @@ static inline int atomic_add_return(int i, atomic_t *v) * * Atomically subtracts @i from @v and returns @v - @i */ -static inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int atomic_sub_return(int i, atomic_t *v) { return atomic_add_return(-i, v); } @@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); } @@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); @@ -213,7 +213,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) * Atomically adds 1 to @v * Returns the new value of @u */ -static inline short int atomic_inc_short(short int *v) +static __always_inline short int atomic_inc_short(short int *v) { asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); return *v; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index f8d273e18516..b965f9e03f2a 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static __always_inline void atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic64_dec(atomic64_t *v) +static __always_inline void atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index dc5fa661465f..27ca0afcccd7 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_HAVE_KVM BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR, smp_kvm_posted_intr_ipi) +BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR, + smp_kvm_posted_intr_wakeup_ipi) #endif /* diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f5fb6b6567e..986606539395 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,6 +14,7 @@ typedef struct { #endif #ifdef CONFIG_HAVE_KVM unsigned int kvm_posted_intr_ipis; + unsigned int kvm_posted_intr_wakeup_ipis; #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 36f7125945e3..5fa9fb0f8809 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -74,20 +74,16 @@ extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); struct irq_data; +struct hpet_dev; +struct irq_domain; + extern void hpet_msi_unmask(struct irq_data *data); extern void hpet_msi_mask(struct irq_data *data); -struct hpet_dev; extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); - -#ifdef CONFIG_PCI_MSI -extern int default_setup_hpet_msi(unsigned int irq, unsigned int id); -#else -static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id) -{ - return -EINVAL; -} -#endif +extern struct irq_domain *hpet_create_irq_domain(int hpet_id); +extern int hpet_assign_irq(struct irq_domain *domain, + struct hpet_dev *dev, int dev_num); #ifdef CONFIG_HPET_EMULATE_RTC diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..10c80d4f8386 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,6 +29,7 @@ extern asmlinkage void apic_timer_interrupt(void); extern asmlinkage void x86_platform_ipi(void); extern asmlinkage void kvm_posted_intr_ipi(void); +extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); @@ -36,40 +37,6 @@ extern asmlinkage void spurious_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); -extern asmlinkage void invalidate_interrupt(void); -extern asmlinkage void invalidate_interrupt0(void); -extern asmlinkage void invalidate_interrupt1(void); -extern asmlinkage void invalidate_interrupt2(void); -extern asmlinkage void invalidate_interrupt3(void); -extern asmlinkage void invalidate_interrupt4(void); -extern asmlinkage void invalidate_interrupt5(void); -extern asmlinkage void invalidate_interrupt6(void); -extern asmlinkage void invalidate_interrupt7(void); -extern asmlinkage void invalidate_interrupt8(void); -extern asmlinkage void invalidate_interrupt9(void); -extern asmlinkage void invalidate_interrupt10(void); -extern asmlinkage void invalidate_interrupt11(void); -extern asmlinkage void invalidate_interrupt12(void); -extern asmlinkage void invalidate_interrupt13(void); -extern asmlinkage void invalidate_interrupt14(void); -extern asmlinkage void invalidate_interrupt15(void); -extern asmlinkage void invalidate_interrupt16(void); -extern asmlinkage void invalidate_interrupt17(void); -extern asmlinkage void invalidate_interrupt18(void); -extern asmlinkage void invalidate_interrupt19(void); -extern asmlinkage void invalidate_interrupt20(void); -extern asmlinkage void invalidate_interrupt21(void); -extern asmlinkage void invalidate_interrupt22(void); -extern asmlinkage void invalidate_interrupt23(void); -extern asmlinkage void invalidate_interrupt24(void); -extern asmlinkage void invalidate_interrupt25(void); -extern asmlinkage void invalidate_interrupt26(void); -extern asmlinkage void invalidate_interrupt27(void); -extern asmlinkage void invalidate_interrupt28(void); -extern asmlinkage void invalidate_interrupt29(void); -extern asmlinkage void invalidate_interrupt30(void); -extern asmlinkage void invalidate_interrupt31(void); - extern asmlinkage void irq_move_cleanup_interrupt(void); extern asmlinkage void reboot_interrupt(void); extern asmlinkage void threshold_interrupt(void); @@ -92,55 +59,87 @@ extern void trace_call_function_single_interrupt(void); #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt #define trace_reboot_interrupt reboot_interrupt #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi +#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi #endif /* CONFIG_TRACING */ -#ifdef CONFIG_IRQ_REMAP -/* Intel specific interrupt remapping information */ -struct irq_2_iommu { - struct intel_iommu *iommu; - u16 irte_index; - u16 sub_handle; - u8 irte_mask; -}; - -/* AMD specific interrupt remapping information */ -struct irq_2_irte { - u16 devid; /* Device ID for IRTE table */ - u16 index; /* Index into IRTE table*/ -}; -#endif /* CONFIG_IRQ_REMAP */ - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; +struct pci_dev; +struct msi_desc; + +enum irq_alloc_type { + X86_IRQ_ALLOC_TYPE_IOAPIC = 1, + X86_IRQ_ALLOC_TYPE_HPET, + X86_IRQ_ALLOC_TYPE_MSI, + X86_IRQ_ALLOC_TYPE_MSIX, + X86_IRQ_ALLOC_TYPE_DMAR, + X86_IRQ_ALLOC_TYPE_UV, +}; -struct irq_cfg { - cpumask_var_t domain; - cpumask_var_t old_domain; - u8 vector; - u8 move_in_progress : 1; -#ifdef CONFIG_IRQ_REMAP - u8 remapped : 1; +struct irq_alloc_info { + enum irq_alloc_type type; + u32 flags; + const struct cpumask *mask; /* CPU mask for vector allocation */ union { - struct irq_2_iommu irq_2_iommu; - struct irq_2_irte irq_2_irte; - }; + int unused; +#ifdef CONFIG_HPET_TIMER + struct { + int hpet_id; + int hpet_index; + void *hpet_data; + }; #endif - union { -#ifdef CONFIG_X86_IO_APIC +#ifdef CONFIG_PCI_MSI struct { - struct list_head irq_2_pin; + struct pci_dev *msi_dev; + irq_hw_number_t msi_hwirq; + }; +#endif +#ifdef CONFIG_X86_IO_APIC + struct { + int ioapic_id; + int ioapic_pin; + int ioapic_node; + u32 ioapic_trigger : 1; + u32 ioapic_polarity : 1; + u32 ioapic_valid : 1; + struct IO_APIC_route_entry *ioapic_entry; + }; +#endif +#ifdef CONFIG_DMAR_TABLE + struct { + int dmar_id; + void *dmar_data; + }; +#endif +#ifdef CONFIG_HT_IRQ + struct { + int ht_pos; + int ht_idx; + struct pci_dev *ht_dev; + void *ht_update; + }; +#endif +#ifdef CONFIG_X86_UV + struct { + int uv_limit; + int uv_blade; + unsigned long uv_offset; + char *uv_name; }; #endif }; }; +struct irq_cfg { + unsigned int dest_apicid; + u8 vector; +}; + extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); -extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); -extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); -extern void clear_irq_vector(int irq, struct irq_cfg *cfg); extern void setup_vector_irq(int cpu); #ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); @@ -150,10 +149,7 @@ static inline void send_cleanup_vector(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif -extern int apic_retrigger_irq(struct irq_data *data); extern void apic_ack_edge(struct irq_data *data); -extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id); #else /* CONFIG_X86_LOCAL_APIC */ static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} @@ -163,8 +159,7 @@ static inline void unlock_vector_lock(void) {} extern atomic_t irq_err_count; extern atomic_t irq_mis_count; -/* EISA */ -extern void eisa_set_level_irq(unsigned int irq); +extern void elcr_set_level_irq(unsigned int irq); /* SMP */ extern __visible void smp_apic_timer_interrupt(struct pt_regs *); @@ -178,7 +173,6 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); extern __visible void smp_reschedule_interrupt(struct pt_regs *); extern __visible void smp_call_function_interrupt(struct pt_regs *); extern __visible void smp_call_function_single_interrupt(struct pt_regs *); -extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif extern char irq_entries_start[]; diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34a5b93704d3..a2b97404922d 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -177,6 +177,7 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * look at pci_iomap(). */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); @@ -338,6 +339,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, #define IO_SPACE_LIMIT 0xffff #ifdef CONFIG_MTRR +extern int __must_check arch_phys_wc_index(int handle); +#define arch_phys_wc_index arch_phys_wc_index + extern int __must_check arch_phys_wc_add(unsigned long base, unsigned long size); extern void arch_phys_wc_del(int handle); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 2f91685fe1cd..6cbf2cfb3f8a 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -95,9 +95,22 @@ struct IR_IO_APIC_route_entry { index : 15; } __attribute__ ((packed)); -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 +struct irq_alloc_info; +struct ioapic_domain_cfg; + +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#define IOAPIC_MASKED 1 +#define IOAPIC_UNMASKED 0 + +#define IOAPIC_POL_HIGH 0 +#define IOAPIC_POL_LOW 1 + +#define IOAPIC_DEST_MODE_PHYSICAL 0 +#define IOAPIC_DEST_MODE_LOGICAL 1 + #define IOAPIC_MAP_ALLOC 0x1 #define IOAPIC_MAP_CHECK 0x2 @@ -110,9 +123,6 @@ extern int nr_ioapics; extern int mpc_ioapic_id(int ioapic); extern unsigned int mpc_ioapic_addr(int ioapic); -extern struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic); - -#define MP_MAX_IOAPIC_PIN 127 /* # of MP IRQ source entries */ extern int mp_irq_entries; @@ -120,9 +130,6 @@ extern int mp_irq_entries; /* MP IRQ source entries */ extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; -/* Older SiS APIC requires we rewrite the index register */ -extern int sis_apic_bug; - /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; @@ -132,6 +139,8 @@ extern int noioapicquirk; /* -1 if "noapic" boot option passed */ extern int noioapicreroute; +extern u32 gsi_top; + extern unsigned long io_apic_irqs; #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs)) @@ -147,13 +156,6 @@ struct irq_cfg; extern void ioapic_insert_resources(void); extern int arch_early_ioapic_init(void); -extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, - unsigned int, int, - struct io_apic_irq_attr *); -extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); - -extern void native_eoi_ioapic_pin(int apic, int pin, int vector); - extern int save_ioapic_entries(void); extern void mask_ioapic_entries(void); extern int restore_ioapic_entries(void); @@ -161,82 +163,32 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); -struct io_apic_irq_attr { - int ioapic; - int ioapic_pin; - int trigger; - int polarity; -}; - -enum ioapic_domain_type { - IOAPIC_DOMAIN_INVALID, - IOAPIC_DOMAIN_LEGACY, - IOAPIC_DOMAIN_STRICT, - IOAPIC_DOMAIN_DYNAMIC, -}; - -struct device_node; -struct irq_domain; -struct irq_domain_ops; - -struct ioapic_domain_cfg { - enum ioapic_domain_type type; - const struct irq_domain_ops *ops; - struct device_node *dev; -}; - -struct mp_ioapic_gsi{ - u32 gsi_base; - u32 gsi_end; -}; -extern u32 gsi_top; - extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); -extern u32 mp_pin_to_gsi(int ioapic, int pin); -extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); +extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info); extern void mp_unmap_irq(int irq); extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg); extern int mp_unregister_ioapic(u32 gsi_base); extern int mp_ioapic_registered(u32 gsi_base); -extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq); -extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); -extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); -extern void __init pre_init_apic_IRQ0(void); + +extern void ioapic_set_alloc_attr(struct irq_alloc_info *info, + int node, int trigger, int polarity); extern void mp_save_irq(struct mpc_intsrc *m); extern void disable_ioapic_support(void); -extern void __init native_io_apic_init_mappings(void); +extern void __init io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); -extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); -extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); extern void native_disable_io_apic(void); -extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); -extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); -extern int native_ioapic_set_affinity(struct irq_data *, - const struct cpumask *, - bool); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { return x86_io_apic_ops.read(apic, reg); } -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - x86_io_apic_ops.write(apic, reg, value); -} -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - x86_io_apic_ops.modify(apic, reg, value); -} - -extern void io_apic_eoi(unsigned int apic, unsigned int vector); - extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); @@ -253,8 +205,12 @@ static inline int arch_early_ioapic_init(void) { return 0; } static inline void print_IO_APICs(void) {} #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } -static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } -static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } +static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info) +{ + return gsi; +} + static inline void mp_unmap_irq(int irq) { } static inline int save_ioapic_entries(void) @@ -268,17 +224,11 @@ static inline int restore_ioapic_entries(void) return -ENOMEM; } -static inline void mp_save_irq(struct mpc_intsrc *m) { }; +static inline void mp_save_irq(struct mpc_intsrc *m) { } static inline void disable_ioapic_support(void) { } -#define native_io_apic_init_mappings NULL +static inline void io_apic_init_mappings(void) { } #define native_io_apic_read NULL -#define native_io_apic_write NULL -#define native_io_apic_modify NULL #define native_disable_io_apic NULL -#define native_io_apic_print_entries NULL -#define native_ioapic_set_affinity NULL -#define native_setup_ioapic_entry NULL -#define native_eoi_ioapic_pin NULL static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index a80cbb88ea91..8008d06581c7 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -30,6 +30,10 @@ extern void fixup_irqs(void); extern void irq_force_complete_move(int); #endif +#ifdef CONFIG_HAVE_KVM +extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); +#endif + extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 6224d316c405..78974fbc33b4 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -22,14 +22,12 @@ #ifndef __X86_IRQ_REMAPPING_H #define __X86_IRQ_REMAPPING_H +#include <asm/irqdomain.h> +#include <asm/hw_irq.h> #include <asm/io_apic.h> -struct IO_APIC_route_entry; -struct io_apic_irq_attr; -struct irq_chip; struct msi_msg; -struct pci_dev; -struct irq_cfg; +struct irq_alloc_info; #ifdef CONFIG_IRQ_REMAP @@ -39,22 +37,21 @@ extern int irq_remapping_enable(void); extern void irq_remapping_disable(void); extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); -extern int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, - int vector, - struct io_apic_irq_attr *attr); -extern void free_remapped_irq(int irq); -extern void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id); -extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); extern void panic_if_irq_remap(const char *msg); -extern bool setup_remapped_irq(int irq, - struct irq_cfg *cfg, - struct irq_chip *chip); -void irq_remap_modify_chip_defaults(struct irq_chip *chip); +extern struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info); +extern struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info); + +/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ +extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); + +/* Get parent irqdomain for interrupt remapping irqdomain */ +static inline struct irq_domain *arch_get_ir_parent_domain(void) +{ + return x86_vector_domain; +} #else /* CONFIG_IRQ_REMAP */ @@ -64,42 +61,22 @@ static inline int irq_remapping_enable(void) { return -ENODEV; } static inline void irq_remapping_disable(void) { } static inline int irq_remapping_reenable(int eim) { return -ENODEV; } static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; } -static inline int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, - int vector, - struct io_apic_irq_attr *attr) -{ - return -ENODEV; -} -static inline void free_remapped_irq(int irq) { } -static inline void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ -} -static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) -{ - return -ENODEV; -} static inline void panic_if_irq_remap(const char *msg) { } -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +static inline struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) { + return NULL; } -static inline bool setup_remapped_irq(int irq, - struct irq_cfg *cfg, - struct irq_chip *chip) +static inline struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info) { - return false; + return NULL; } -#endif /* CONFIG_IRQ_REMAP */ - -#define dmar_alloc_hwirq() irq_alloc_hwirq(-1) -#define dmar_free_hwirq irq_free_hwirq +#endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..0ed29ac13a9d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -47,31 +47,12 @@ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR #define IA32_SYSCALL_VECTOR 0x80 -#ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 -#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. * round up to the next 16-vector boundary */ -#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) - -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff @@ -105,6 +86,7 @@ /* Vector for KVM to deliver posted interrupt IPI */ #ifdef CONFIG_HAVE_KVM #define POSTED_INTR_VECTOR 0xf2 +#define POSTED_INTR_WAKEUP_VECTOR 0xf1 #endif /* @@ -155,18 +137,22 @@ static inline int invalid_vm86_irq(int irq) * static arrays. */ -#define NR_IRQS_LEGACY 16 +#define NR_IRQS_LEGACY 16 -#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) +#define CPU_VECTOR_LIMIT (64 * NR_CPUS) +#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS) -#ifdef CONFIG_X86_IO_APIC -# define CPU_VECTOR_LIMIT (64 * NR_CPUS) -# define NR_IRQS \ +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI) +#define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -#else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS NR_IRQS_LEGACY +#elif defined(CONFIG_X86_IO_APIC) +#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +#elif defined(CONFIG_PCI_MSI) +#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT) +#else +#define NR_IRQS NR_IRQS_LEGACY #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h new file mode 100644 index 000000000000..d26075b52885 --- /dev/null +++ b/arch/x86/include/asm/irqdomain.h @@ -0,0 +1,63 @@ +#ifndef _ASM_IRQDOMAIN_H +#define _ASM_IRQDOMAIN_H + +#include <linux/irqdomain.h> +#include <asm/hw_irq.h> + +#ifdef CONFIG_X86_LOCAL_APIC +enum { + /* Allocate contiguous CPU vectors */ + X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, +}; + +extern struct irq_domain *x86_vector_domain; + +extern void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask); +extern void copy_irq_alloc_info(struct irq_alloc_info *dst, + struct irq_alloc_info *src); +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC +struct device_node; +struct irq_data; + +enum ioapic_domain_type { + IOAPIC_DOMAIN_INVALID, + IOAPIC_DOMAIN_LEGACY, + IOAPIC_DOMAIN_STRICT, + IOAPIC_DOMAIN_DYNAMIC, +}; + +struct ioapic_domain_cfg { + enum ioapic_domain_type type; + const struct irq_domain_ops *ops; + struct device_node *dev; +}; + +extern const struct irq_domain_ops mp_ioapic_irqdomain_ops; + +extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg); +extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +extern void mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data); +extern void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data); +extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); +#endif /* CONFIG_X86_IO_APIC */ + +#ifdef CONFIG_PCI_MSI +extern void arch_init_msi_domain(struct irq_domain *domain); +#else +static inline void arch_init_msi_domain(struct irq_domain *domain) { } +#endif + +#ifdef CONFIG_HT_IRQ +extern void arch_init_htirq_domain(struct irq_domain *domain); +#else +static inline void arch_init_htirq_domain(struct irq_domain *domain) { } +#endif + +#endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dea2e7e962e3..f4a555beef19 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -207,6 +207,7 @@ union kvm_mmu_page_role { unsigned nxe:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; + unsigned smap_andnot_wp:1; }; }; @@ -400,6 +401,7 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_header_cache; struct fpu guest_fpu; + bool eager_fpu; u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; @@ -743,6 +745,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h new file mode 100644 index 000000000000..93724cc62177 --- /dev/null +++ b/arch/x86/include/asm/msi.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_MSI_H +#define _ASM_X86_MSI_H +#include <asm/hw_irq.h> + +typedef struct irq_alloc_info msi_alloc_info_t; + +#endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index f768f6298419..b94f6f64e23d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -31,7 +31,7 @@ * arch_phys_wc_add and arch_phys_wc_del. */ # ifdef CONFIG_MTRR -extern u8 mtrr_type_lookup(u64 addr, u64 end); +extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); extern int mtrr_add(unsigned long base, unsigned long size, @@ -48,14 +48,13 @@ extern void mtrr_aps_init(void); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); -extern int phys_wc_to_mtrr_index(int handle); # else -static inline u8 mtrr_type_lookup(u64 addr, u64 end) +static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { /* * Return no-MTRRs: */ - return 0xff; + return MTRR_TYPE_INVALID; } #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) @@ -84,10 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } -static inline int phys_wc_to_mtrr_index(int handle) -{ - return -1; -} #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) @@ -127,4 +122,8 @@ struct mtrr_gentry32 { _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) #endif /* CONFIG_COMPAT */ +/* Bit fields for enabled in struct mtrr_state_type */ +#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 +#define MTRR_STATE_MTRR_ENABLED 0x02 + #endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f7b0b5c112f2..344c646e7f06 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -160,13 +160,14 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); +#ifdef CONFIG_X86_32 /* * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) + * is only used in 32-bit kernels. 64-bit kernels use + * usergs_sysret32 instead. */ void (*irq_enable_sysexit)(void); +#endif /* * Switch to usermode gs and return to 64-bit usermode using diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 91bc4ba95f91..cdcff7f7f694 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -4,12 +4,7 @@ #include <linux/types.h> #include <asm/pgtable_types.h> -#ifdef CONFIG_X86_PAT -extern int pat_enabled; -#else -static const int pat_enabled; -#endif - +bool pat_enabled(void); extern void pat_init(void); void pat_init_cache_modes(void); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 4e370a5d8117..d8c80ff32e8c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -96,15 +96,10 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_PCI_MSI /* implemented in arch/x86/kernel/apic/io_apic. */ struct msi_desc; -void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, - unsigned int dest, struct msi_msg *msg, u8 hpet_id); int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev); -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset); #else -#define native_compose_msi_msg NULL #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index aeb4666e0c0a..2270e41b32fd 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -215,6 +215,44 @@ static inline void clwb(volatile void *__p) : [pax] "a" (p)); } +/** + * pcommit_sfence() - persistent commit and fence + * + * The PCOMMIT instruction ensures that data that has been flushed from the + * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to + * memory and is durable on the DIMM. The primary use case for this is + * persistent memory. + * + * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT + * with appropriate fencing. + * + * Example: + * void flush_and_commit_buffer(void *vaddr, unsigned int size) + * { + * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; + * void *vend = vaddr + size; + * void *p; + * + * for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + * p < vend; p += boot_cpu_data.x86_clflush_size) + * clwb(p); + * + * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes + * // MFENCE via mb() also works + * wmb(); + * + * // PCOMMIT and the required SFENCE for ordering + * pcommit_sfence(); + * } + * + * After this function completes the data pointed to by 'vaddr' has been + * accepted to memory and will be durable if the 'vaddr' points to persistent + * memory. + * + * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify + * things we include both the PCOMMIT and the required SFENCE in the + * alternatives generated by pcommit_sfence(). + */ static inline void pcommit_sfence(void) { alternative(ASM_NOP7, diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b4bdec3e9523..225ee545e1a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -177,8 +177,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -DECLARE_PER_CPU(unsigned long, kernel_stack); - static inline struct thread_info *current_thread_info(void) { return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); @@ -197,9 +195,13 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_64 +# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +#endif + /* Load thread_info address into "reg" */ #define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; /* diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..0ed5504c6060 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); return ret; + case 8: + __put_user_size(*(u64 *)from, (u64 __user *)to, + 8, ret, 8); + return ret; } } return __copy_to_user_ll(to, from, n); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f58a9c7a3c86..48d34d28f5a6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -171,38 +171,17 @@ struct x86_platform_ops { }; struct pci_dev; -struct msi_msg; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); - void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq, - unsigned int dest, struct msi_msg *msg, - u8 hpet_id); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev); - int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; -struct IO_APIC_route_entry; -struct io_apic_irq_attr; -struct irq_data; -struct cpumask; - struct x86_io_apic_ops { - void (*init) (void); unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*write) (unsigned int apic, unsigned int reg, unsigned int value); - void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); - void (*print_entries)(unsigned int apic, unsigned int nr_entries); - int (*set_affinity)(struct irq_data *data, - const struct cpumask *mask, - bool force); - int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr); - void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c469490db4a8..3c6bb342a48f 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -140,6 +140,7 @@ #define MSR_CORE_C3_RESIDENCY 0x000003fc #define MSR_CORE_C6_RESIDENCY 0x000003fd #define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff #define MSR_PKG_C2_RESIDENCY 0x0000060d #define MSR_PKG_C8_RESIDENCY 0x00000630 #define MSR_PKG_C9_RESIDENCY 0x00000631 diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h index d0acb658c8f4..7528dcf59691 100644 --- a/arch/x86/include/uapi/asm/mtrr.h +++ b/arch/x86/include/uapi/asm/mtrr.h @@ -103,7 +103,7 @@ struct mtrr_state_type { #define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry) #define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry) -/* These are the region types */ +/* MTRR memory types, which are defined in SDM */ #define MTRR_TYPE_UNCACHABLE 0 #define MTRR_TYPE_WRCOMB 1 /*#define MTRR_TYPE_ 2*/ @@ -113,5 +113,11 @@ struct mtrr_state_type { #define MTRR_TYPE_WRBACK 6 #define MTRR_NUM_TYPES 7 +/* + * Invalid MTRR memory type. mtrr_type_lookup() returns this value when + * MTRRs are disabled. Note, this value is allocated from the reserved + * values (0x7-0xff) of the MTRR memory types. + */ +#define MTRR_TYPE_INVALID 0xff #endif /* _UAPI_ASM_X86_MTRR_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dbe76a14c3c9..e49ee24da85e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -31,12 +31,12 @@ #include <linux/module.h> #include <linux/dmi.h> #include <linux/irq.h> -#include <linux/irqdomain.h> #include <linux/slab.h> #include <linux/bootmem.h> #include <linux/ioport.h> #include <linux/pci.h> +#include <asm/irqdomain.h> #include <asm/pci_x86.h> #include <asm/pgtable.h> #include <asm/io_apic.h> @@ -400,57 +400,13 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } -static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, - int polarity) -{ - int irq, node; - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; - - trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; - polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; - node = dev ? dev_to_node(dev) : NUMA_NO_NODE; - if (mp_set_gsi_attr(gsi, trigger, polarity, node)) { - pr_warn("Failed to set pin attr for GSI%d\n", gsi); - return -1; - } - - irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); - if (irq < 0) - return irq; - - /* Don't set up the ACPI SCI because it's already set up */ - if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi) - mp_config_acpi_gsi(dev, gsi, trigger, polarity); - - return irq; -} - -static void mp_unregister_gsi(u32 gsi) -{ - int irq; - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return; - - irq = mp_map_gsi_to_irq(gsi, 0); - if (irq > 0) - mp_unmap_irq(irq); -} - -static struct irq_domain_ops acpi_irqdomain_ops = { - .map = mp_irqdomain_map, - .unmap = mp_irqdomain_unmap, -}; - static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_DYNAMIC, - .ops = &acpi_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; ioapic = (struct acpi_madt_io_apic *)header; @@ -652,7 +608,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi, * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (trigger == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); + elcr_set_level_irq(gsi); #endif return gsi; @@ -663,10 +619,21 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int trigger, int polarity) { int irq = gsi; - #ifdef CONFIG_X86_IO_APIC + int node; + struct irq_alloc_info info; + + node = dev ? dev_to_node(dev) : NUMA_NO_NODE; + trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; + polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; + ioapic_set_alloc_attr(&info, node, trigger, polarity); + mutex_lock(&acpi_ioapic_lock); - irq = mp_register_gsi(dev, gsi, trigger, polarity); + irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info); + /* Don't set up the ACPI SCI because it's already set up */ + if (irq >= 0 && enable_update_mptable && + acpi_gbl_FADT.sci_interrupt != gsi) + mp_config_acpi_gsi(dev, gsi, trigger, polarity); mutex_unlock(&acpi_ioapic_lock); #endif @@ -676,8 +643,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, static void acpi_unregister_gsi_ioapic(u32 gsi) { #ifdef CONFIG_X86_IO_APIC + int irq; + mutex_lock(&acpi_ioapic_lock); - mp_unregister_gsi(gsi); + irq = mp_map_gsi_to_irq(gsi, 0, NULL); + if (irq > 0) + mp_unmap_irq(irq); mutex_unlock(&acpi_ioapic_lock); #endif } @@ -786,7 +757,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) u64 addr; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_DYNAMIC, - .ops = &acpi_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr); diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index ae693b51ed8e..8c35df468104 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -62,7 +62,7 @@ ENTRY(do_suspend_lowlevel) pushfq popq pt_regs_flags(%rax) - movq $resume_point, saved_rip(%rip) + movq $.Lresume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -75,10 +75,10 @@ ENTRY(do_suspend_lowlevel) xorl %eax, %eax call x86_acpi_enter_sleep_state /* in case something went wrong, restore the machine status and go on */ - jmp resume_point + jmp .Lresume_point .align 4 -resume_point: +.Lresume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..b0932c4341b3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -227,6 +227,15 @@ void __init arch_init_ideal_nops(void) #endif } break; + + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 > 0xf) { + ideal_nops = p6_nops; + return; + } + + /* fall through */ + default: #ifdef CONFIG_X86_64 ideal_nops = k8_nops; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 6a7c23ff21d3..ede92c3364d3 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -171,10 +171,6 @@ static int __init apbt_clockevent_register(void) static void apbt_setup_irq(struct apbt_dev *adev) { - /* timer0 irq has been setup early */ - if (adev->irq == 0) - return; - irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); } diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index 816f36e979ad..ae50d3454d78 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu <jiang.liu@linux.intel.com> + * Add support of hierarchical irqdomain * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,78 +16,112 @@ #include <linux/device.h> #include <linux/pci.h> #include <linux/htirq.h> +#include <asm/irqdomain.h> #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/hypertransport.h> +static struct irq_domain *htirq_domain; + /* * Hypertransport interrupt support */ -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - static int ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest; + struct irq_data *parent = data->parent_data; int ret; - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - target_ht_irq(data->irq, dest, cfg->vector); - return IRQ_SET_MASK_OK_NOCOPY; + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + struct ht_irq_msg msg; + struct irq_cfg *cfg = irqd_cfg(data); + + fetch_ht_irq_msg(data->irq, &msg); + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | + HT_IRQ_LOW_DEST_ID_MASK); + msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) | + HT_IRQ_LOW_DEST_ID(cfg->dest_apicid); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); + write_ht_irq_msg(data->irq, &msg); + } + + return ret; } static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .irq_mask = mask_ht_irq, .irq_unmask = unmask_ht_irq, - .irq_ack = apic_ack_edge, + .irq_ack = irq_chip_ack_parent, .irq_set_affinity = ht_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct irq_cfg *cfg; - struct ht_irq_msg msg; - unsigned dest; - int err; + struct ht_irq_cfg *ht_cfg; + struct irq_alloc_info *info = arg; + struct pci_dev *dev; + irq_hw_number_t hwirq; + int ret; - if (disable_apic) - return -ENXIO; + if (nr_irqs > 1 || !info) + return -EINVAL; - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; + dev = info->ht_dev; + hwirq = (info->ht_idx & 0xFF) | + PCI_DEVID(dev->bus->number, dev->devfn) << 8 | + (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24; + if (irq_find_mapping(domain, hwirq) > 0) + return -EEXIST; - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; + ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL); + if (!ht_cfg) + return -ENOMEM; - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); + if (ret < 0) { + kfree(ht_cfg); + return ret; + } + + /* Initialize msg to a value that will never match the first write. */ + ht_cfg->msg.address_lo = 0xffffffff; + ht_cfg->msg.address_hi = 0xffffffff; + ht_cfg->dev = info->ht_dev; + ht_cfg->update = info->ht_update; + ht_cfg->pos = info->ht_pos; + ht_cfg->idx = 0x10 + (info->ht_idx * 2); + irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg, + handle_edge_irq, ht_cfg, "edge"); + + return 0; +} + +static void htirq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + BUG_ON(nr_irqs != 1); + kfree(irq_data->chip_data); + irq_domain_free_irqs_top(domain, virq, nr_irqs); +} +static void htirq_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct ht_irq_msg msg; + struct irq_cfg *cfg = irqd_cfg(irq_data); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); msg.address_lo = HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) | HT_IRQ_LOW_VECTOR(cfg->vector) | ((apic->irq_dest_mode == 0) ? HT_IRQ_LOW_DM_PHYSICAL : @@ -95,13 +131,56 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; + write_ht_irq_msg(irq_data->irq, &msg); +} - write_ht_irq_msg(irq, &msg); +static void htirq_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct ht_irq_msg msg; - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); + memset(&msg, 0, sizeof(msg)); + write_ht_irq_msg(irq_data->irq, &msg); +} - dev_dbg(&dev->dev, "irq %d for HT\n", irq); +static const struct irq_domain_ops htirq_domain_ops = { + .alloc = htirq_domain_alloc, + .free = htirq_domain_free, + .activate = htirq_domain_activate, + .deactivate = htirq_domain_deactivate, +}; - return 0; +void arch_init_htirq_domain(struct irq_domain *parent) +{ + if (disable_apic) + return; + + htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL); + if (!htirq_domain) + pr_warn("failed to initialize irqdomain for HTIRQ.\n"); + else + htirq_domain->parent = parent; +} + +int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, + ht_irq_update_t *update) +{ + struct irq_alloc_info info; + + if (!htirq_domain) + return -ENOSYS; + + init_irq_alloc_info(&info, NULL); + info.ht_idx = idx; + info.ht_pos = pos; + info.ht_dev = dev; + info.ht_update = update; + + return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev), + &info); +} + +void arch_teardown_ht_irq(unsigned int irq) +{ + irq_domain_free_irqs(irq, 1); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..845dc0df2002 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -18,6 +18,16 @@ * and Rolf G. Tews * for testing these extensively * Paul Diefenbaugh : Added full ACPI support + * + * Historical information which is worth to be preserved: + * + * - SiS APIC rmw bug: + * + * We used to have a workaround for a bug in SiS chips which + * required to rewrite the index register for a read-modify-write + * operation as the chip lost the index information which was + * setup for the read already. We cache the data now, so that + * workaround has been removed. */ #include <linux/mm.h> @@ -31,13 +41,13 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/syscore_ops.h> -#include <linux/irqdomain.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/jiffies.h> /* time_after() */ #include <linux/slab.h> #include <linux/bootmem.h> +#include <asm/irqdomain.h> #include <asm/idle.h> #include <asm/io.h> #include <asm/smp.h> @@ -63,27 +73,31 @@ #define for_each_ioapic_pin(idx, pin) \ for_each_ioapic((idx)) \ for_each_pin((idx), (pin)) - #define for_each_irq_pin(entry, head) \ list_for_each_entry(entry, &head, list) -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; static int ioapic_initialized; -struct mp_pin_info { +struct irq_pin_list { + struct list_head list; + int apic, pin; +}; + +struct mp_chip_data { + struct list_head irq_2_pin; + struct IO_APIC_route_entry entry; int trigger; int polarity; - int node; - int set; u32 count; + bool isa_irq; +}; + +struct mp_ioapic_gsi { + u32 gsi_base; + u32 gsi_end; }; static struct ioapic { @@ -101,7 +115,6 @@ static struct ioapic { struct mp_ioapic_gsi gsi_config; struct ioapic_domain_cfg irqdomain_cfg; struct irq_domain *irqdomain; - struct mp_pin_info *pin_info; struct resource *iomem_res; } ioapics[MAX_IO_APICS]; @@ -117,7 +130,7 @@ unsigned int mpc_ioapic_addr(int ioapic_idx) return ioapics[ioapic_idx].mp_config.apicaddr; } -struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) +static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) { return &ioapics[ioapic_idx].gsi_config; } @@ -129,11 +142,16 @@ static inline int mp_ioapic_pin_count(int ioapic) return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; } -u32 mp_pin_to_gsi(int ioapic, int pin) +static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; } +static inline bool mp_is_legacy_irq(int irq) +{ + return irq >= 0 && irq < nr_legacy_irqs(); +} + /* * Initialize all legacy IRQs and all pins on the first IOAPIC * if we have legacy interrupt controller. Kernel boot option "pirq=" @@ -144,12 +162,7 @@ static inline int mp_init_irq_at_boot(int ioapic, int irq) if (!nr_legacy_irqs()) return 0; - return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs()); -} - -static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin) -{ - return ioapics[ioapic_idx].pin_info + pin; + return ioapic == 0 || mp_is_legacy_irq(irq); } static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) @@ -216,16 +229,6 @@ void mp_save_irq(struct mpc_intsrc *m) panic("Max # of irq sources exceeded!!\n"); } -struct irq_pin_list { - struct list_head list; - int apic, pin; -}; - -static struct irq_pin_list *alloc_irq_pin_list(int node) -{ - return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); -} - static void alloc_ioapic_saved_registers(int idx) { size_t size; @@ -247,8 +250,7 @@ static void free_ioapic_saved_registers(int idx) int __init arch_early_ioapic_init(void) { - struct irq_cfg *cfg; - int i, node = cpu_to_node(0); + int i; if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; @@ -256,16 +258,6 @@ int __init arch_early_ioapic_init(void) for_each_ioapic(i) alloc_ioapic_saved_registers(i); - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. - */ - for (i = 0; i < nr_legacy_irqs(); i++) { - cfg = alloc_irq_and_cfg_at(i, node); - cfg->vector = IRQ0_VECTOR + i; - cpumask_setall(cfg->domain); - } - return 0; } @@ -283,7 +275,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mpc_ioapic_addr(idx) & ~PAGE_MASK); } -void io_apic_eoi(unsigned int apic, unsigned int vector) +static inline void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(vector, &io_apic->eoi); @@ -296,7 +288,8 @@ unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) return readl(&io_apic->data); } -void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +static void io_apic_write(unsigned int apic, unsigned int reg, + unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -304,21 +297,6 @@ void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int valu writel(value, &io_apic->data); } -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; @@ -378,7 +356,7 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) static void ioapic_mask_entry(int apic, int pin) { unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; + union entry_union eu = { .entry.mask = IOAPIC_MASKED }; raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); @@ -391,16 +369,17 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int __add_pin_to_irq_node(struct mp_chip_data *data, + int node, int apic, int pin) { struct irq_pin_list *entry; /* don't allow duplicates */ - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) if (entry->apic == apic && entry->pin == pin) return 0; - entry = alloc_irq_pin_list(node); + entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node); if (!entry) { pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", node, apic, pin); @@ -408,16 +387,16 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi } entry->apic = apic; entry->pin = pin; + list_add_tail(&entry->list, &data->irq_2_pin); - list_add_tail(&entry->list, &cfg->irq_2_pin); return 0; } -static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) +static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin) { struct irq_pin_list *tmp, *entry; - list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list) + list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) if (entry->apic == apic && entry->pin == pin) { list_del(&entry->list); kfree(entry); @@ -425,22 +404,23 @@ static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) } } -static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static void add_pin_to_irq_node(struct mp_chip_data *data, + int node, int apic, int pin) { - if (__add_pin_to_irq_node(cfg, node, apic, pin)) + if (__add_pin_to_irq_node(data, node, apic, pin)) panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); } /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, +static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, int oldapic, int oldpin, int newapic, int newpin) { struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, data->irq_2_pin) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; @@ -450,32 +430,26 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, } /* old apic/pin didn't exist, so just add new ones */ - add_pin_to_irq_node(cfg, node, newapic, newpin); -} - -static void __io_apic_modify_irq(struct irq_pin_list *entry, - int mask_and, int mask_or, - void (*final)(struct irq_pin_list *entry)) -{ - unsigned int reg, pin; - - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); + add_pin_to_irq_node(data, node, newapic, newpin); } -static void io_apic_modify_irq(struct irq_cfg *cfg, +static void io_apic_modify_irq(struct mp_chip_data *data, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { + union entry_union eu; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) - __io_apic_modify_irq(entry, mask_and, mask_or, final); + eu.entry = data->entry; + eu.w1 &= mask_and; + eu.w1 |= mask_or; + data->entry = eu.entry; + + for_each_irq_pin(entry, data->irq_2_pin) { + io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1); + if (final) + final(entry); + } } static void io_apic_sync(struct irq_pin_list *entry) @@ -490,39 +464,31 @@ static void io_apic_sync(struct irq_pin_list *entry) readl(&io_apic->data); } -static void mask_ioapic(struct irq_cfg *cfg) +static void mask_ioapic_irq(struct irq_data *irq_data) { + struct mp_chip_data *data = irq_data->chip_data; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } -static void mask_ioapic_irq(struct irq_data *data) +static void __unmask_ioapic(struct mp_chip_data *data) { - mask_ioapic(irqd_cfg(data)); + io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL); } -static void __unmask_ioapic(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); -} - -static void unmask_ioapic(struct irq_cfg *cfg) +static void unmask_ioapic_irq(struct irq_data *irq_data) { + struct mp_chip_data *data = irq_data->chip_data; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - __unmask_ioapic(cfg); + __unmask_ioapic(data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_ioapic_irq(struct irq_data *data) -{ - unmask_ioapic(irqd_cfg(data)); -} - /* * IO-APIC versions below 0x20 don't support EOI register. * For the record, here is the information about various versions: @@ -539,7 +505,7 @@ static void unmask_ioapic_irq(struct irq_data *data) * Otherwise, we simulate the EOI message manually by changing the trigger * mode to edge and then back to level, with RTE being masked during this. */ -void native_eoi_ioapic_pin(int apic, int pin, int vector) +static void __eoi_ioapic_pin(int apic, int pin, int vector) { if (mpc_ioapic_ver(apic) >= 0x20) { io_apic_eoi(apic, vector); @@ -551,7 +517,7 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector) /* * Mask the entry and change the trigger mode to edge. */ - entry1.mask = 1; + entry1.mask = IOAPIC_MASKED; entry1.trigger = IOAPIC_EDGE; __ioapic_write_entry(apic, pin, entry1); @@ -563,15 +529,14 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector) } } -void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +void eoi_ioapic_pin(int vector, struct mp_chip_data *data) { - struct irq_pin_list *entry; unsigned long flags; + struct irq_pin_list *entry; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) - x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, - cfg->vector); + for_each_irq_pin(entry, data->irq_2_pin) + __eoi_ioapic_pin(entry->apic, entry->pin, vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -588,8 +553,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) * Make sure the entry is masked and re-read the contents to check * if it is a level triggered pin and if the remote-IRR is set. */ - if (!entry.mask) { - entry.mask = 1; + if (entry.mask == IOAPIC_UNMASKED) { + entry.mask = IOAPIC_MASKED; ioapic_write_entry(apic, pin, entry); entry = ioapic_read_entry(apic, pin); } @@ -602,13 +567,12 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) * doesn't clear the remote-IRR if the trigger mode is not * set to level. */ - if (!entry.trigger) { + if (entry.trigger == IOAPIC_EDGE) { entry.trigger = IOAPIC_LEVEL; ioapic_write_entry(apic, pin, entry); } - raw_spin_lock_irqsave(&ioapic_lock, flags); - x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector); + __eoi_ioapic_pin(apic, pin, entry.vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -706,8 +670,8 @@ void mask_ioapic_entries(void) struct IO_APIC_route_entry entry; entry = ioapics[apic].saved_registers[pin]; - if (!entry.mask) { - entry.mask = 1; + if (entry.mask == IOAPIC_UNMASKED) { + entry.mask = IOAPIC_MASKED; ioapic_write_entry(apic, pin, entry); } } @@ -809,11 +773,11 @@ static int EISA_ELCR(unsigned int irq) #endif -/* ISA interrupts are always polarity zero edge triggered, +/* ISA interrupts are always active high edge triggered, * when listed as conforming in the MP table. */ -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) +#define default_ISA_trigger(idx) (IOAPIC_EDGE) +#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH) /* EISA interrupts are always polarity zero and can be edge or level * trigger depending on the ELCR value. If an interrupt is listed as @@ -823,53 +787,55 @@ static int EISA_ELCR(unsigned int irq) #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) -/* PCI interrupts are always polarity one level triggered, +/* PCI interrupts are always active low level triggered, * when listed as conforming in the MP table. */ -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) +#define default_PCI_trigger(idx) (IOAPIC_LEVEL) +#define default_PCI_polarity(idx) (IOAPIC_POL_LOW) static int irq_polarity(int idx) { int bus = mp_irqs[idx].srcbus; - int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - polarity = default_ISA_polarity(idx); - else - polarity = default_PCI_polarity(idx); - break; - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - pr_warn("broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - pr_warn("broken BIOS!!\n"); - polarity = 1; - break; - } + switch (mp_irqs[idx].irqflag & 0x03) { + case 0: + /* conforms to spec, ie. bus-type dependent polarity */ + if (test_bit(bus, mp_bus_not_pci)) + return default_ISA_polarity(idx); + else + return default_PCI_polarity(idx); + case 1: + return IOAPIC_POL_HIGH; + case 2: + pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); + case 3: + default: /* Pointless default required due to do gcc stupidity */ + return IOAPIC_POL_LOW; + } +} + +#ifdef CONFIG_EISA +static int eisa_irq_trigger(int idx, int bus, int trigger) +{ + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_PCI: + case MP_BUS_ISA: + return trigger; + case MP_BUS_EISA: + return default_EISA_trigger(idx); } - return polarity; + pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus); + return IOAPIC_LEVEL; } +#else +static inline int eisa_irq_trigger(int idx, int bus, int trigger) +{ + return trigger; +} +#endif static int irq_trigger(int idx) { @@ -879,153 +845,227 @@ static int irq_trigger(int idx) /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); -#ifdef CONFIG_EISA - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - default: - { - pr_warn("broken BIOS!!\n"); - trigger = 1; - break; - } - } + switch ((mp_irqs[idx].irqflag >> 2) & 0x03) { + case 0: + /* conforms to spec, ie. bus-type dependent trigger mode */ + if (test_bit(bus, mp_bus_not_pci)) + trigger = default_ISA_trigger(idx); + else + trigger = default_PCI_trigger(idx); + /* Take EISA into account */ + return eisa_irq_trigger(idx, bus, trigger); + case 1: + return IOAPIC_EDGE; + case 2: + pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); + case 3: + default: /* Pointless default required due to do gcc stupidity */ + return IOAPIC_LEVEL; + } +} + +void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, + int trigger, int polarity) +{ + init_irq_alloc_info(info, NULL); + info->type = X86_IRQ_ALLOC_TYPE_IOAPIC; + info->ioapic_node = node; + info->ioapic_trigger = trigger; + info->ioapic_polarity = polarity; + info->ioapic_valid = 1; +} + +#ifndef CONFIG_ACPI +int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #endif - break; - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - pr_warn("broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - pr_warn("broken BIOS!!\n"); - trigger = 0; - break; + +static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst, + struct irq_alloc_info *src, + u32 gsi, int ioapic_idx, int pin) +{ + int trigger, polarity; + + copy_irq_alloc_info(dst, src); + dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC; + dst->ioapic_id = mpc_ioapic_id(ioapic_idx); + dst->ioapic_pin = pin; + dst->ioapic_valid = 1; + if (src && src->ioapic_valid) { + dst->ioapic_node = src->ioapic_node; + dst->ioapic_trigger = src->ioapic_trigger; + dst->ioapic_polarity = src->ioapic_polarity; + } else { + dst->ioapic_node = NUMA_NO_NODE; + if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) { + dst->ioapic_trigger = trigger; + dst->ioapic_polarity = polarity; + } else { + /* + * PCI interrupts are always active low level + * triggered. + */ + dst->ioapic_trigger = IOAPIC_LEVEL; + dst->ioapic_polarity = IOAPIC_POL_LOW; } } - return trigger; } -static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin) +static int ioapic_alloc_attr_node(struct irq_alloc_info *info) +{ + return (info && info->ioapic_valid) ? info->ioapic_node : NUMA_NO_NODE; +} + +static void mp_register_handler(unsigned int irq, unsigned long trigger) +{ + irq_flow_handler_t hdl; + bool fasteoi; + + if (trigger) { + irq_set_status_flags(irq, IRQ_LEVEL); + fasteoi = true; + } else { + irq_clear_status_flags(irq, IRQ_LEVEL); + fasteoi = false; + } + + hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; + __irq_set_handler(irq, hdl, 0, fasteoi ? "fasteoi" : "edge"); +} + +static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) { + struct mp_chip_data *data = irq_get_chip_data(irq); + + /* + * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger + * and polarity attirbutes. So allow the first user to reprogram the + * pin with real trigger and polarity attributes. + */ + if (irq < nr_legacy_irqs() && data->count == 1) { + if (info->ioapic_trigger != data->trigger) + mp_register_handler(irq, data->trigger); + data->entry.trigger = data->trigger = info->ioapic_trigger; + data->entry.polarity = data->polarity = info->ioapic_polarity; + } + + return data->trigger == info->ioapic_trigger && + data->polarity == info->ioapic_polarity; +} + +static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, + struct irq_alloc_info *info) +{ + bool legacy = false; int irq = -1; - int ioapic = (int)(long)domain->host_data; int type = ioapics[ioapic].irqdomain_cfg.type; switch (type) { case IOAPIC_DOMAIN_LEGACY: /* - * Dynamically allocate IRQ number for non-ISA IRQs in the first 16 - * GSIs on some weird platforms. + * Dynamically allocate IRQ number for non-ISA IRQs in the first + * 16 GSIs on some weird platforms. */ - if (gsi < nr_legacy_irqs()) - irq = irq_create_mapping(domain, pin); - else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) + if (!ioapic_initialized || gsi >= nr_legacy_irqs()) irq = gsi; + legacy = mp_is_legacy_irq(irq); break; case IOAPIC_DOMAIN_STRICT: - if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0) - irq = gsi; + irq = gsi; break; case IOAPIC_DOMAIN_DYNAMIC: - irq = irq_create_mapping(domain, pin); break; default: WARN(1, "ioapic: unknown irqdomain type %d\n", type); - break; + return -1; + } + + return __irq_domain_alloc_irqs(domain, irq, 1, + ioapic_alloc_attr_node(info), + info, legacy); +} + +/* + * Need special handling for ISA IRQs because there may be multiple IOAPIC pins + * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping + * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are + * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). + * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and + * some BIOSes may use MP Interrupt Source records to override IRQ numbers for + * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be + * multiple pins sharing the same legacy IRQ number when ACPI is disabled. + */ +static int alloc_isa_irq_from_domain(struct irq_domain *domain, + int irq, int ioapic, int pin, + struct irq_alloc_info *info) +{ + struct mp_chip_data *data; + struct irq_data *irq_data = irq_get_irq_data(irq); + int node = ioapic_alloc_attr_node(info); + + /* + * Legacy ISA IRQ has already been allocated, just add pin to + * the pin list assoicated with this IRQ and program the IOAPIC + * entry. The IOAPIC entry + */ + if (irq_data && irq_data->parent_data) { + if (!mp_check_pin_attr(irq, info)) + return -EBUSY; + if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic, + info->ioapic_pin)) + return -ENOMEM; + } else { + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); + if (irq >= 0) { + irq_data = irq_domain_get_irq_data(domain, irq); + data = irq_data->chip_data; + data->isa_irq = true; + } } - return irq > 0 ? irq : -1; + return irq; } static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, - unsigned int flags) + unsigned int flags, struct irq_alloc_info *info) { int irq; + bool legacy = false; + struct irq_alloc_info tmp; + struct mp_chip_data *data; struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); - struct mp_pin_info *info = mp_pin_info(ioapic, pin); if (!domain) - return -1; + return -ENOSYS; - mutex_lock(&ioapic_mutex); - - /* - * Don't use irqdomain to manage ISA IRQs because there may be - * multiple IOAPIC pins sharing the same ISA IRQ number and - * irqdomain only supports 1:1 mapping between IOAPIC pin and - * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used - * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). - * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are - * available, and some BIOSes may use MP Interrupt Source records - * to override IRQ numbers for PIRQs instead of reprogramming - * the interrupt routing logic. Thus there may be multiple pins - * sharing the same legacy IRQ number when ACPI is disabled. - */ if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; - if (flags & IOAPIC_MAP_ALLOC) { - if (info->count == 0 && - mp_irqdomain_map(domain, irq, pin) != 0) - irq = -1; + legacy = mp_is_legacy_irq(irq); + } - /* special handling for timer IRQ0 */ + mutex_lock(&ioapic_mutex); + if (!(flags & IOAPIC_MAP_ALLOC)) { + if (!legacy) { + irq = irq_find_mapping(domain, pin); if (irq == 0) - info->count++; + irq = -ENOENT; } } else { - irq = irq_find_mapping(domain, pin); - if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) - irq = alloc_irq_from_domain(domain, gsi, pin); - } - - if (flags & IOAPIC_MAP_ALLOC) { - /* special handling for legacy IRQs */ - if (irq < nr_legacy_irqs() && info->count == 1 && - mp_irqdomain_map(domain, irq, pin) != 0) - irq = -1; - - if (irq > 0) - info->count++; - else if (info->count == 0) - info->set = 0; + ioapic_copy_alloc_attr(&tmp, info, gsi, ioapic, pin); + if (legacy) + irq = alloc_isa_irq_from_domain(domain, irq, + ioapic, pin, &tmp); + else if ((irq = irq_find_mapping(domain, pin)) == 0) + irq = alloc_irq_from_domain(domain, ioapic, gsi, &tmp); + else if (!mp_check_pin_attr(irq, &tmp)) + irq = -EBUSY; + if (irq >= 0) { + data = irq_get_chip_data(irq); + data->count++; + } } - mutex_unlock(&ioapic_mutex); - return irq > 0 ? irq : -1; + return irq; } static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) @@ -1058,10 +1098,10 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) } #endif - return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL); } -int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) +int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) { int ioapic, pin, idx; @@ -1074,31 +1114,24 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) if ((flags & IOAPIC_MAP_CHECK) && idx < 0) return -1; - return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info); } void mp_unmap_irq(int irq) { - struct irq_data *data = irq_get_irq_data(irq); - struct mp_pin_info *info; - int ioapic, pin; + struct irq_data *irq_data = irq_get_irq_data(irq); + struct mp_chip_data *data; - if (!data || !data->domain) + if (!irq_data || !irq_data->domain) return; - ioapic = (int)(long)data->domain->host_data; - pin = (int)data->hwirq; - info = mp_pin_info(ioapic, pin); + data = irq_data->chip_data; + if (!data || data->isa_irq) + return; mutex_lock(&ioapic_mutex); - if (--info->count == 0) { - info->set = 0; - if (irq < nr_legacy_irqs() && - ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY) - mp_irqdomain_unmap(data->domain, irq); - else - irq_dispose_mapping(irq); - } + if (--data->count == 0) + irq_domain_free_irqs(irq, 1); mutex_unlock(&ioapic_mutex); } @@ -1165,7 +1198,7 @@ out: } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -static struct irq_chip ioapic_chip; +static struct irq_chip ioapic_chip, ioapic_ir_chip; #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) @@ -1189,96 +1222,6 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, - unsigned long trigger) -{ - struct irq_chip *chip = &ioapic_chip; - irq_flow_handler_t hdl; - bool fasteoi; - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { - irq_set_status_flags(irq, IRQ_LEVEL); - fasteoi = true; - } else { - irq_clear_status_flags(irq, IRQ_LEVEL); - fasteoi = false; - } - - if (setup_remapped_irq(irq, cfg, chip)) - fasteoi = trigger != 0; - - hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; - irq_set_chip_and_handler_name(irq, chip, hdl, - fasteoi ? "fasteoi" : "edge"); -} - -int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - memset(entry, 0, sizeof(*entry)); - - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->dest = destination; - entry->vector = vector; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* - * Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - -static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, - struct io_apic_irq_attr *attr) -{ - struct IO_APIC_route_entry entry; - unsigned int dest; - - if (!IO_APIC_IRQ(irq)) - return; - - if (assign_irq_vector(irq, cfg, apic->target_cpus())) - return; - - if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), - &dest)) { - pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", - mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - clear_irq_vector(irq, cfg); - - return; - } - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i Dest:%d)\n", - attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, - cfg->vector, irq, attr->trigger, attr->polarity, dest); - - if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - clear_irq_vector(irq, cfg); - - return; - } - - ioapic_register_intr(irq, cfg, attr->trigger); - if (irq < nr_legacy_irqs()) - legacy_pic->mask(irq); - - ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); -} - static void __init setup_IO_APIC_irqs(void) { unsigned int ioapic, pin; @@ -1298,106 +1241,41 @@ static void __init setup_IO_APIC_irqs(void) } } -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, - unsigned int pin, int vector) -{ - struct IO_APIC_route_entry entry; - unsigned int dest; - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), - apic->target_cpus(), &dest))) - dest = BAD_APICID; - - entry.dest_mode = apic->irq_dest_mode; - entry.mask = 0; /* don't mask IRQ for edge */ - entry.dest = dest; - entry.delivery_mode = apic->irq_delivery_mode; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_idx, pin, entry); -} - -void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) +void ioapic_zap_locks(void) { - int i; - - pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); - - for (i = 0; i <= nr_entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - pr_debug(" %02x %02X ", i, entry.dest); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector); - } + raw_spin_lock_init(&ioapic_lock); } -void intel_ir_io_apic_print_entries(unsigned int apic, - unsigned int nr_entries) +static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) { int i; + char buf[256]; + struct IO_APIC_route_entry entry; + struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry; - pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); - + printk(KERN_DEBUG "IOAPIC %d:\n", apic); for (i = 0; i <= nr_entries; i++) { - struct IR_IO_APIC_route_entry *ir_entry; - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, i); - - ir_entry = (struct IR_IO_APIC_route_entry *)&entry; - - pr_debug(" %02x %04X ", i, ir_entry->index); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %X %02X\n", - ir_entry->format, - ir_entry->mask, - ir_entry->trigger, - ir_entry->irr, - ir_entry->polarity, - ir_entry->delivery_status, - ir_entry->index2, - ir_entry->zero, - ir_entry->vector); + snprintf(buf, sizeof(buf), + " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", + i, + entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ", + entry.trigger == IOAPIC_LEVEL ? "level" : "edge ", + entry.polarity == IOAPIC_POL_LOW ? "low " : "high", + entry.vector, entry.irr, entry.delivery_status); + if (ir_entry->format) + printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", + buf, (ir_entry->index << 15) | ir_entry->index, + ir_entry->zero); + else + printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", + buf, + entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ? + "logical " : "physical", + entry.dest, entry.delivery_mode); } } -void ioapic_zap_locks(void) -{ - raw_spin_lock_init(&ioapic_lock); -} - static void __init print_IO_APIC(int ioapic_idx) { union IO_APIC_reg_00 reg_00; @@ -1451,16 +1329,13 @@ static void __init print_IO_APIC(int ioapic_idx) } printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); + io_apic_print_entries(ioapic_idx, reg_01.bits.entries); } void __init print_IO_APICs(void) { int ioapic_idx; - struct irq_cfg *cfg; unsigned int irq; - struct irq_chip *chip; printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for_each_ioapic(ioapic_idx) @@ -1480,18 +1355,20 @@ void __init print_IO_APICs(void) printk(KERN_DEBUG "IRQ to pin mappings:\n"); for_each_active_irq(irq) { struct irq_pin_list *entry; + struct irq_chip *chip; + struct mp_chip_data *data; chip = irq_get_chip(irq); - if (chip != &ioapic_chip) + if (chip != &ioapic_chip && chip != &ioapic_ir_chip) continue; - - cfg = irq_cfg(irq); - if (!cfg) + data = irq_get_chip_data(irq); + if (!data) continue; - if (list_empty(&cfg->irq_2_pin)) + if (list_empty(&data->irq_2_pin)) continue; + printk(KERN_DEBUG "IRQ%d ", irq); - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) pr_cont("-> %d:%d", entry->apic, entry->pin); pr_cont("\n"); } @@ -1564,15 +1441,12 @@ void native_disable_io_apic(void) struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); + entry.mask = IOAPIC_UNMASKED; + entry.trigger = IOAPIC_EDGE; + entry.polarity = IOAPIC_POL_HIGH; + entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; + entry.delivery_mode = dest_ExtINT; + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: @@ -1582,7 +1456,6 @@ void native_disable_io_apic(void) if (cpu_has_apic || apic_from_smp_config()) disconnect_bsp_APIC(ioapic_i8259.pin != -1); - } /* @@ -1792,7 +1665,6 @@ static int __init timer_irq_works(void) * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... */ - static unsigned int startup_ioapic_irq(struct irq_data *data) { int was_pending = 0, irq = data->irq; @@ -1804,74 +1676,22 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) if (legacy_pic->irq_pending(irq)) was_pending = 1; } - __unmask_ioapic(irqd_cfg(data)); + __unmask_ioapic(data->chip_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - - apic = entry->apic; - pin = entry->pin; - - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - } -} - -int native_ioapic_set_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force) -{ - unsigned int dest, irq = data->irq; - unsigned long flags; - int ret; - - if (!config_enabled(CONFIG_SMP)) - return -EPERM; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = apic_set_affinity(data, mask, &dest); - if (!ret) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, irqd_cfg(data)); - ret = IRQ_SET_MASK_OK_NOCOPY; - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return ret; -} - atomic_t irq_mis_count; #ifdef CONFIG_GENERIC_PENDING_IRQ -static bool io_apic_level_ack_pending(struct irq_cfg *cfg) +static bool io_apic_level_ack_pending(struct mp_chip_data *data) { struct irq_pin_list *entry; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, data->irq_2_pin) { unsigned int reg; int pin; @@ -1888,18 +1708,17 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) return false; } -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +static inline bool ioapic_irqd_mask(struct irq_data *data) { /* If we are moving the irq we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic(cfg); + mask_ioapic_irq(data); return true; } return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) +static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) { if (unlikely(masked)) { /* Only migrate the irq if the ack has been received. @@ -1928,31 +1747,30 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - if (!io_apic_level_ack_pending(cfg)) + if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic(cfg); + unmask_ioapic_irq(data); } } #else -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +static inline bool ioapic_irqd_mask(struct irq_data *data) { return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) +static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) { } #endif -static void ack_ioapic_level(struct irq_data *data) +static void ioapic_ack_level(struct irq_data *irq_data) { - struct irq_cfg *cfg = irqd_cfg(data); - int i, irq = data->irq; + struct irq_cfg *cfg = irqd_cfg(irq_data); unsigned long v; bool masked; + int i; irq_complete_move(cfg); - masked = ioapic_irqd_mask(data, cfg); + masked = ioapic_irqd_mask(irq_data); /* * It appears there is an erratum which affects at least version 0x11 @@ -2004,11 +1822,49 @@ static void ack_ioapic_level(struct irq_data *data) */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); + eoi_ioapic_pin(cfg->vector, irq_data->chip_data); + } + + ioapic_irqd_unmask(irq_data, masked); +} + +static void ioapic_ir_ack_level(struct irq_data *irq_data) +{ + struct mp_chip_data *data = irq_data->chip_data; + + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + ack_APIC_irq(); + eoi_ioapic_pin(data->entry.vector, data); +} - eoi_ioapic_irq(irq, cfg); +static int ioapic_set_affinity(struct irq_data *irq_data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = irq_data->parent_data; + struct mp_chip_data *data = irq_data->chip_data; + struct irq_pin_list *entry; + struct irq_cfg *cfg; + unsigned long flags; + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + raw_spin_lock_irqsave(&ioapic_lock, flags); + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { + cfg = irqd_cfg(irq_data); + data->entry.dest = cfg->dest_apicid; + data->entry.vector = cfg->vector; + for_each_irq_pin(entry, data->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, + data->entry); } + raw_spin_unlock_irqrestore(&ioapic_lock, flags); - ioapic_irqd_unmask(data, cfg, masked); + return ret; } static struct irq_chip ioapic_chip __read_mostly = { @@ -2016,10 +1872,20 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_startup = startup_ioapic_irq, .irq_mask = mask_ioapic_irq, .irq_unmask = unmask_ioapic_irq, - .irq_ack = apic_ack_edge, - .irq_eoi = ack_ioapic_level, - .irq_set_affinity = native_ioapic_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_ack = irq_chip_ack_parent, + .irq_eoi = ioapic_ack_level, + .irq_set_affinity = ioapic_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static struct irq_chip ioapic_ir_chip __read_mostly = { + .name = "IR-IO-APIC", + .irq_startup = startup_ioapic_irq, + .irq_mask = mask_ioapic_irq, + .irq_unmask = unmask_ioapic_irq, + .irq_ack = irq_chip_ack_parent, + .irq_eoi = ioapic_ir_ack_level, + .irq_set_affinity = ioapic_set_affinity, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2113,12 +1979,12 @@ static inline void __init unlock_ExtINT_logic(void) memset(&entry1, 0, sizeof(entry1)); - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ + entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; + entry1.mask = IOAPIC_UNMASKED; entry1.dest = hard_smp_processor_id(); entry1.delivery_mode = dest_ExtINT; entry1.polarity = entry0.polarity; - entry1.trigger = 0; + entry1.trigger = IOAPIC_EDGE; entry1.vector = 0; ioapic_write_entry(apic, pin, entry1); @@ -2152,6 +2018,25 @@ static int __init disable_timer_pin_setup(char *arg) } early_param("disable_timer_pin_1", disable_timer_pin_setup); +static int mp_alloc_timer_irq(int ioapic, int pin) +{ + int irq = -1; + struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); + + if (domain) { + struct irq_alloc_info info; + + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0); + info.ioapic_id = mpc_ioapic_id(ioapic); + info.ioapic_pin = pin; + mutex_lock(&ioapic_mutex); + irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info); + mutex_unlock(&ioapic_mutex); + } + + return irq; +} + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2162,7 +2047,9 @@ early_param("disable_timer_pin_1", disable_timer_pin_setup); */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_data *irq_data = irq_get_irq_data(0); + struct mp_chip_data *data = irq_data->chip_data; + struct irq_cfg *cfg = irqd_cfg(irq_data); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; unsigned long flags; @@ -2174,7 +2061,6 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ legacy_pic->mask(0); - assign_irq_vector(0, cfg, apic->target_cpus()); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2215,23 +2101,21 @@ static inline void __init check_timer(void) } if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ + /* Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_node(cfg, node, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + mp_alloc_timer_irq(apic1, pin1); } else { - /* for edge trigger, setup_ioapic_irq already - * leave it unmasked. + /* + * for edge trigger, it's already unmasked, * so only need to unmask if it is level-trigger * do we really have level trigger timer? */ int idx; idx = find_irq_entry(apic1, pin1, mp_INT); if (idx != -1 && irq_trigger(idx)) - unmask_ioapic(cfg); + unmask_ioapic_irq(irq_get_chip_data(0)); } + irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); @@ -2251,8 +2135,8 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); + replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2329,36 +2213,35 @@ out: static int mp_irqdomain_create(int ioapic) { - size_t size; + struct irq_alloc_info info; + struct irq_domain *parent; int hwirqs = mp_ioapic_pin_count(ioapic); struct ioapic *ip = &ioapics[ioapic]; struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); - size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic); - ip->pin_info = kzalloc(size, GFP_KERNEL); - if (!ip->pin_info) - return -ENOMEM; - if (cfg->type == IOAPIC_DOMAIN_INVALID) return 0; + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_IOAPIC; + info.ioapic_id = mpc_ioapic_id(ioapic); + parent = irq_remapping_get_ir_irq_domain(&info); + if (!parent) + parent = x86_vector_domain; + ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops, (void *)(long)ioapic); - if(!ip->irqdomain) { - kfree(ip->pin_info); - ip->pin_info = NULL; + if (!ip->irqdomain) return -ENOMEM; - } + + ip->irqdomain->parent = parent; if (cfg->type == IOAPIC_DOMAIN_LEGACY || cfg->type == IOAPIC_DOMAIN_STRICT) ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1); - if (gsi_cfg->gsi_base == 0) - irq_set_default_host(ip->irqdomain); - return 0; } @@ -2368,8 +2251,6 @@ static void ioapic_destroy_irqdomain(int idx) irq_domain_remove(ioapics[idx].irqdomain); ioapics[idx].irqdomain = NULL; } - kfree(ioapics[idx].pin_info); - ioapics[idx].pin_info = NULL; } void __init setup_IO_APIC(void) @@ -2399,20 +2280,6 @@ void __init setup_IO_APIC(void) ioapic_initialized = 1; } -/* - * Called after all the initialization is done. If we didn't find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - static void resume_ioapic_id(int ioapic_idx) { unsigned long flags; @@ -2451,20 +2318,6 @@ static int __init ioapic_init_ops(void) device_initcall(ioapic_init_ops); -static int -io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) -{ - struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); - int ret; - - if (!cfg) - return -EINVAL; - ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); - if (!ret) - setup_ioapic_irq(irq, cfg, attr); - return ret; -} - static int io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; @@ -2692,7 +2545,7 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - x86_io_apic_ops.set_affinity(idata, mask, false); + irq_set_affinity(irq, mask); } } @@ -2737,7 +2590,7 @@ static struct resource * __init ioapic_setup_resources(void) return res; } -void __init native_io_apic_init_mappings(void) +void __init io_apic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; @@ -2962,7 +2815,6 @@ int mp_unregister_ioapic(u32 gsi_base) { int ioapic, pin; int found = 0; - struct mp_pin_info *pin_info; for_each_ioapic(ioapic) if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) { @@ -2975,11 +2827,17 @@ int mp_unregister_ioapic(u32 gsi_base) } for_each_pin(ioapic, pin) { - pin_info = mp_pin_info(ioapic, pin); - if (pin_info->count) { - pr_warn("pin%d on IOAPIC%d is still in use.\n", - pin, ioapic); - return -EBUSY; + u32 gsi = mp_pin_to_gsi(ioapic, pin); + int irq = mp_map_gsi_to_irq(gsi, 0, NULL); + struct mp_chip_data *data; + + if (irq >= 0) { + data = irq_get_chip_data(irq); + if (data && data->count) { + pr_warn("pin%d on IOAPIC%d is still in use.\n", + pin, ioapic); + return -EBUSY; + } } } @@ -3006,108 +2864,141 @@ int mp_ioapic_registered(u32 gsi_base) return 0; } -static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, - int ioapic, int ioapic_pin, - int trigger, int polarity) +static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, + struct irq_alloc_info *info) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + if (info && info->ioapic_valid) { + data->trigger = info->ioapic_trigger; + data->polarity = info->ioapic_polarity; + } else if (acpi_get_override_irq(gsi, &data->trigger, + &data->polarity) < 0) { + /* PCI interrupts are always active low level triggered. */ + data->trigger = IOAPIC_LEVEL; + data->polarity = IOAPIC_POL_LOW; + } } -int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq) +static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data, + struct IO_APIC_route_entry *entry) { - int ioapic = (int)(long)domain->host_data; - struct mp_pin_info *info = mp_pin_info(ioapic, hwirq); - struct io_apic_irq_attr attr; + memset(entry, 0, sizeof(*entry)); + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + entry->trigger = data->trigger; + entry->polarity = data->polarity; + /* + * Mask level triggered irqs. Edge triggered irqs are masked + * by the irq core code in case they fire. + */ + if (data->trigger == IOAPIC_LEVEL) + entry->mask = IOAPIC_MASKED; + else + entry->mask = IOAPIC_UNMASKED; +} - /* Get default attribute if not set by caller yet */ - if (!info->set) { - u32 gsi = mp_pin_to_gsi(ioapic, hwirq); +int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int ret, ioapic, pin; + struct irq_cfg *cfg; + struct irq_data *irq_data; + struct mp_chip_data *data; + struct irq_alloc_info *info = arg; - if (acpi_get_override_irq(gsi, &info->trigger, - &info->polarity) < 0) { - /* - * PCI interrupts are always polarity one level - * triggered. - */ - info->trigger = 1; - info->polarity = 1; - } - info->node = NUMA_NO_NODE; + if (!info || nr_irqs > 1) + return -EINVAL; + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) + return -EINVAL; - /* - * setup_IO_APIC_irqs() programs all legacy IRQs with default - * trigger and polarity attributes. Don't set the flag for that - * case so the first legacy IRQ user could reprogram the pin - * with real trigger and polarity attributes. - */ - if (virq >= nr_legacy_irqs() || info->count) - info->set = 1; - } - set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger, - info->polarity); + ioapic = mp_irqdomain_ioapic_idx(domain); + pin = info->ioapic_pin; + if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) + return -EEXIST; - return io_apic_setup_irq_pin(virq, info->node, &attr); -} + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; -void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) -{ - struct irq_data *data = irq_get_irq_data(virq); - struct irq_cfg *cfg = irq_cfg(virq); - int ioapic = (int)(long)domain->host_data; - int pin = (int)data->hwirq; + info->ioapic_entry = &data->entry; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); + if (ret < 0) { + kfree(data); + return ret; + } + + INIT_LIST_HEAD(&data->irq_2_pin); + irq_data->hwirq = info->ioapic_pin; + irq_data->chip = (domain->parent == x86_vector_domain) ? + &ioapic_chip : &ioapic_ir_chip; + irq_data->chip_data = data; + mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); + + cfg = irqd_cfg(irq_data); + add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); + if (info->ioapic_entry) + mp_setup_entry(cfg, data, info->ioapic_entry); + mp_register_handler(virq, data->trigger); + if (virq < nr_legacy_irqs()) + legacy_pic->mask(virq); + + apic_printk(APIC_VERBOSE, KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n", + ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector, + virq, data->trigger, data->polarity, cfg->dest_apicid); - ioapic_mask_entry(ioapic, pin); - __remove_pin_from_irq(cfg, ioapic, pin); - WARN_ON(!list_empty(&cfg->irq_2_pin)); - arch_teardown_hwirq(virq); + return 0; } -int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) +void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - int ret = 0; - int ioapic, pin; - struct mp_pin_info *info; + struct irq_data *irq_data; + struct mp_chip_data *data; - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return -ENODEV; - - pin = mp_find_ioapic_pin(ioapic, gsi); - info = mp_pin_info(ioapic, pin); - trigger = trigger ? 1 : 0; - polarity = polarity ? 1 : 0; - - mutex_lock(&ioapic_mutex); - if (!info->set) { - info->trigger = trigger; - info->polarity = polarity; - info->node = node; - info->set = 1; - } else if (info->trigger != trigger || info->polarity != polarity) { - ret = -EBUSY; + BUG_ON(nr_irqs != 1); + irq_data = irq_domain_get_irq_data(domain, virq); + if (irq_data && irq_data->chip_data) { + data = irq_data->chip_data; + __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), + (int)irq_data->hwirq); + WARN_ON(!list_empty(&data->irq_2_pin)); + kfree(irq_data->chip_data); } - mutex_unlock(&ioapic_mutex); - - return ret; + irq_domain_free_irqs_top(domain, virq, nr_irqs); } -/* Enable IOAPIC early just for system timer */ -void __init pre_init_apic_IRQ0(void) +void mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data) { - struct io_apic_irq_attr attr = { 0, 0, 0, 0 }; + unsigned long flags; + struct irq_pin_list *entry; + struct mp_chip_data *data = irq_data->chip_data; - printk(KERN_INFO "Early APIC setup for system timer0\n"); -#ifndef CONFIG_SMP - physid_set_mask_of_physid(boot_cpu_physical_apicid, - &phys_cpu_present_map); -#endif - setup_local_APIC(); + raw_spin_lock_irqsave(&ioapic_lock, flags); + for_each_irq_pin(entry, data->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, data->entry); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); +} - io_apic_setup_irq_pin(0, 0, &attr); - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); +void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + /* It won't be called for IRQ with multiple IOAPIC pins associated */ + ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), + (int)irq_data->hwirq); +} + +int mp_irqdomain_ioapic_idx(struct irq_domain *domain) +{ + return (int)(long)domain->host_data; } + +const struct irq_domain_ops mp_ioapic_irqdomain_ops = { + .alloc = mp_irqdomain_alloc, + .free = mp_irqdomain_free, + .activate = mp_irqdomain_activate, + .deactivate = mp_irqdomain_deactivate, +}; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index d6ba2d660dc5..1a9d735e09c6 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu <jiang.liu@linux.intel.com> + * Convert to hierarchical irqdomain * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,22 +16,23 @@ #include <linux/dmar.h> #include <linux/hpet.h> #include <linux/msi.h> +#include <asm/irqdomain.h> #include <asm/msidef.h> #include <asm/hpet.h> #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/irq_remapping.h> -void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) +static struct irq_domain *msi_default_domain; + +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_cfg *cfg = irqd_cfg(data); msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) - msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); msg->address_lo = MSI_ADDR_BASE_LO | @@ -39,7 +42,7 @@ void native_compose_msi_msg(struct pci_dev *pdev, ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU : MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + MSI_ADDR_DEST_ID(cfg->dest_apicid); msg->data = MSI_DATA_TRIGGER_EDGE | @@ -50,180 +53,201 @@ void native_compose_msi_msg(struct pci_dev *pdev, MSI_DATA_VECTOR(cfg->vector); } -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. + */ +static struct irq_chip pci_msi_controller = { + .name = "PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - struct irq_cfg *cfg; - int err; - unsigned dest; + struct irq_domain *domain; + struct irq_alloc_info info; - if (disable_apic) - return -ENXIO; + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_MSI; + info.msi_dev = dev; - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; + domain = irq_remapping_get_irq_domain(&info); + if (domain == NULL) + domain = msi_default_domain; + if (domain == NULL) + return -ENOSYS; - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; + return pci_msi_domain_alloc_irqs(domain, dev, nvec, type); +} - x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); +void native_teardown_msi_irq(unsigned int irq) +{ + irq_domain_free_irqs(irq, 1); +} - return 0; +static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->msi_hwirq; } -static int -msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) +static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) { - struct irq_cfg *cfg = irqd_cfg(data); - struct msi_msg msg; - unsigned int dest; - int ret; + struct pci_dev *pdev = to_pci_dev(dev); + struct msi_desc *desc = first_pci_msi_entry(pdev); + + init_irq_alloc_info(arg, NULL); + arg->msi_dev = pdev; + if (desc->msi_attrib.is_msix) { + arg->type = X86_IRQ_ALLOC_TYPE_MSIX; + } else { + arg->type = X86_IRQ_ALLOC_TYPE_MSI; + arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; + } - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; + return 0; +} - __get_cached_msi_msg(data->msi_desc, &msg); +static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc); +} + +static struct msi_domain_ops pci_msi_domain_ops = { + .get_hwirq = pci_msi_get_hwirq, + .msi_prepare = pci_msi_prepare, + .set_desc = pci_msi_set_desc, +}; - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); +static struct msi_domain_info pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &pci_msi_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; - __pci_write_msi_msg(data->msi_desc, &msg); +void arch_init_msi_domain(struct irq_domain *parent) +{ + if (disable_apic) + return; - return IRQ_SET_MASK_OK_NOCOPY; + msi_default_domain = pci_msi_create_irq_domain(NULL, + &pci_msi_domain_info, parent); + if (!msi_default_domain) + pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); } -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", +#ifdef CONFIG_IRQ_REMAP +static struct irq_chip pci_msi_ir_controller = { + .name = "IR-PCI-MSI", .irq_unmask = pci_msi_unmask_irq, .irq_mask = pci_msi_mask_irq, - .irq_ack = apic_ack_edge, - .irq_set_affinity = msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, .flags = IRQCHIP_SKIP_SET_WAKE, }; -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) -{ - struct irq_chip *chip = &msi_chip; - struct msi_msg msg; - unsigned int irq = irq_base + irq_offset; - int ret; - - ret = msi_compose_msg(dev, irq, &msg, -1); - if (ret < 0) - return ret; - - irq_set_msi_desc_off(irq_base, irq_offset, msidesc); - - /* - * MSI-X message is written per-IRQ, the offset is always 0. - * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. - */ - if (!irq_offset) - pci_write_msi_msg(irq, &msg); +static struct msi_domain_info pci_msi_ir_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &pci_msi_ir_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; - setup_remapped_irq(irq, irq_cfg(irq), chip); +struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent) +{ + return pci_msi_create_irq_domain(NULL, &pci_msi_ir_domain_info, parent); +} +#endif - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); +#ifdef CONFIG_DMAR_TABLE +static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg) +{ + dmar_msi_write(data->irq, msg); +} - dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq); +static struct irq_chip dmar_msi_controller = { + .name = "DMAR-MSI", + .irq_unmask = dmar_msi_unmask, + .irq_mask = dmar_msi_mask, + .irq_ack = irq_chip_ack_parent, + .irq_set_affinity = msi_domain_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = dmar_msi_write_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; - return 0; +static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->dmar_id; } -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static int dmar_msi_init(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq, + irq_hw_number_t hwirq, msi_alloc_info_t *arg) { - struct msi_desc *msidesc; - unsigned int irq; - int node, ret; + irq_domain_set_info(domain, virq, arg->dmar_id, info->chip, NULL, + handle_edge_irq, arg->dmar_data, "edge"); - /* Multiple MSI vectors only supported with interrupt remapping */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; + return 0; +} - node = dev_to_node(&dev->dev); +static struct msi_domain_ops dmar_msi_domain_ops = { + .get_hwirq = dmar_msi_get_hwirq, + .msi_init = dmar_msi_init, +}; - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = irq_alloc_hwirq(node); - if (!irq) - return -ENOSPC; +static struct msi_domain_info dmar_msi_domain_info = { + .ops = &dmar_msi_domain_ops, + .chip = &dmar_msi_controller, +}; - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) { - irq_free_hwirq(irq); - return ret; - } +static struct irq_domain *dmar_get_irq_domain(void) +{ + static struct irq_domain *dmar_domain; + static DEFINE_MUTEX(dmar_lock); - } - return 0; -} + mutex_lock(&dmar_lock); + if (dmar_domain == NULL) + dmar_domain = msi_create_irq_domain(NULL, &dmar_msi_domain_info, + x86_vector_domain); + mutex_unlock(&dmar_lock); -void native_teardown_msi_irq(unsigned int irq) -{ - irq_free_hwirq(irq); + return dmar_domain; } -#ifdef CONFIG_DMAR_TABLE -static int -dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) +int dmar_alloc_hwirq(int id, int node, void *arg) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest, irq = data->irq; - struct msi_msg msg; - int ret; - - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; + struct irq_domain *domain = dmar_get_irq_domain(); + struct irq_alloc_info info; - dmar_msi_read(irq, &msg); + if (!domain) + return -1; - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_DMAR; + info.dmar_id = id; + info.dmar_data = arg; - dmar_msi_write(irq, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; + return irq_domain_alloc_irqs(domain, 1, node, &info); } -static struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .irq_unmask = dmar_msi_unmask, - .irq_mask = dmar_msi_mask, - .irq_ack = apic_ack_edge, - .irq_set_affinity = dmar_msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int arch_setup_dmar_msi(unsigned int irq) +void dmar_free_hwirq(int irq) { - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg, -1); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; + irq_domain_free_irqs(irq, 1); } #endif @@ -231,56 +255,103 @@ int arch_setup_dmar_msi(unsigned int irq) * MSI message composition */ #ifdef CONFIG_HPET_TIMER +static inline int hpet_dev_id(struct irq_domain *domain) +{ + struct msi_domain_info *info = msi_get_domain_info(domain); + + return (int)(long)info->data; +} -static int hpet_msi_set_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) +static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); - struct msi_msg msg; - unsigned int dest; - int ret; + hpet_msi_write(data->handler_data, msg); +} - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; +static struct irq_chip hpet_msi_controller = { + .name = "HPET-MSI", + .irq_unmask = hpet_msi_unmask, + .irq_mask = hpet_msi_mask, + .irq_ack = irq_chip_ack_parent, + .irq_set_affinity = msi_domain_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = hpet_msi_write_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; - hpet_msi_read(data->handler_data, &msg); +static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->hpet_index; +} - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); +static int hpet_msi_init(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq, + irq_hw_number_t hwirq, msi_alloc_info_t *arg) +{ + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_domain_set_info(domain, virq, arg->hpet_index, info->chip, NULL, + handle_edge_irq, arg->hpet_data, "edge"); - hpet_msi_write(data->handler_data, &msg); + return 0; +} - return IRQ_SET_MASK_OK_NOCOPY; +static void hpet_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq) +{ + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); } -static struct irq_chip hpet_msi_type = { - .name = "HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, - .irq_ack = apic_ack_edge, - .irq_set_affinity = hpet_msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, +static struct msi_domain_ops hpet_msi_domain_ops = { + .get_hwirq = hpet_msi_get_hwirq, + .msi_init = hpet_msi_init, + .msi_free = hpet_msi_free, +}; + +static struct msi_domain_info hpet_msi_domain_info = { + .ops = &hpet_msi_domain_ops, + .chip = &hpet_msi_controller, }; -int default_setup_hpet_msi(unsigned int irq, unsigned int id) +struct irq_domain *hpet_create_irq_domain(int hpet_id) { - struct irq_chip *chip = &hpet_msi_type; - struct msi_msg msg; - int ret; + struct irq_domain *parent; + struct irq_alloc_info info; + struct msi_domain_info *domain_info; + + if (x86_vector_domain == NULL) + return NULL; + + domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); + if (!domain_info) + return NULL; + + *domain_info = hpet_msi_domain_info; + domain_info->data = (void *)(long)hpet_id; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_HPET; + info.hpet_id = hpet_id; + parent = irq_remapping_get_ir_irq_domain(&info); + if (parent == NULL) + parent = x86_vector_domain; + else + hpet_msi_controller.name = "IR-HPET-MSI"; + + return msi_create_irq_domain(NULL, domain_info, parent); +} - ret = msi_compose_msg(NULL, irq, &msg, id); - if (ret < 0) - return ret; +int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, + int dev_num) +{ + struct irq_alloc_info info; - hpet_msi_write(irq_get_handler_data(irq), &msg); - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - setup_remapped_irq(irq, irq_cfg(irq), chip); + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_HPET; + info.hpet_data = dev; + info.hpet_id = hpet_dev_id(domain); + info.hpet_index = dev_num; - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - return 0; + return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); } #endif diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6cedd7914581..28eba2d38b15 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu <jiang.liu@linux.intel.com> + * Enable support of hierarchical irqdomains * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -11,15 +13,28 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/compiler.h> -#include <linux/irqdomain.h> #include <linux/slab.h> +#include <asm/irqdomain.h> #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/i8259.h> #include <asm/desc.h> #include <asm/irq_remapping.h> +struct apic_chip_data { + struct irq_cfg cfg; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 move_in_progress : 1; +}; + +struct irq_domain *x86_vector_domain; static DEFINE_RAW_SPINLOCK(vector_lock); +static cpumask_var_t vector_cpumask; +static struct irq_chip lapic_controller; +#ifdef CONFIG_X86_IO_APIC +static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; +#endif void lock_vector_lock(void) { @@ -34,71 +49,59 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } -struct irq_cfg *irq_cfg(unsigned int irq) +static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data) { - return irq_get_chip_data(irq); + if (!irq_data) + return NULL; + + while (irq_data->parent_data) + irq_data = irq_data->parent_data; + + return irq_data->chip_data; } struct irq_cfg *irqd_cfg(struct irq_data *irq_data) { - return irq_data->chip_data; + struct apic_chip_data *data = apic_chip_data(irq_data); + + return data ? &data->cfg : NULL; } -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) +struct irq_cfg *irq_cfg(unsigned int irq) { - struct irq_cfg *cfg; + return irqd_cfg(irq_get_irq_data(irq)); +} - cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); - if (!cfg) +static struct apic_chip_data *alloc_apic_chip_data(int node) +{ + struct apic_chip_data *data; + + data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); + if (!data) return NULL; - if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) - goto out_cfg; - if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) + if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node)) + goto out_data; + if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node)) goto out_domain; -#ifdef CONFIG_X86_IO_APIC - INIT_LIST_HEAD(&cfg->irq_2_pin); -#endif - return cfg; + return data; out_domain: - free_cpumask_var(cfg->domain); -out_cfg: - kfree(cfg); + free_cpumask_var(data->domain); +out_data: + kfree(data); return NULL; } -struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) +static void free_apic_chip_data(struct apic_chip_data *data) { - int res = irq_alloc_desc_at(at, node); - struct irq_cfg *cfg; - - if (res < 0) { - if (res != -EEXIST) - return NULL; - cfg = irq_cfg(at); - if (cfg) - return cfg; + if (data) { + free_cpumask_var(data->domain); + free_cpumask_var(data->old_domain); + kfree(data); } - - cfg = alloc_irq_cfg(at, node); - if (cfg) - irq_set_chip_data(at, cfg); - else - irq_free_desc(at); - return cfg; -} - -static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) -{ - if (!cfg) - return; - irq_set_chip_data(at, NULL); - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); } -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int __assign_irq_vector(int irq, struct apic_chip_data *d, + const struct cpumask *mask) { /* * NOTE! The local APIC isn't very good at handling @@ -114,36 +117,33 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; int cpu, err; - cpumask_var_t tmp_mask; - if (cfg->move_in_progress) + if (d->move_in_progress) return -EBUSY; - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; - /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; - cpumask_clear(cfg->old_domain); + cpumask_clear(d->old_domain); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; - apic->vector_allocation_domain(cpu, tmp_mask, mask); + apic->vector_allocation_domain(cpu, vector_cpumask, mask); - if (cpumask_subset(tmp_mask, cfg->domain)) { + if (cpumask_subset(vector_cpumask, d->domain)) { err = 0; - if (cpumask_equal(tmp_mask, cfg->domain)) + if (cpumask_equal(vector_cpumask, d->domain)) break; /* * New cpumask using the vector is a proper subset of * the current in use mask. So cleanup the vector * allocation for the members that are not used anymore. */ - cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); - cpumask_and(cfg->domain, cfg->domain, tmp_mask); + cpumask_andnot(d->old_domain, d->domain, + vector_cpumask); + d->move_in_progress = + cpumask_intersects(d->old_domain, cpu_online_mask); + cpumask_and(d->domain, d->domain, vector_cpumask); break; } @@ -157,16 +157,18 @@ next: } if (unlikely(current_vector == vector)) { - cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); - cpumask_andnot(tmp_mask, mask, cfg->old_domain); - cpu = cpumask_first_and(tmp_mask, cpu_online_mask); + cpumask_or(d->old_domain, d->old_domain, + vector_cpumask); + cpumask_andnot(vector_cpumask, mask, d->old_domain); + cpu = cpumask_first_and(vector_cpumask, + cpu_online_mask); continue; } if (test_bit(vector, used_vectors)) goto next; - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { + for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) { if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED) goto next; @@ -174,55 +176,73 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (cfg->vector) { - cpumask_copy(cfg->old_domain, cfg->domain); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); + if (d->cfg.vector) { + cpumask_copy(d->old_domain, d->domain); + d->move_in_progress = + cpumask_intersects(d->old_domain, cpu_online_mask); } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); + d->cfg.vector = vector; + cpumask_copy(d->domain, vector_cpumask); err = 0; break; } - free_cpumask_var(tmp_mask); + + if (!err) { + /* cache destination APIC IDs into cfg->dest_apicid */ + err = apic->cpu_mask_to_apicid_and(mask, d->domain, + &d->cfg.dest_apicid); + } return err; } -int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int assign_irq_vector(int irq, struct apic_chip_data *data, + const struct cpumask *mask) { int err; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, cfg, mask); + err = __assign_irq_vector(irq, data, mask); raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } -void clear_irq_vector(int irq, struct irq_cfg *cfg) +static int assign_irq_vector_policy(int irq, int node, + struct apic_chip_data *data, + struct irq_alloc_info *info) +{ + if (info && info->mask) + return assign_irq_vector(irq, data, info->mask); + if (node != NUMA_NO_NODE && + assign_irq_vector(irq, data, cpumask_of_node(node)) == 0) + return 0; + return assign_irq_vector(irq, data, apic->target_cpus()); +} + +static void clear_irq_vector(int irq, struct apic_chip_data *data) { int cpu, vector; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - BUG_ON(!cfg->vector); + BUG_ON(!data->cfg.vector); - vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + vector = data->cfg.vector; + for_each_cpu_and(cpu, data->domain, cpu_online_mask) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - cfg->vector = 0; - cpumask_clear(cfg->domain); + data->cfg.vector = 0; + cpumask_clear(data->domain); - if (likely(!cfg->move_in_progress)) { + if (likely(!data->move_in_progress)) { raw_spin_unlock_irqrestore(&vector_lock, flags); return; } - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -231,10 +251,95 @@ void clear_irq_vector(int irq, struct irq_cfg *cfg) break; } } - cfg->move_in_progress = 0; + data->move_in_progress = 0; raw_spin_unlock_irqrestore(&vector_lock, flags); } +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) +{ + memset(info, 0, sizeof(*info)); + info->mask = mask; +} + +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +{ + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); +} + +static void x86_vector_free_irqs(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *irq_data; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irq_data && irq_data->chip_data) { + clear_irq_vector(virq + i, irq_data->chip_data); + free_apic_chip_data(irq_data->chip_data); +#ifdef CONFIG_X86_IO_APIC + if (virq + i < nr_legacy_irqs()) + legacy_irq_data[virq + i] = NULL; +#endif + irq_domain_reset_irq_data(irq_data); + } + } +} + +static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + struct apic_chip_data *data; + struct irq_data *irq_data; + int i, err; + + if (disable_apic) + return -ENXIO; + + /* Currently vector allocator can't guarantee contiguous allocations */ + if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) + return -ENOSYS; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irq_data); +#ifdef CONFIG_X86_IO_APIC + if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) + data = legacy_irq_data[virq + i]; + else +#endif + data = alloc_apic_chip_data(irq_data->node); + if (!data) { + err = -ENOMEM; + goto error; + } + + irq_data->chip = &lapic_controller; + irq_data->chip_data = data; + irq_data->hwirq = virq + i; + err = assign_irq_vector_policy(virq, irq_data->node, data, + info); + if (err) + goto error; + } + + return 0; + +error: + x86_vector_free_irqs(domain, virq, i + 1); + return err; +} + +static const struct irq_domain_ops x86_vector_domain_ops = { + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, +}; + int __init arch_probe_nr_irqs(void) { int nr; @@ -258,8 +363,43 @@ int __init arch_probe_nr_irqs(void) return nr_legacy_irqs(); } +#ifdef CONFIG_X86_IO_APIC +static void init_legacy_irqs(void) +{ + int i, node = cpu_to_node(0); + struct apic_chip_data *data; + + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * ISA_IRQ_VECTOR(i) for all cpu's. + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + data = legacy_irq_data[i] = alloc_apic_chip_data(node); + BUG_ON(!data); + + data->cfg.vector = ISA_IRQ_VECTOR(i); + cpumask_setall(data->domain); + irq_set_chip_data(i, data); + } +} +#else +static void init_legacy_irqs(void) { } +#endif + int __init arch_early_irq_init(void) { + init_legacy_irqs(); + + x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops, + NULL); + BUG_ON(x86_vector_domain == NULL); + irq_set_default_host(x86_vector_domain); + + arch_init_msi_domain(x86_vector_domain); + arch_init_htirq_domain(x86_vector_domain); + + BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + return arch_early_ioapic_init(); } @@ -267,7 +407,7 @@ static void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ int irq, vector; - struct irq_cfg *cfg; + struct apic_chip_data *data; /* * vector_lock will make sure that we don't run into irq vector @@ -277,13 +417,13 @@ static void __setup_vector_irq(int cpu) raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { - cfg = irq_cfg(irq); - if (!cfg) + data = apic_chip_data(irq_get_irq_data(irq)); + if (!data) continue; - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, data->domain)) continue; - vector = cfg->vector; + vector = data->cfg.vector; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ @@ -292,8 +432,8 @@ static void __setup_vector_irq(int cpu) if (irq <= VECTOR_UNDEFINED) continue; - cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) + data = apic_chip_data(irq_get_irq_data(irq)); + if (!cpumask_test_cpu(cpu, data->domain)) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } raw_spin_unlock(&vector_lock); @@ -314,20 +454,20 @@ void setup_vector_irq(int cpu) * legacy vector to irq mapping: */ for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq; __setup_vector_irq(cpu); } -int apic_retrigger_irq(struct irq_data *data) +static int apic_retrigger_irq(struct irq_data *irq_data) { - struct irq_cfg *cfg = irqd_cfg(data); + struct apic_chip_data *data = apic_chip_data(irq_data); unsigned long flags; int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(cfg->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); + cpu = cpumask_first_and(data->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -340,73 +480,76 @@ void apic_ack_edge(struct irq_data *data) ack_APIC_irq(); } -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) +static int apic_set_affinity(struct irq_data *irq_data, + const struct cpumask *dest, bool force) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int irq = data->irq; - int err; + struct apic_chip_data *data = irq_data->chip_data; + int err, irq = irq_data->irq; if (!config_enabled(CONFIG_SMP)) return -EPERM; - if (!cpumask_intersects(mask, cpu_online_mask)) + if (!cpumask_intersects(dest, cpu_online_mask)) return -EINVAL; - err = assign_irq_vector(irq, cfg, mask); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + err = assign_irq_vector(irq, data, dest); if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) + struct irq_data *top = irq_get_irq_data(irq); + + if (assign_irq_vector(irq, data, top->affinity)) pr_err("Failed to recover vector for irq %d\n", irq); return err; } - cpumask_copy(data->affinity, mask); - - return 0; + return IRQ_SET_MASK_OK; } +static struct irq_chip lapic_controller = { + .irq_ack = apic_ack_edge, + .irq_set_affinity = apic_set_affinity, + .irq_retrigger = apic_retrigger_irq, +}; + #ifdef CONFIG_SMP -void send_cleanup_vector(struct irq_cfg *cfg) +static void __send_cleanup_vector(struct apic_chip_data *data) { cpumask_var_t cleanup_mask; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + for_each_cpu_and(i, data->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } - cfg->move_in_progress = 0; + data->move_in_progress = 0; +} + +void send_cleanup_vector(struct irq_cfg *cfg) +{ + struct apic_chip_data *data; + + data = container_of(cfg, struct apic_chip_data, cfg); + if (data->move_in_progress) + __send_cleanup_vector(data); } asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; - ack_APIC_irq(); - irq_enter(); - exit_idle(); + entering_ack_irq(); me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { int irq; unsigned int irr; struct irq_desc *desc; - struct irq_cfg *cfg; + struct apic_chip_data *data; irq = __this_cpu_read(vector_irq[vector]); @@ -417,8 +560,8 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) if (!desc) continue; - cfg = irq_cfg(irq); - if (!cfg) + data = apic_chip_data(&desc->irq_data); + if (!data) continue; raw_spin_lock(&desc->lock); @@ -427,10 +570,11 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) * Check if the irq migration is in progress. If so, we * haven't received the cleanup request yet for this irq. */ - if (cfg->move_in_progress) + if (data->move_in_progress) goto unlock; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + if (vector == data->cfg.vector && + cpumask_test_cpu(me, data->domain)) goto unlock; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); @@ -450,20 +594,21 @@ unlock: raw_spin_unlock(&desc->lock); } - irq_exit(); + exiting_irq(); } static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { unsigned me; + struct apic_chip_data *data; - if (likely(!cfg->move_in_progress)) + data = container_of(cfg, struct apic_chip_data, cfg); + if (likely(!data->move_in_progress)) return; me = smp_processor_id(); - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); + if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) + __send_cleanup_vector(data); } void irq_complete_move(struct irq_cfg *cfg) @@ -475,46 +620,11 @@ void irq_force_complete_move(int irq) { struct irq_cfg *cfg = irq_cfg(irq); - if (!cfg) - return; - - __irq_complete_move(cfg, cfg->vector); + if (cfg) + __irq_complete_move(cfg, cfg->vector); } #endif -/* - * Dynamic irq allocate and deallocation. Should be replaced by irq domains! - */ -int arch_setup_hwirq(unsigned int irq, int node) -{ - struct irq_cfg *cfg; - unsigned long flags; - int ret; - - cfg = alloc_irq_cfg(irq, node); - if (!cfg) - return -ENOMEM; - - raw_spin_lock_irqsave(&vector_lock, flags); - ret = __assign_irq_vector(irq, cfg, apic->target_cpus()); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - if (!ret) - irq_set_chip_data(irq, cfg); - else - free_irq_cfg(irq, cfg); - return ret; -} - -void arch_teardown_hwirq(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - free_remapped_irq(irq); - clear_irq_vector(irq, cfg); - free_irq_cfg(irq, cfg); -} - static void __init print_APIC_field(int base) { int i; diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6fae733e9194..3ffd925655e0 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -21,11 +21,13 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static bool x2apic_fadt_phys(void) { +#ifdef CONFIG_ACPI if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { printk(KERN_DEBUG "System requires x2apic physical mode\n"); return true; } +#endif return false; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9f6b9341950f..8e3d22a1af94 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -41,6 +41,25 @@ void common(void) { OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + BLANK(); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); + + BLANK(); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); + + BLANK(); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); +#endif + #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); @@ -49,7 +68,9 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); +#ifdef CONFIG_X86_32 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); +#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 47703aed74cf..6ce39025f467 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -17,17 +17,6 @@ void foo(void); void foo(void) { - OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); - BLANK(); - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); @@ -37,10 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - OFFSET(PT_EBX, pt_regs, bx); OFFSET(PT_ECX, pt_regs, cx); OFFSET(PT_EDX, pt_regs, dx); @@ -60,9 +45,6 @@ void foo(void) OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - OFFSET(saved_context_gdt_desc, saved_context, gdt_desc); BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5ce6f2da8763..dcaab87da629 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -29,27 +29,6 @@ int main(void) BLANK(); #endif -#ifdef CONFIG_IA32_EMULATION - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - BLANK(); - -#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) - ENTRY(ax); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(si); - ENTRY(di); - ENTRY(bp); - ENTRY(sp); - ENTRY(ip); - BLANK(); -#undef ENTRY - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); - BLANK(); -#endif - #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(cx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8a..6bec0b55863e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1155,10 +1155,6 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..20190bdac9d5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { int i, ret = 0; + char *tmp; for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); @@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); } - if (mce_severity(m, mca_cfg.tolerant, msg, true) >= - MCE_PANIC_SEVERITY) + + if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + *msg = tmp; ret = 1; + } } return ret; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 939155ffdece..aad4bd84b475 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -39,14 +39,12 @@ void hyperv_vector_handler(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - irq_enter(); - exit_idle(); - + entering_irq(); inc_irq_stat(irq_hv_callback_count); if (vmbus_handler) vmbus_handler(); - irq_exit(); + exiting_irq(); set_irq_regs(old_regs); } diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 5f90b85ff22e..70d7c93f4550 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -98,7 +98,8 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, continue; base = range_state[i].base_pfn; if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && - (mtrr_state.enabled & 1)) { + (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { /* Var MTRR contains UC entry below 1M? Skip it: */ printk(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 7d74f7b3c6ba..3b533cf37c74 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -102,59 +102,76 @@ static int check_type_overlap(u8 *prev, u8 *curr) return 0; } -/* - * Error/Semi-error returns: - * 0xFF - when MTRR is not enabled - * *repeat == 1 implies [start:end] spanned across MTRR range and type returned - * corresponds only to [start:*partial_end]. - * Caller has to lookup again for [*partial_end:end]. +/** + * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries + * + * Return the MTRR fixed memory type of 'start'. + * + * MTRR fixed entries are divided into the following ways: + * 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges + * 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges + * 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges + * + * Return Values: + * MTRR_TYPE_(type) - Matched memory type + * MTRR_TYPE_INVALID - Unmatched + */ +static u8 mtrr_type_lookup_fixed(u64 start, u64 end) +{ + int idx; + + if (start >= 0x100000) + return MTRR_TYPE_INVALID; + + /* 0x0 - 0x7FFFF */ + if (start < 0x80000) { + idx = 0; + idx += (start >> 16); + return mtrr_state.fixed_ranges[idx]; + /* 0x80000 - 0xBFFFF */ + } else if (start < 0xC0000) { + idx = 1 * 8; + idx += ((start - 0x80000) >> 14); + return mtrr_state.fixed_ranges[idx]; + } + + /* 0xC0000 - 0xFFFFF */ + idx = 3 * 8; + idx += ((start - 0xC0000) >> 12); + return mtrr_state.fixed_ranges[idx]; +} + +/** + * mtrr_type_lookup_variable - look up memory type in MTRR variable entries + * + * Return Value: + * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched) + * + * Output Arguments: + * repeat - Set to 1 when [start:end] spanned across MTRR range and type + * returned corresponds only to [start:*partial_end]. Caller has + * to lookup again for [*partial_end:end]. + * + * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the + * region is fully covered by a single MTRR entry or the default + * type. */ -static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) +static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, + int *repeat, u8 *uniform) { int i; u64 base, mask; u8 prev_match, curr_match; *repeat = 0; - if (!mtrr_state_set) - return 0xFF; - - if (!mtrr_state.enabled) - return 0xFF; + *uniform = 1; - /* Make end inclusive end, instead of exclusive */ + /* Make end inclusive instead of exclusive */ end--; - /* Look in fixed ranges. Just return the type as per start */ - if (mtrr_state.have_fixed && (start < 0x100000)) { - int idx; - - if (start < 0x80000) { - idx = 0; - idx += (start >> 16); - return mtrr_state.fixed_ranges[idx]; - } else if (start < 0xC0000) { - idx = 1 * 8; - idx += ((start - 0x80000) >> 14); - return mtrr_state.fixed_ranges[idx]; - } else if (start < 0x1000000) { - idx = 3 * 8; - idx += ((start - 0xC0000) >> 12); - return mtrr_state.fixed_ranges[idx]; - } - } - - /* - * Look in variable ranges - * Look of multiple ranges matching this address and pick type - * as per MTRR precedence - */ - if (!(mtrr_state.enabled & 2)) - return mtrr_state.def_type; - - prev_match = 0xFF; + prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { - unsigned short start_state, end_state; + unsigned short start_state, end_state, inclusive; if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11))) continue; @@ -166,20 +183,29 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) start_state = ((start & mask) == (base & mask)); end_state = ((end & mask) == (base & mask)); + inclusive = ((start < base) && (end > base)); - if (start_state != end_state) { + if ((start_state != end_state) || inclusive) { /* * We have start:end spanning across an MTRR. - * We split the region into - * either - * (start:mtrr_end) (mtrr_end:end) - * or - * (start:mtrr_start) (mtrr_start:end) + * We split the region into either + * + * - start_state:1 + * (start:mtrr_end)(mtrr_end:end) + * - end_state:1 + * (start:mtrr_start)(mtrr_start:end) + * - inclusive:1 + * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end) + * * depending on kind of overlap. - * Return the type for first region and a pointer to - * the start of second region so that caller will - * lookup again on the second region. - * Note: This way we handle multiple overlaps as well. + * + * Return the type of the first region and a pointer + * to the start of next region so that caller will be + * advised to lookup again after having adjusted start + * and end. + * + * Note: This way we handle overlaps with multiple + * entries and the default type properly. */ if (start_state) *partial_end = base + get_mtrr_size(mask); @@ -193,59 +219,94 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) end = *partial_end - 1; /* end is inclusive */ *repeat = 1; + *uniform = 0; } if ((start & mask) != (base & mask)) continue; curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; - if (prev_match == 0xFF) { + if (prev_match == MTRR_TYPE_INVALID) { prev_match = curr_match; continue; } + *uniform = 0; if (check_type_overlap(&prev_match, &curr_match)) return curr_match; } - if (mtrr_tom2) { - if (start >= (1ULL<<32) && (end < mtrr_tom2)) - return MTRR_TYPE_WRBACK; - } - - if (prev_match != 0xFF) + if (prev_match != MTRR_TYPE_INVALID) return prev_match; return mtrr_state.def_type; } -/* - * Returns the effective MTRR type for the region - * Error return: - * 0xFF - when MTRR is not enabled +/** + * mtrr_type_lookup - look up memory type in MTRR + * + * Return Values: + * MTRR_TYPE_(type) - The effective MTRR type for the region + * MTRR_TYPE_INVALID - MTRR is disabled + * + * Output Argument: + * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the + * region is fully covered by a single MTRR entry or the default + * type. */ -u8 mtrr_type_lookup(u64 start, u64 end) +u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) { - u8 type, prev_type; + u8 type, prev_type, is_uniform = 1, dummy; int repeat; u64 partial_end; - type = __mtrr_type_lookup(start, end, &partial_end, &repeat); + if (!mtrr_state_set) + return MTRR_TYPE_INVALID; + + if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) + return MTRR_TYPE_INVALID; + + /* + * Look up the fixed ranges first, which take priority over + * the variable ranges. + */ + if ((start < 0x100000) && + (mtrr_state.have_fixed) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { + is_uniform = 0; + type = mtrr_type_lookup_fixed(start, end); + goto out; + } + + /* + * Look up the variable ranges. Look of multiple ranges matching + * this address and pick type as per MTRR precedence. + */ + type = mtrr_type_lookup_variable(start, end, &partial_end, + &repeat, &is_uniform); /* * Common path is with repeat = 0. * However, we can have cases where [start:end] spans across some - * MTRR range. Do repeated lookups for that case here. + * MTRR ranges and/or the default type. Do repeated lookups for + * that case here. */ while (repeat) { prev_type = type; start = partial_end; - type = __mtrr_type_lookup(start, end, &partial_end, &repeat); + is_uniform = 0; + type = mtrr_type_lookup_variable(start, end, &partial_end, + &repeat, &dummy); if (check_type_overlap(&prev_type, &type)) - return type; + goto out; } + if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2)) + type = MTRR_TYPE_WRBACK; + +out: + *uniform = is_uniform; return type; } @@ -347,7 +408,9 @@ static void __init print_mtrr_state(void) mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { pr_debug("MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? "en" : "dis"); + ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ? + "en" : "dis"); print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); for (i = 0; i < 2; ++i) print_fixed(0x80000 + i * 0x20000, 0x04000, @@ -360,7 +423,7 @@ static void __init print_mtrr_state(void) print_fixed_last(); } pr_debug("MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? "en" : "dis"); + mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { @@ -382,7 +445,7 @@ static void __init print_mtrr_state(void) } /* Grab all of the MTRR state for this CPU into *state */ -void __init get_mtrr_state(void) +bool __init get_mtrr_state(void) { struct mtrr_var_range *vrs; unsigned long flags; @@ -426,6 +489,8 @@ void __init get_mtrr_state(void) post_set(); local_irq_restore(flags); + + return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); } /* Some BIOS's are messed up and don't set all MTRRs the same! */ diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ea5f363a1948..e7ed0d8ebacb 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -59,6 +59,12 @@ #define MTRR_TO_PHYS_WC_OFFSET 1000 u32 num_var_ranges; +static bool __mtrr_enabled; + +static bool mtrr_enabled(void) +{ + return __mtrr_enabled; +} unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); @@ -286,7 +292,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, int i, replace, error; mtrr_type ltype; - if (!mtrr_if) + if (!mtrr_enabled()) return -ENXIO; error = mtrr_if->validate_add_page(base, size, type); @@ -435,6 +441,8 @@ static int mtrr_check(unsigned long base, unsigned long size) int mtrr_add(unsigned long base, unsigned long size, unsigned int type, bool increment) { + if (!mtrr_enabled()) + return -ENODEV; if (mtrr_check(base, size)) return -EINVAL; return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, @@ -463,8 +471,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) unsigned long lbase, lsize; int error = -EINVAL; - if (!mtrr_if) - return -ENXIO; + if (!mtrr_enabled()) + return -ENODEV; max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ @@ -523,6 +531,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) */ int mtrr_del(int reg, unsigned long base, unsigned long size) { + if (!mtrr_enabled()) + return -ENODEV; if (mtrr_check(base, size)) return -EINVAL; return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); @@ -538,6 +548,9 @@ EXPORT_SYMBOL(mtrr_del); * attempts to add a WC MTRR covering size bytes starting at base and * logs an error if this fails. * + * The called should provide a power of two size on an equivalent + * power of two boundary. + * * Drivers must store the return value to pass to mtrr_del_wc_if_needed, * but drivers should not try to interpret that return value. */ @@ -545,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size) { int ret; - if (pat_enabled) + if (pat_enabled() || !mtrr_enabled()) return 0; /* Success! (We don't need to do anything.) */ ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); @@ -577,7 +590,7 @@ void arch_phys_wc_del(int handle) EXPORT_SYMBOL(arch_phys_wc_del); /* - * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value + * arch_phys_wc_index - translates arch_phys_wc_add's return value * @handle: Return value from arch_phys_wc_add * * This will turn the return value from arch_phys_wc_add into an mtrr @@ -587,14 +600,14 @@ EXPORT_SYMBOL(arch_phys_wc_del); * in printk line. Alas there is an illegitimate use in some ancient * drm ioctls. */ -int phys_wc_to_mtrr_index(int handle) +int arch_phys_wc_index(int handle) { if (handle < MTRR_TO_PHYS_WC_OFFSET) return -1; else return handle - MTRR_TO_PHYS_WC_OFFSET; } -EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index); +EXPORT_SYMBOL_GPL(arch_phys_wc_index); /* * HACK ALERT! @@ -734,10 +747,12 @@ void __init mtrr_bp_init(void) } if (mtrr_if) { + __mtrr_enabled = true; set_num_var_ranges(); init_table(); if (use_intel()) { - get_mtrr_state(); + /* BIOS may override */ + __mtrr_enabled = get_mtrr_state(); if (mtrr_cleanup(phys_addr)) { changed_by_mtrr_cleanup = 1; @@ -745,10 +760,16 @@ void __init mtrr_bp_init(void) } } } + + if (!mtrr_enabled()) + pr_info("MTRR: Disabled\n"); } void mtrr_ap_init(void) { + if (!mtrr_enabled()) + return; + if (!use_intel() || mtrr_aps_delayed_init) return; /* @@ -774,6 +795,9 @@ void mtrr_save_state(void) { int first_cpu; + if (!mtrr_enabled()) + return; + get_online_cpus(); first_cpu = cpumask_first(cpu_online_mask); smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); @@ -782,6 +806,8 @@ void mtrr_save_state(void) void set_mtrr_aps_delayed_init(void) { + if (!mtrr_enabled()) + return; if (!use_intel()) return; @@ -793,7 +819,7 @@ void set_mtrr_aps_delayed_init(void) */ void mtrr_aps_init(void) { - if (!use_intel()) + if (!use_intel() || !mtrr_enabled()) return; /* @@ -810,7 +836,7 @@ void mtrr_aps_init(void) void mtrr_bp_restore(void) { - if (!use_intel()) + if (!use_intel() || !mtrr_enabled()) return; mtrr_if->set_all(); @@ -818,7 +844,7 @@ void mtrr_bp_restore(void) static int __init mtrr_init_finialize(void) { - if (!mtrr_if) + if (!mtrr_enabled()) return 0; if (use_intel()) { diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index df5e41f31a27..951884dcc433 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -51,7 +51,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); -void get_mtrr_state(void); +bool get_mtrr_state(void); extern void set_mtrr_ops(const struct mtrr_ops *ops); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 960e85de13fb..3998131d1a68 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, - [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, @@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(OP_READ) ] = { /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ @@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 999289b94025..358c54ad20d4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void) break; case 60: /* Haswell */ case 69: /* Haswell-Celeron */ + case 61: /* Broadwell */ rapl_cntr_mask = RAPL_IDX_HSW; rapl_pmu_events_group.attrs = rapl_events_hsw_attr; break; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 6367a780cc8c..5ee771859b6f 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -4,7 +4,6 @@ #include <linux/bootmem.h> #include <linux/export.h> #include <linux/io.h> -#include <linux/irqdomain.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/of.h> @@ -17,6 +16,7 @@ #include <linux/of_pci.h> #include <linux/initrd.h> +#include <asm/irqdomain.h> #include <asm/hpet.h> #include <asm/apic.h> #include <asm/pci_x86.h> @@ -196,38 +196,31 @@ static struct of_ioapic_type of_ioapic_type[] = }, }; -static int ioapic_xlate(struct irq_domain *domain, - struct device_node *controller, - const u32 *intspec, u32 intsize, - irq_hw_number_t *out_hwirq, u32 *out_type) +static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { + struct of_phandle_args *irq_data = (void *)arg; struct of_ioapic_type *it; - u32 line, idx, gsi; + struct irq_alloc_info tmp; - if (WARN_ON(intsize < 2)) + if (WARN_ON(irq_data->args_count < 2)) return -EINVAL; - - line = intspec[0]; - - if (intspec[1] >= ARRAY_SIZE(of_ioapic_type)) + if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type)) return -EINVAL; - it = &of_ioapic_type[intspec[1]]; + it = &of_ioapic_type[irq_data->args[1]]; + ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity); + tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain)); + tmp.ioapic_pin = irq_data->args[0]; - idx = (u32)(long)domain->host_data; - gsi = mp_pin_to_gsi(idx, line); - if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0))) - return -EBUSY; - - *out_hwirq = line; - *out_type = it->out_type; - return 0; + return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp); } -const struct irq_domain_ops ioapic_irq_domain_ops = { - .map = mp_irqdomain_map, - .unmap = mp_irqdomain_unmap, - .xlate = ioapic_xlate, +static const struct irq_domain_ops ioapic_irq_domain_ops = { + .alloc = dt_irqdomain_alloc, + .free = mp_irqdomain_free, + .activate = mp_irqdomain_activate, + .deactivate = mp_irqdomain_deactivate, }; static void __init dtb_add_ioapic(struct device_node *dn) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 02c2eff7478d..22aadc917868 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ @@ -419,26 +419,27 @@ syscall_return: * a completely clean 64-bit userspace context. */ movq RCX(%rsp),%rcx - cmpq %rcx,RIP(%rsp) /* RCX == RIP */ + movq RIP(%rsp),%r11 + cmpq %rcx,%r11 /* RCX == RIP */ jne opportunistic_sysret_failed /* * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP * in kernel space. This essentially lets the user take over - * the kernel, since userspace controls RSP. It's not worth - * testing for canonicalness exactly -- this check detects any - * of the 17 high bits set, which is true for non-canonical - * or kernel addresses. (This will pessimize vsyscall=native. - * Big deal.) + * the kernel, since userspace controls RSP. * - * If virtual addresses ever become wider, this will need + * If width of "canonical tail" ever becomes variable, this will need * to be updated to remain correct on both old and new CPUs. */ .ifne __VIRTUAL_MASK_SHIFT - 47 .error "virtual address width changed -- SYSRET checks need update" .endif - shr $__VIRTUAL_MASK_SHIFT, %rcx - jnz opportunistic_sysret_failed + /* Change top 16 bits to be the sign-extension of 47th bit */ + shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + /* If this changed %rcx, it was not canonical */ + cmpq %rcx, %r11 + jne opportunistic_sysret_failed cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ jne opportunistic_sysret_failed @@ -475,8 +476,8 @@ syscall_return: */ syscall_return_via_sysret: CFI_REMEMBER_STATE - /* r11 is already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_R11 + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp),%rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -533,40 +534,27 @@ GLOBAL(stub_execveat) CFI_ENDPROC END(stub_execveat) -#ifdef CONFIG_X86_X32_ABI +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) .align 8 GLOBAL(stub_x32_execve) +GLOBAL(stub32_execve) CFI_STARTPROC DEFAULT_FRAME 0, 8 call compat_sys_execve jmp return_from_execve CFI_ENDPROC +END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 - call compat_sys_execveat - jmp return_from_execve - CFI_ENDPROC -END(stub_x32_execveat) -#endif - -#ifdef CONFIG_IA32_EMULATION - .align 8 -GLOBAL(stub32_execve) - CFI_STARTPROC - call compat_sys_execve - jmp return_from_execve - CFI_ENDPROC -END(stub32_execve) - .align 8 GLOBAL(stub32_execveat) CFI_STARTPROC + DEFAULT_FRAME 0, 8 call compat_sys_execveat jmp return_from_execve CFI_ENDPROC END(stub32_execveat) +END(stub_x32_execveat) #endif /* @@ -622,7 +610,7 @@ ENTRY(ret_from_fork) RESTORE_EXTRA_REGS - testl $3,CS(%rsp) # from kernel_thread? + testb $3, CS(%rsp) # from kernel_thread? /* * By the time we get here, we have no idea whether our pt_regs, @@ -686,8 +674,8 @@ END(irq_entries_start) leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ - testl $3, CS-RBP(%rsp) - je 1f + testb $3, CS-RBP(%rsp) + jz 1f SWAPGS 1: /* @@ -741,8 +729,8 @@ ret_from_intr: CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET RBP - testl $3,CS(%rsp) - je retint_kernel + testb $3, CS(%rsp) + jz retint_kernel /* Interrupt came from user space */ GET_THREAD_INFO(%rcx) @@ -928,6 +916,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ #ifdef CONFIG_HAVE_KVM apicinterrupt3 POSTED_INTR_VECTOR \ kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \ + kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi #endif #ifdef CONFIG_X86_MCE_THRESHOLD @@ -989,7 +979,7 @@ ENTRY(\sym) .if \paranoid .if \paranoid == 1 CFI_REMEMBER_STATE - testl $3, CS(%rsp) /* If coming from userspace, switch */ + testb $3, CS(%rsp) /* If coming from userspace, switch */ jnz 1f /* stacks. */ .endif call paranoid_entry @@ -1202,17 +1192,17 @@ ENTRY(xen_failsafe_callback) /*CFI_REL_OFFSET ds,DS*/ CFI_REL_OFFSET r11,8 CFI_REL_OFFSET rcx,0 - movw %ds,%cx + movl %ds,%ecx cmpw %cx,0x10(%rsp) CFI_REMEMBER_STATE jne 1f - movw %es,%cx + movl %es,%ecx cmpw %cx,0x18(%rsp) jne 1f - movw %fs,%cx + movl %fs,%ecx cmpw %cx,0x20(%rsp) jne 1f - movw %gs,%cx + movl %gs,%ecx cmpw %cx,0x28(%rsp) jne 1f /* All segments match their saved values => Category 2 (Bad IRET). */ @@ -1330,8 +1320,8 @@ ENTRY(error_entry) SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 xorl %ebx,%ebx - testl $3,CS+8(%rsp) - je error_kernelspace + testb $3, CS+8(%rsp) + jz error_kernelspace error_swapgs: SWAPGS error_sti: @@ -1382,7 +1372,7 @@ ENTRY(error_exit) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax - jne retint_kernel + jnz retint_kernel LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi @@ -1627,7 +1617,6 @@ end_repeat_nmi: je 1f movq %r12, %cr2 1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d031bad9e07e..02d257256200 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -547,7 +547,7 @@ ENTRY(early_idt_handler) cld cmpl $2,(%esp) # X86_TRAP_NMI - je is_nmi # Ignore NMI + je .Lis_nmi # Ignore NMI cmpl $2,%ss:early_recursion_flag je hlt_loop @@ -600,7 +600,7 @@ ex_entry: pop %ecx pop %eax decl %ss:early_recursion_flag -is_nmi: +.Lis_nmi: addl $8,%esp /* drop vector number and error code */ iret ENDPROC(early_idt_handler) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ae6588b301c2..43eafc8afb69 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -344,7 +344,7 @@ ENTRY(early_idt_handler) cld cmpl $2,(%rsp) # X86_TRAP_NMI - je is_nmi # Ignore NMI + je .Lis_nmi # Ignore NMI cmpl $2,early_recursion_flag(%rip) jz 1f @@ -409,7 +409,7 @@ ENTRY(early_idt_handler) popq %rcx popq %rax decl early_recursion_flag(%rip) -is_nmi: +.Lis_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN ENDPROC(early_idt_handler) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3acbff4716b0..e2449cf38b06 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -12,6 +12,7 @@ #include <linux/pm.h> #include <linux/io.h> +#include <asm/irqdomain.h> #include <asm/fixmap.h> #include <asm/hpet.h> #include <asm/time.h> @@ -305,8 +306,6 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } -static int hpet_setup_msi_irq(unsigned int irq); - static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { @@ -357,7 +356,7 @@ static void hpet_set_mode(enum clock_event_mode mode, hpet_enable_legacy_int(); } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - hpet_setup_msi_irq(hdev->irq); + irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); @@ -423,6 +422,7 @@ static int hpet_legacy_next_event(unsigned long delta, static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); static struct hpet_dev *hpet_devs; +static struct irq_domain *hpet_domain; void hpet_msi_unmask(struct irq_data *data) { @@ -473,31 +473,6 @@ static int hpet_msi_next_event(unsigned long delta, return hpet_next_event(delta, evt, hdev->num); } -static int hpet_setup_msi_irq(unsigned int irq) -{ - if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { - irq_free_hwirq(irq); - return -EINVAL; - } - return 0; -} - -static int hpet_assign_irq(struct hpet_dev *dev) -{ - unsigned int irq = irq_alloc_hwirq(-1); - - if (!irq) - return -EINVAL; - - irq_set_handler_data(irq, dev); - - if (hpet_setup_msi_irq(irq)) - return -EINVAL; - - dev->irq = irq; - return 0; -} - static irqreturn_t hpet_interrupt_handler(int irq, void *data) { struct hpet_dev *dev = (struct hpet_dev *)data; @@ -540,9 +515,6 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) if (!(hdev->flags & HPET_DEV_VALID)) return; - if (hpet_setup_msi_irq(hdev->irq)) - return; - hdev->cpu = cpu; per_cpu(cpu_hpet_dev, cpu) = hdev; evt->name = hdev->name; @@ -574,7 +546,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int id; unsigned int num_timers; unsigned int num_timers_used = 0; - int i; + int i, irq; if (hpet_msi_disable) return; @@ -587,6 +559,10 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) num_timers++; /* Value read out starts from 0 */ hpet_print_config(); + hpet_domain = hpet_create_irq_domain(hpet_blockid); + if (!hpet_domain) + return; + hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); if (!hpet_devs) return; @@ -601,15 +577,16 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) if (!(cfg & HPET_TN_FSB_CAP)) continue; + irq = hpet_assign_irq(hpet_domain, hdev, hdev->num); + if (irq < 0) + continue; + + sprintf(hdev->name, "hpet%d", i); + hdev->num = i; + hdev->irq = irq; hdev->flags = 0; if (cfg & HPET_TN_PERIODIC_CAP) hdev->flags |= HPET_DEV_PERI_CAP; - hdev->num = i; - - sprintf(hdev->name, "hpet%d", i); - if (hpet_assign_irq(hdev)) - continue; - hdev->flags |= HPET_DEV_FSB_CAP; hdev->flags |= HPET_DEV_VALID; num_timers_used++; @@ -709,10 +686,6 @@ static int hpet_cpuhp_notify(struct notifier_block *n, } #else -static int hpet_setup_msi_irq(unsigned int irq) -{ - return 0; -} static void hpet_msi_capability_lookup(unsigned int start_timer) { return; diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 009183276bb7..6185d3141219 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -173,6 +173,21 @@ static void init_thread_xstate(void) xstate_size = sizeof(struct i387_fxsave_struct); else xstate_size = sizeof(struct i387_fsave_struct); + + /* + * Quirk: we don't yet handle the XSAVES* instructions + * correctly, as we don't correctly convert between + * standard and compacted format when interfacing + * with user-space - so disable it for now. + * + * The difference is small: with recent CPUs the + * compacted format is only marginally smaller than + * the standard FPU state format. + * + * ( This is easy to backport while we are fixing + * XSAVES* support. ) + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); } /* diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index e7cc5370cd2f..16cb827a5b27 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -329,8 +329,8 @@ static void init_8259A(int auto_eoi) */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */ + outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); @@ -342,8 +342,8 @@ static void init_8259A(int auto_eoi) outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */ + outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e5952c225532..7e10c8b4b318 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -22,6 +22,12 @@ #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + atomic_t irq_err_count; /* Function pointer for generic interrupt vector handling */ @@ -136,6 +142,18 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); #endif +#ifdef CONFIG_HAVE_KVM + seq_printf(p, "%*s: ", prec, "PIN"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); + seq_puts(p, " Posted-interrupt notification event\n"); + + seq_printf(p, "%*s: ", prec, "PIW"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->kvm_posted_intr_wakeup_ipis); + seq_puts(p, " Posted-interrupt wakeup event\n"); +#endif return 0; } @@ -192,8 +210,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) unsigned vector = ~regs->orig_ax; unsigned irq; - irq_enter(); - exit_idle(); + entering_irq(); irq = __this_cpu_read(vector_irq[vector]); @@ -209,7 +226,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } } - irq_exit(); + exiting_irq(); set_irq_regs(old_regs); return 1; @@ -237,6 +254,18 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs) } #ifdef CONFIG_HAVE_KVM +static void dummy_handler(void) {} +static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; + +void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) +{ + if (handler) + kvm_posted_intr_wakeup_handler = handler; + else + kvm_posted_intr_wakeup_handler = dummy_handler; +} +EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); + /* * Handler for POSTED_INTERRUPT_VECTOR. */ @@ -244,16 +273,23 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - ack_APIC_irq(); - - irq_enter(); - - exit_idle(); - + entering_ack_irq(); inc_irq_stat(kvm_posted_intr_ipis); + exiting_irq(); + set_irq_regs(old_regs); +} - irq_exit(); +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. + */ +__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + entering_ack_irq(); + inc_irq_stat(kvm_posted_intr_wakeup_ipis); + kvm_posted_intr_wakeup_handler(); + exiting_irq(); set_irq_regs(old_regs); } #endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f9fd86a7fcc7..cd74f5978ab9 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -21,12 +21,6 @@ #include <asm/apic.h> -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - #ifdef CONFIG_DEBUG_STACKOVERFLOW int sysctl_panic_on_stackoverflow __read_mostly; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 394e643d7830..bc4604e500a3 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -20,12 +20,6 @@ #include <asm/idle.h> #include <asm/apic.h> -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - int sysctl_panic_on_stackoverflow; /* diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 15d741ddfeeb..dc5fa6a1e8d6 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -10,12 +10,6 @@ #include <asm/apic.h> #include <asm/trace/irq_vectors.h> -static inline void irq_work_entering_irq(void) -{ - irq_enter(); - ack_APIC_irq(); -} - static inline void __smp_irq_work_interrupt(void) { inc_irq_stat(apic_irq_work_irqs); @@ -24,14 +18,14 @@ static inline void __smp_irq_work_interrupt(void) __visible void smp_irq_work_interrupt(struct pt_regs *regs) { - irq_work_entering_irq(); + ipi_entering_ack_irq(); __smp_irq_work_interrupt(); exiting_irq(); } __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) { - irq_work_entering_irq(); + ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); __smp_irq_work_interrupt(); trace_irq_work_exit(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index cd10a6437264..680723a8e4b6 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -86,7 +86,7 @@ void __init init_IRQ(void) int i; /* - * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15. * If these IRQ's are handled by legacy interrupt-controllers like PIC, * then this configuration will likely be static after the boot. If * these IRQ's are handled by more mordern controllers like IO-APIC, @@ -94,7 +94,7 @@ void __init init_IRQ(void) * irq's migrate etc. */ for (i = 0; i < nr_legacy_irqs(); i++) - per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i; x86_init.irqs.intr_init(); } @@ -144,6 +144,8 @@ static void __init apic_intr_init(void) #ifdef CONFIG_HAVE_KVM /* IPI for KVM to deliver posted interrupt */ alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); + /* IPI for KVM to deliver interrupt to wake up tasks */ + alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); #endif /* IPI vectors for APIC spurious and error interrupts */ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 2d2a237f2c73..30ca7607cbbb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -19,8 +19,8 @@ #include <linux/module.h> #include <linux/smp.h> #include <linux/pci.h> -#include <linux/irqdomain.h> +#include <asm/irqdomain.h> #include <asm/mtrr.h> #include <asm/mpspec.h> #include <asm/pgalloc.h> @@ -113,11 +113,6 @@ static void __init MP_bus_info(struct mpc_bus *m) pr_warn("Unknown bustype %s - ignoring\n", str); } -static struct irq_domain_ops mp_ioapic_irqdomain_ops = { - .map = mp_irqdomain_map, - .unmap = mp_irqdomain_unmap, -}; - static void __init MP_ioapic_info(struct mpc_ioapic *m) { struct ioapic_domain_cfg cfg = { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c614dd492f5f..58bcfb67c01f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || +#ifdef CONFIG_X86_32 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || +#endif type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#if defined(CONFIG_X86_32) .irq_enable_sysexit = native_irq_enable_sysexit, #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..0de21c62c348 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..a99900cedc22 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -302,13 +302,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0, kernel_stack, and current_top_of_stack. This changes + * Reload esp0 and cpu_current_top_of_stack. This changes * current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..82134506faa8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -409,9 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload esp0 and ss1. This changes current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + THREAD_SIZE); - /* * Now maybe reload the debug registers and handle I/O bitmaps */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d74ac33290ae..8d04a7594a03 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1222,8 +1222,7 @@ void __init setup_arch(char **cmdline_p) init_cpu_to_node(); init_apic_mappings(); - if (x86_io_apic_ops.init) - x86_io_apic_ops.init(); + io_apic_init_mappings(); kvm_guest_init(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index be8e1bde07aa..15aaa69bbb5e 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -170,8 +170,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) asmlinkage __visible void smp_reboot_interrupt(void) { - ack_APIC_irq(); - irq_enter(); + ipi_entering_ack_irq(); stop_this_cpu(NULL); irq_exit(); } @@ -265,12 +264,6 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs) */ } -static inline void smp_entering_irq(void) -{ - ack_APIC_irq(); - irq_enter(); -} - __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* @@ -279,7 +272,7 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) * scheduler_ipi(). This is OK, since those functions are allowed * to nest. */ - smp_entering_irq(); + ipi_entering_ack_irq(); trace_reschedule_entry(RESCHEDULE_VECTOR); __smp_reschedule_interrupt(); trace_reschedule_exit(RESCHEDULE_VECTOR); @@ -297,14 +290,14 @@ static inline void __smp_call_function_interrupt(void) __visible void smp_call_function_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); __smp_call_function_interrupt(); exiting_irq(); } __visible void smp_trace_call_function_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); __smp_call_function_interrupt(); trace_call_function_exit(CALL_FUNCTION_VECTOR); @@ -319,14 +312,14 @@ static inline void __smp_call_function_single_interrupt(void) __visible void smp_call_function_single_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } __visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); __smp_call_function_single_interrupt(); trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547eac8cd..fd6291c921b6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -514,6 +514,40 @@ void __inquire_remote_apic(int apicid) } /* + * The Multiprocessor Specification 1.4 (1997) example code suggests + * that there should be a 10ms delay between the BSP asserting INIT + * and de-asserting INIT, when starting a remote processor. + * But that slows boot and resume on modern processors, which include + * many cores and don't require that delay. + * + * Cmdline "init_cpu_udelay=" is available to over-ride this delay. + * Modern processor families are quirked to remove the delay entirely. + */ +#define UDELAY_10MS_DEFAULT 10000 + +static unsigned int init_udelay = UDELAY_10MS_DEFAULT; + +static int __init cpu_init_udelay(char *str) +{ + get_option(&str, &init_udelay); + + return 0; +} +early_param("cpu_init_udelay", cpu_init_udelay); + +static void __init smp_quirk_init_udelay(void) +{ + /* if cmdline changed it from default, leave it alone */ + if (init_udelay != UDELAY_10MS_DEFAULT) + return; + + /* if modern processor, use no delay */ + if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || + ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) + init_udelay = 0; +} + +/* * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. @@ -555,7 +589,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { - unsigned long send_status, accept_status = 0; + unsigned long send_status = 0, accept_status = 0; int maxlvt, num_starts, j; maxlvt = lapic_get_maxlvt(); @@ -583,7 +617,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); - mdelay(10); + udelay(init_udelay); pr_debug("Deasserting INIT\n"); @@ -651,6 +685,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); accept_status = (apic_read(APIC_ESR) & 0xEF); @@ -792,8 +827,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; } /* @@ -1176,6 +1209,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) uv_system_init(); set_mtrr_aps_delayed_init(); + + smp_quirk_init_udelay(); } void arch_enable_nonboot_cpus_begin(void) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..5e0791f9d3dc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -997,8 +997,8 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - set_bit(SYSCALL_VECTOR, used_vectors); + set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); + set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 234b0722de53..3cee10abf01d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -111,11 +111,9 @@ EXPORT_SYMBOL_GPL(x86_platform); #if defined(CONFIG_PCI_MSI) struct x86_msi_ops x86_msi = { .setup_msi_irqs = native_setup_msi_irqs, - .compose_msi_msg = native_compose_msi_msg, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, .restore_msi_irqs = default_restore_msi_irqs, - .setup_hpet_msi = default_setup_hpet_msi, }; /* MSI arch specific hooks */ @@ -141,13 +139,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev) #endif struct x86_io_apic_ops x86_io_apic_ops = { - .init = native_io_apic_init_mappings, .read = native_io_apic_read, - .write = native_io_apic_write, - .modify = native_io_apic_modify, .disable = native_disable_io_apic, - .print_entries = native_io_apic_print_entries, - .set_affinity = native_ioapic_set_affinity, - .setup_entry = native_setup_ioapic_entry, - .eoi_ioapic_pin = native_eoi_ioapic_pin, }; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59b69f6a2844..1d08ad3582d0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,6 +16,8 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> +#include <asm/i387.h> /* For use_eager_fpu. Ugh! */ +#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */ #include <asm/user.h> #include <asm/xsave.h> #include "cpuid.h" @@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu); + /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c3b1ad9fca81..496b3695d3d3 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->ebx & bit(X86_FEATURE_RTM)); } + +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_MPX)); +} #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d43867c33bc4..44a7d2515497 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, } } -void update_permission_bitmask(struct kvm_vcpu *vcpu, - struct kvm_mmu *mmu, bool ept) +static void update_permission_bitmask(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, bool ept) { unsigned bit, byte, pfec; u8 map; @@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) { bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); struct kvm_mmu *context = &vcpu->arch.mmu; MMU_WARN_ON(VALID_PAGE(context->root_hpa)); @@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) context->base_role.cr0_wp = is_write_protection(vcpu); context->base_role.smep_andnot_wp = smep && !is_write_protection(vcpu); + context->base_role.smap_andnot_wp + = smap && !is_write_protection(vcpu); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); @@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; - union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page; + union kvm_mmu_page_role mask = (union kvm_mmu_page_role) { + .cr0_wp = 1, + .cr4_pae = 1, + .nxe = 1, + .smep_andnot_wp = 1, + .smap_andnot_wp = 1, + }; /* * If we don't have indirect shadow pages, it means no page is @@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); - mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7d65637c851..0ada65ecddcf 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -71,8 +71,6 @@ enum { int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); -void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - bool ept); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { @@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index = (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); + WARN_ON(pfec & PFERR_RSVD_MASK); + return (mmu->permissions[index] >> pte_access) & 1; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index fd49c867b25a..6e6d115fe9b5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, mmu_is_nested(vcpu)); if (likely(r != RET_MMIO_PF_INVALID)) return r; + + /* + * page fault with PFEC.RSVD = 1 is caused by shadow + * page fault, should not be used to walk guest page + * table. + */ + error_code &= ~PFERR_RSVD_MASK; }; r = mmu_topup_memory_caches(vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce741b8650f6..9afa233b5482 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, + .fpu_activate = svm_fpu_activate, .fpu_deactivate = svm_fpu_deactivate, .tlb_flush = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b61687bd79..2d73807f0d31 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, + .fpu_activate = vmx_fpu_activate, .fpu_deactivate = vmx_fpu_deactivate, .tlb_flush = vmx_flush_tlb, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c73efcd03e29..ea306adbbc13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | - X86_CR4_PAE | X86_CR4_SMEP; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP; + if (cr4 & CR4_RESERVED_BITS) return 1; @@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); - if ((cr4 ^ old_cr4) & X86_CR4_SMAP) - update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false); - if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) kvm_update_cpuid(vcpu); @@ -6197,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) return; page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (is_error_page(page)) + return; kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); /* @@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) fpu_save_init(&vcpu->arch.guest_fpu); __kernel_fpu_end(); ++vcpu->stat.fpu_reload; - kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + if (!vcpu->arch.eager_fpu) + kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); + trace_kvm_fpu(0); } @@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { + struct kvm_vcpu *vcpu; + if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) printk_once(KERN_WARNING "kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); - return kvm_x86_ops->vcpu_create(kvm, id); + + vcpu = kvm_x86_ops->vcpu_create(kvm, id); + + /* + * Activate fpu unconditionally in case the guest needs eager FPU. It will be + * deactivated soon if it doesn't. + */ + kvm_x86_ops->fpu_activate(vcpu); + return vcpu; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..cab9aaa7802c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -90,7 +90,7 @@ struct lguest_data lguest_data = { .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ - .syscall_vec = SYSCALL_VECTOR, + .syscall_vec = IA32_SYSCALL_VECTOR, }; /*G:037 @@ -866,7 +866,7 @@ static void __init lguest_init_IRQ(void) for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); - if (i != SYSCALL_VECTOR) + if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR)); } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 1530afb07c85..982989d282ff 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -40,6 +40,6 @@ else lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o - lib-y += copy_user_64.o copy_user_nocache_64.o + lib-y += copy_user_64.o lib-y += cmpxchg16b_emu.o endif diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index fa997dfaef24..e4b3beee83bd 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -16,30 +16,6 @@ #include <asm/asm.h> #include <asm/smap.h> - .macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(100b,103b) - _ASM_EXTABLE(101b,103b) - .endm - /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) CFI_STARTPROC @@ -266,3 +242,95 @@ ENTRY(copy_user_enhanced_fast_string) _ASM_EXTABLE(1b,12b) CFI_ENDPROC ENDPROC(copy_user_enhanced_fast_string) + +/* + * copy_user_nocache - Uncached memory copy with exception handling + * This will force destination/source out of cache for more performance. + */ +ENTRY(__copy_user_nocache) + CFI_STARTPROC + ASM_STAC + cmpl $8,%edx + jb 20f /* less then 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx + jz 17f +1: movq (%rsi),%r8 +2: movq 1*8(%rsi),%r9 +3: movq 2*8(%rsi),%r10 +4: movq 3*8(%rsi),%r11 +5: movnti %r8,(%rdi) +6: movnti %r9,1*8(%rdi) +7: movnti %r10,2*8(%rdi) +8: movnti %r11,3*8(%rdi) +9: movq 4*8(%rsi),%r8 +10: movq 5*8(%rsi),%r9 +11: movq 6*8(%rsi),%r10 +12: movq 7*8(%rsi),%r11 +13: movnti %r8,4*8(%rdi) +14: movnti %r9,5*8(%rdi) +15: movnti %r10,6*8(%rdi) +16: movnti %r11,7*8(%rdi) + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + decl %ecx + jnz 1b +17: movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +18: movq (%rsi),%r8 +19: movnti %r8,(%rdi) + leaq 8(%rsi),%rsi + leaq 8(%rdi),%rdi + decl %ecx + jnz 18b +20: andl %edx,%edx + jz 23f + movl %edx,%ecx +21: movb (%rsi),%al +22: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 21b +23: xorl %eax,%eax + ASM_CLAC + sfence + ret + + .section .fixup,"ax" +30: shll $6,%ecx + addl %ecx,%edx + jmp 60f +40: lea (%rdx,%rcx,8),%rdx + jmp 60f +50: movl %ecx,%edx +60: sfence + jmp copy_user_handle_tail + .previous + + _ASM_EXTABLE(1b,30b) + _ASM_EXTABLE(2b,30b) + _ASM_EXTABLE(3b,30b) + _ASM_EXTABLE(4b,30b) + _ASM_EXTABLE(5b,30b) + _ASM_EXTABLE(6b,30b) + _ASM_EXTABLE(7b,30b) + _ASM_EXTABLE(8b,30b) + _ASM_EXTABLE(9b,30b) + _ASM_EXTABLE(10b,30b) + _ASM_EXTABLE(11b,30b) + _ASM_EXTABLE(12b,30b) + _ASM_EXTABLE(13b,30b) + _ASM_EXTABLE(14b,30b) + _ASM_EXTABLE(15b,30b) + _ASM_EXTABLE(16b,30b) + _ASM_EXTABLE(18b,40b) + _ASM_EXTABLE(19b,40b) + _ASM_EXTABLE(21b,50b) + _ASM_EXTABLE(22b,50b) + CFI_ENDPROC +ENDPROC(__copy_user_nocache) diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S deleted file mode 100644 index 6a4f43c2d9e6..000000000000 --- a/arch/x86/lib/copy_user_nocache_64.S +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> - * Copyright 2002 Andi Kleen, SuSE Labs. - * Subject to the GNU Public License v2. - * - * Functions to copy from and to user space. - */ - -#include <linux/linkage.h> -#include <asm/dwarf2.h> - -#define FIX_ALIGNMENT 1 - -#include <asm/current.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/asm.h> -#include <asm/smap.h> - - .macro ALIGN_DESTINATION -#ifdef FIX_ALIGNMENT - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(100b,103b) - _ASM_EXTABLE(101b,103b) -#endif - .endm - -/* - * copy_user_nocache - Uncached memory copy with exception handling - * This will force destination/source out of cache for more performance. - */ -ENTRY(__copy_user_nocache) - CFI_STARTPROC - ASM_STAC - cmpl $8,%edx - jb 20f /* less then 8 bytes, go to byte copy loop */ - ALIGN_DESTINATION - movl %edx,%ecx - andl $63,%edx - shrl $6,%ecx - jz 17f -1: movq (%rsi),%r8 -2: movq 1*8(%rsi),%r9 -3: movq 2*8(%rsi),%r10 -4: movq 3*8(%rsi),%r11 -5: movnti %r8,(%rdi) -6: movnti %r9,1*8(%rdi) -7: movnti %r10,2*8(%rdi) -8: movnti %r11,3*8(%rdi) -9: movq 4*8(%rsi),%r8 -10: movq 5*8(%rsi),%r9 -11: movq 6*8(%rsi),%r10 -12: movq 7*8(%rsi),%r11 -13: movnti %r8,4*8(%rdi) -14: movnti %r9,5*8(%rdi) -15: movnti %r10,6*8(%rdi) -16: movnti %r11,7*8(%rdi) - leaq 64(%rsi),%rsi - leaq 64(%rdi),%rdi - decl %ecx - jnz 1b -17: movl %edx,%ecx - andl $7,%edx - shrl $3,%ecx - jz 20f -18: movq (%rsi),%r8 -19: movnti %r8,(%rdi) - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi - decl %ecx - jnz 18b -20: andl %edx,%edx - jz 23f - movl %edx,%ecx -21: movb (%rsi),%al -22: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 21b -23: xorl %eax,%eax - ASM_CLAC - sfence - ret - - .section .fixup,"ax" -30: shll $6,%ecx - addl %ecx,%edx - jmp 60f -40: lea (%rdx,%rcx,8),%rdx - jmp 60f -50: movl %ecx,%edx -60: sfence - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(1b,30b) - _ASM_EXTABLE(2b,30b) - _ASM_EXTABLE(3b,30b) - _ASM_EXTABLE(4b,30b) - _ASM_EXTABLE(5b,30b) - _ASM_EXTABLE(6b,30b) - _ASM_EXTABLE(7b,30b) - _ASM_EXTABLE(8b,30b) - _ASM_EXTABLE(9b,30b) - _ASM_EXTABLE(10b,30b) - _ASM_EXTABLE(11b,30b) - _ASM_EXTABLE(12b,30b) - _ASM_EXTABLE(13b,30b) - _ASM_EXTABLE(14b,30b) - _ASM_EXTABLE(15b,30b) - _ASM_EXTABLE(16b,30b) - _ASM_EXTABLE(18b,40b) - _ASM_EXTABLE(19b,40b) - _ASM_EXTABLE(21b,50b) - _ASM_EXTABLE(22b,50b) - CFI_ENDPROC -ENDPROC(__copy_user_nocache) diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 9ca35fc60cfe..3a2ec8790ca7 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -82,7 +82,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) * MTRR is UC or WC. UC_MINUS gets the real intention, of the * user, which is "WC if the MTRR is WC, UC if you can't do that." */ - if (!pat_enabled && pgprot_val(prot) == + if (!pat_enabled() && pgprot_val(prot) == (__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC))) prot = __pgprot(__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 70e7444c6835..b0da3588b452 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -234,10 +234,11 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; + * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use - * UC MINUS. + * UC MINUS. Drivers that are certain they need or can already + * be converted over to strong UC can use ioremap_uc(). */ enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS; @@ -247,6 +248,39 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) EXPORT_SYMBOL(ioremap_nocache); /** + * ioremap_uc - map bus memory into CPU space as strongly uncachable + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * ioremap_uc performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked with a strong + * preference as completely uncachable on the CPU when possible. For non-PAT + * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT + * systems this will set the PAT entry for the pages as strong UC. This call + * will honor existing caching rules from things like the PCI bus. Note that + * there are other caches and buffers on many busses. In particular driver + * authors should read up on PCI writes. + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + * + * Must be freed with iounmap. + */ +void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size) +{ + enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC; + + return __ioremap_caller(phys_addr, size, pcm, + __builtin_return_address(0)); +} +EXPORT_SYMBOL_GPL(ioremap_uc); + +/** * ioremap_wc - map memory into CPU space write combined * @phys_addr: bus address of the memory * @size: size of the resource to map @@ -258,7 +292,7 @@ EXPORT_SYMBOL(ioremap_nocache); */ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { - if (pat_enabled) + if (pat_enabled()) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC, __builtin_return_address(0)); else @@ -331,7 +365,7 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int arch_ioremap_pud_supported(void) +int __init arch_ioremap_pud_supported(void) { #ifdef CONFIG_X86_64 return cpu_has_gbpages; @@ -340,7 +374,7 @@ int arch_ioremap_pud_supported(void) #endif } -int arch_ioremap_pmd_supported(void) +int __init arch_ioremap_pmd_supported(void) { return cpu_has_pse; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 89af288ec674..70d221fe2eb4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -129,16 +129,15 @@ within(unsigned long addr, unsigned long start, unsigned long end) */ void clflush_cache_range(void *vaddr, unsigned int size) { - void *vend = vaddr + size - 1; + unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; + void *vend = vaddr + size; + void *p; mb(); - for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) - clflushopt(vaddr); - /* - * Flush any possible final partial cacheline: - */ - clflushopt(vend); + for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + p < vend; p += boot_cpu_data.x86_clflush_size) + clflushopt(p); mb(); } @@ -418,13 +417,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) phys_addr_t phys_addr; unsigned long offset; enum pg_level level; - unsigned long psize; unsigned long pmask; pte_t *pte; pte = lookup_address(virt_addr, &level); BUG_ON(!pte); - psize = page_level_size(level); pmask = page_level_mask(level); offset = virt_addr & ~pmask; phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; @@ -1468,6 +1465,9 @@ int _set_memory_uc(unsigned long addr, int numpages) { /* * for now UC MINUS. see comments in ioremap_nocache() + * If you really need strong UC use ioremap_uc(), but note + * that you cannot override IO areas with set_memory_*() as + * these helpers cannot work with IO memory. */ return change_page_attr_set(&addr, numpages, cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), @@ -1571,7 +1571,7 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - if (!pat_enabled) + if (!pat_enabled()) return set_memory_uc(addr, numpages); ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 35af6771a95a..a1c96544099d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -33,13 +33,15 @@ #include "pat_internal.h" #include "mm_internal.h" -#ifdef CONFIG_X86_PAT -int __read_mostly pat_enabled = 1; +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); static inline void pat_disable(const char *reason) { - pat_enabled = 0; - printk(KERN_INFO "%s\n", reason); + __pat_enabled = 0; + pr_info("x86/PAT: %s\n", reason); } static int __init nopat(char *str) @@ -48,13 +50,12 @@ static int __init nopat(char *str) return 0; } early_param("nopat", nopat); -#else -static inline void pat_disable(const char *reason) + +bool pat_enabled(void) { - (void)reason; + return !!__pat_enabled; } -#endif - +EXPORT_SYMBOL_GPL(pat_enabled); int pat_debug_enable; @@ -188,7 +189,7 @@ void pat_init_cache_modes(void) pat_msg + 4 * i); update_cache_mode_entry(i, cache); } - pr_info("PAT configuration [0-7]: %s\n", pat_msg); + pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); } #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) @@ -198,7 +199,7 @@ void pat_init(void) u64 pat; bool boot_cpu = !boot_pat_state; - if (!pat_enabled) + if (!pat_enabled()) return; if (!cpu_has_pat) { @@ -211,8 +212,7 @@ void pat_init(void) * switched to PAT on the boot CPU. We have no way to * undo PAT. */ - printk(KERN_ERR "PAT enabled, " - "but not supported by secondary CPU\n"); + pr_err("x86/PAT: PAT enabled, but not supported by secondary CPU\n"); BUG(); } } @@ -267,9 +267,9 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, * request is for WB. */ if (req_type == _PAGE_CACHE_MODE_WB) { - u8 mtrr_type; + u8 mtrr_type, uniform; - mtrr_type = mtrr_type_lookup(start, end); + mtrr_type = mtrr_type_lookup(start, end, &uniform); if (mtrr_type != MTRR_TYPE_WRBACK) return _PAGE_CACHE_MODE_UC_MINUS; @@ -347,7 +347,7 @@ static int reserve_ram_pages_type(u64 start, u64 end, page = pfn_to_page(pfn); type = get_page_memtype(page); if (type != -1) { - pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", + pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", start, end - 1, type, req_type); if (new_type) *new_type = type; @@ -400,7 +400,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, BUG_ON(start >= end); /* end is exclusive */ - if (!pat_enabled) { + if (!pat_enabled()) { /* This is identical to page table setting without PAT */ if (new_type) { if (req_type == _PAGE_CACHE_MODE_WC) @@ -451,9 +451,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, err = rbt_memtype_check_insert(new, new_type); if (err) { - printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", - start, end - 1, - cattr_name(new->type), cattr_name(req_type)); + pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", + start, end - 1, + cattr_name(new->type), cattr_name(req_type)); kfree(new); spin_unlock(&memtype_lock); @@ -475,7 +475,7 @@ int free_memtype(u64 start, u64 end) int is_range_ram; struct memtype *entry; - if (!pat_enabled) + if (!pat_enabled()) return 0; /* Low ISA region is always mapped WB. No need to track */ @@ -497,8 +497,8 @@ int free_memtype(u64 start, u64 end) spin_unlock(&memtype_lock); if (!entry) { - printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", - current->comm, current->pid, start, end - 1); + pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", + current->comm, current->pid, start, end - 1); return -EINVAL; } @@ -623,13 +623,13 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) u64 to = from + size; u64 cursor = from; - if (!pat_enabled) + if (!pat_enabled()) return 1; while (cursor < to) { if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", - current->comm, from, to - 1); + pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", + current->comm, from, to - 1); return 0; } cursor += PAGE_SIZE; @@ -659,7 +659,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * caching for the high addresses through the KEN pin, but * we maintain the tradition of paranoia in this code. */ - if (!pat_enabled && + if (!pat_enabled() && !(boot_cpu_has(X86_FEATURE_MTRR) || boot_cpu_has(X86_FEATURE_K6_MTRR) || boot_cpu_has(X86_FEATURE_CYRIX_ARR) || @@ -698,8 +698,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, size; if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { - printk(KERN_INFO "%s:%d ioremap_change_attr failed %s " - "for [mem %#010Lx-%#010Lx]\n", + pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n", current->comm, current->pid, cattr_name(pcm), base, (unsigned long long)(base + size-1)); @@ -729,12 +728,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, * the type requested matches the type of first page in the range. */ if (is_ram) { - if (!pat_enabled) + if (!pat_enabled()) return 0; pcm = lookup_memtype(paddr); if (want_pcm != pcm) { - printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", + pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, cattr_name(want_pcm), (unsigned long long)paddr, @@ -755,13 +754,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, if (strict_prot || !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { free_memtype(paddr, paddr + size); - printk(KERN_ERR "%s:%d map pfn expected mapping type %s" - " for [mem %#010Lx-%#010Lx], got %s\n", - current->comm, current->pid, - cattr_name(want_pcm), - (unsigned long long)paddr, - (unsigned long long)(paddr + size - 1), - cattr_name(pcm)); + pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", + current->comm, current->pid, + cattr_name(want_pcm), + (unsigned long long)paddr, + (unsigned long long)(paddr + size - 1), + cattr_name(pcm)); return -EINVAL; } /* @@ -844,7 +842,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, return ret; } - if (!pat_enabled) + if (!pat_enabled()) return 0; /* @@ -872,7 +870,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, { enum page_cache_mode pcm; - if (!pat_enabled) + if (!pat_enabled()) return 0; /* Set prot based on lookup */ @@ -913,7 +911,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, pgprot_t pgprot_writecombine(pgprot_t prot) { - if (pat_enabled) + if (pat_enabled()) return __pgprot(pgprot_val(prot) | cachemode2protval(_PAGE_CACHE_MODE_WC)); else @@ -996,7 +994,7 @@ static const struct file_operations memtype_fops = { static int __init pat_memtype_list_init(void) { - if (pat_enabled) { + if (pat_enabled()) { debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, NULL, &memtype_fops); } diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h index f6411620305d..a739bfc40690 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat_internal.h @@ -4,7 +4,7 @@ extern int pat_debug_enable; #define dprintk(fmt, arg...) \ - do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) + do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0) struct memtype { u64 start; diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 6582adcc8bd9..63931080366a 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -160,9 +160,9 @@ success: return 0; failure: - printk(KERN_INFO "%s:%d conflicting memory types " - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, - end, cattr_name(found_type), cattr_name(match->type)); + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(match->type)); return -EBUSY; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0b97d2c75df3..fb0a9dd1d6e4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -563,16 +563,31 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, } #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +/** + * pud_set_huge - setup kernel PUD mapping + * + * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this + * function sets up a huge page only if any of the following conditions are met: + * + * - MTRRs are disabled, or + * + * - MTRRs are enabled and the range is completely covered by a single MTRR, or + * + * - MTRRs are enabled and the corresponding MTRR memory type is WB, which + * has no effect on the requested PAT memory type. + * + * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger + * page mapping attempt fails. + * + * Returns 1 on success and 0 on failure. + */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { - u8 mtrr; + u8 mtrr, uniform; - /* - * Do not use a huge page when the range is covered by non-WB type - * of MTRRs. - */ - mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); + if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && + (mtrr != MTRR_TYPE_WRBACK)) return 0; prot = pgprot_4k_2_large(prot); @@ -584,17 +599,24 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pmd_set_huge - setup kernel PMD mapping + * + * See text over pud_set_huge() above. + * + * Returns 1 on success and 0 on failure. + */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { - u8 mtrr; + u8 mtrr, uniform; - /* - * Do not use a huge page when the range is covered by non-WB type - * of MTRRs. - */ - mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); + if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && + (mtrr != MTRR_TYPE_WRBACK)) { + pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", + __func__, addr, addr + PMD_SIZE); return 0; + } prot = pgprot_4k_2_large(prot); @@ -605,6 +627,11 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pud_clear_huge - clear kernel PUD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PUD map is found). + */ int pud_clear_huge(pud_t *pud) { if (pud_large(*pud)) { @@ -615,6 +642,11 @@ int pud_clear_huge(pud_t *pud) return 0; } +/** + * pmd_clear_huge - clear kernel PMD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PMD map is found). + */ int pmd_clear_huge(pmd_t *pmd) { if (pmd_large(*pmd)) { diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 987514396c1e..ddeff4844a10 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, if (is_ereg(dst_reg)) EMIT1(0x41); EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); + + /* emit 'movzwl eax, ax' */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: /* emit 'bswap eax' to swap lower 4 bytes */ @@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm32) { + case 16: + /* emit 'movzwl eax, ax' to zero extend 16-bit + * into 64 bit + */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 32: + /* emit 'mov eax, eax' to clear upper 32-bits */ + if (is_ereg(dst_reg)) + EMIT1(0x45); + EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 64: + /* nop */ + break; + } break; /* ST: *(u8*)(dst_reg + off) = imm */ @@ -938,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog) } ctx.cleanup_addr = proglen; - for (pass = 0; pass < 10; pass++) { + /* JITed image shrinks with every pass and the loop iterates + * until the image stops shrinking. Very large bpf programs + * may converge on the last pass. In such case do one more + * pass to emit the final image + */ + for (pass = 0; pass < 10 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d93963340c3c..14a63ed6fe09 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -482,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_sysdata *sd = bridge->bus->sysdata; - - ACPI_COMPANION_SET(&bridge->dev, sd->companion); + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_sysdata *sd = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, sd->companion); + } return 0; } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 349c0d32cc0b..0a9f2caf358f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -429,12 +429,12 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, * Caller can followup with UC MINUS request and add a WC mtrr if there * is a free mtrr slot. */ - if (!pat_enabled && write_combine) + if (!pat_enabled() && write_combine) return -EINVAL; - if (pat_enabled && write_combine) + if (pat_enabled() && write_combine) prot |= cachemode2protval(_PAGE_CACHE_MODE_WC); - else if (pat_enabled || boot_cpu_data.x86 > 3) + else if (pat_enabled() || boot_cpu_data.x86 > 3) /* * ioremap() and ioremap_nocache() defaults to UC MINUS for now. * To avoid attribute conflicts, request UC MINUS here diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 852aa4c92da0..27062303c881 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -208,6 +208,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, static int intel_mid_pci_irq_enable(struct pci_dev *dev) { + struct irq_alloc_info info; int polarity; if (dev->irq_managed && dev->irq > 0) @@ -217,14 +218,13 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) polarity = 0; /* active high */ else polarity = 1; /* active low */ + ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity); /* * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to * IOAPIC RTE entries, so we just enable RTE for the device. */ - if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev))) - return -EBUSY; - if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) + if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info) < 0) return -EBUSY; dev->irq_managed = 1; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 5dc6ca5e1741..9bd115484745 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -146,19 +146,20 @@ static void __init pirq_peer_trick(void) /* * Code for querying and setting of IRQ routes on various interrupt routers. + * PIC Edge/Level Control Registers (ELCR) 0x4d0 & 0x4d1. */ -void eisa_set_level_irq(unsigned int irq) +void elcr_set_level_irq(unsigned int irq) { unsigned char mask = 1 << (irq & 7); unsigned int port = 0x4d0 + (irq >> 3); unsigned char val; - static u16 eisa_irq_mask; + static u16 elcr_irq_mask; - if (irq >= 16 || (1 << irq) & eisa_irq_mask) + if (irq >= 16 || (1 << irq) & elcr_irq_mask) return; - eisa_irq_mask |= (1 << irq); + elcr_irq_mask |= (1 << irq); printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq); val = inb(port); if (!(val & mask)) { @@ -965,11 +966,11 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { msg = "found"; - eisa_set_level_irq(irq); + elcr_set_level_irq(irq); } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { if (r->set(pirq_router_dev, dev, pirq, newirq)) { - eisa_set_level_irq(newirq); + elcr_set_level_irq(newirq); msg = "assigned"; irq = newirq; } diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index a62e0be3a2f1..f1a6c8e86ddd 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,4 +1,5 @@ # Platform specific code goes here +obj-y += atom/ obj-y += ce4100/ obj-y += efi/ obj-y += geode/ diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile new file mode 100644 index 000000000000..0a3a40cbc794 --- /dev/null +++ b/arch/x86/platform/atom/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c new file mode 100644 index 000000000000..5ca8ead91579 --- /dev/null +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -0,0 +1,183 @@ +/* + * Intel SOC Punit device state debug driver + * Punit controls power management for North Complex devices (Graphics + * blocks, Image Signal Processing, video processing, display, DSP etc.) + * + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/io.h> +#include <asm/cpu_device_id.h> +#include <asm/iosf_mbi.h> + +/* Side band Interface port */ +#define PUNIT_PORT 0x04 +/* Power gate status reg */ +#define PWRGT_STATUS 0x61 +/* Subsystem config/status Video processor */ +#define VED_SS_PM0 0x32 +/* Subsystem config/status ISP (Image Signal Processor) */ +#define ISP_SS_PM0 0x39 +/* Subsystem config/status Input/output controller */ +#define MIO_SS_PM 0x3B +/* Shift bits for getting status for video, isp and i/o */ +#define SSS_SHIFT 24 +/* Shift bits for getting status for graphics rendering */ +#define RENDER_POS 0 +/* Shift bits for getting status for media control */ +#define MEDIA_POS 2 +/* Shift bits for getting status for Valley View/Baytrail display */ +#define VLV_DISPLAY_POS 6 +/* Subsystem config/status display for Cherry Trail SOC */ +#define CHT_DSP_SSS 0x36 +/* Shift bits for getting status for display */ +#define CHT_DSP_SSS_POS 16 + +struct punit_device { + char *name; + int reg; + int sss_pos; +}; + +static const struct punit_device punit_device_byt[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", PWRGT_STATUS, VLV_DISPLAY_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const struct punit_device punit_device_cht[] = { + { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, + { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, + { "DISPLAY", CHT_DSP_SSS, CHT_DSP_SSS_POS }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + +static const char * const dstates[] = {"D0", "D0i1", "D0i2", "D0i3"}; + +static int punit_dev_state_show(struct seq_file *seq_file, void *unused) +{ + u32 punit_pwr_status; + struct punit_device *punit_devp = seq_file->private; + int index; + int status; + + seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n"); + while (punit_devp->name) { + status = iosf_mbi_read(PUNIT_PORT, BT_MBI_PMC_READ, + punit_devp->reg, + &punit_pwr_status); + if (status) { + seq_printf(seq_file, "%9s : Read Failed\n", + punit_devp->name); + } else { + index = (punit_pwr_status >> punit_devp->sss_pos) & 3; + seq_printf(seq_file, "%9s : %s\n", punit_devp->name, + dstates[index]); + } + punit_devp++; + } + + return 0; +} + +static int punit_dev_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, punit_dev_state_show, inode->i_private); +} + +static const struct file_operations punit_dev_state_ops = { + .open = punit_dev_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *punit_dbg_file; + +static int punit_dbgfs_register(struct punit_device *punit_device) +{ + static struct dentry *dev_state; + + punit_dbg_file = debugfs_create_dir("punit_atom", NULL); + if (!punit_dbg_file) + return -ENXIO; + + dev_state = debugfs_create_file("dev_power_state", S_IFREG | S_IRUGO, + punit_dbg_file, punit_device, + &punit_dev_state_ops); + if (!dev_state) { + pr_err("punit_dev_state register failed\n"); + debugfs_remove(punit_dbg_file); + return -ENXIO; + } + + return 0; +} + +static void punit_dbgfs_unregister(void) +{ + debugfs_remove_recursive(punit_dbg_file); +} + +#define ICPU(model, drv_data) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\ + (kernel_ulong_t)&drv_data } + +static const struct x86_cpu_id intel_punit_cpu_ids[] = { + ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */ + ICPU(76, punit_device_cht), /* Braswell, Cherry Trail */ + {} +}; + +MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids); + +static int __init punit_atom_debug_init(void) +{ + const struct x86_cpu_id *id; + int ret; + + id = x86_match_cpu(intel_punit_cpu_ids); + if (!id) + return -ENODEV; + + ret = punit_dbgfs_register((struct punit_device *)id->driver_data); + if (ret < 0) + return ret; + + return 0; +} + +static void __exit punit_atom_debug_exit(void) +{ + punit_dbgfs_unregister(); +} + +module_init(punit_atom_debug_init); +module_exit(punit_atom_debug_exit); + +MODULE_AUTHOR("Kumar P, Mahesh <mahesh.kumar.p@intel.com>"); +MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); +MODULE_DESCRIPTION("Driver for Punit devices states debugging"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c index 0b283d4d0ad7..de734134bc8d 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c @@ -27,6 +27,7 @@ static struct platform_device wdt_dev = { static int tangier_probe(struct platform_device *pdev) { int gsi; + struct irq_alloc_info info; struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; if (!pdata) @@ -34,8 +35,8 @@ static int tangier_probe(struct platform_device *pdev) /* IOAPIC builds identity mapping between GSI and IRQ on MID */ gsi = pdata->irq; - if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) || - mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) { + ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0); + if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) { dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", gsi); return -EINVAL; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 3005f0c89f2e..01d54ea766c1 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -81,26 +81,34 @@ static unsigned long __init intel_mid_calibrate_tsc(void) return 0; } +static void __init intel_mid_setup_bp_timer(void) +{ + apbt_time_init(); + setup_boot_APIC_clock(); +} + static void __init intel_mid_time_init(void) { sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); + switch (intel_mid_timer_options) { case INTEL_MID_TIMER_APBT_ONLY: break; case INTEL_MID_TIMER_LAPIC_APBT: - x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; + /* Use apbt and local apic */ + x86_init.timers.setup_percpu_clockev = intel_mid_setup_bp_timer; x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; - break; + return; default: if (!boot_cpu_has(X86_FEATURE_ARAT)) break; + /* Lapic only, no apbt */ x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; return; } - /* we need at least one APB timer */ - pre_init_apic_IRQ0(); - apbt_time_init(); + + x86_init.timers.setup_percpu_clockev = apbt_time_init; } static void intel_mid_arch_setup(void) diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index c14ad34776c4..ce992e8cc065 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -95,18 +95,16 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table) pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz, irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->freq_hz, pentry->irq); - if (!pentry->irq) - continue; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; -/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ - mp_irq.irqflag = 5; - mp_irq.srcbus = MP_BUS_ISA; - mp_irq.srcbusirq = pentry->irq; /* IRQ */ - mp_irq.dstapic = MP_APIC_ALL; - mp_irq.dstirq = pentry->irq; - mp_save_irq(&mp_irq); - mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ + mp_irq.irqflag = 5; + mp_irq.srcbus = MP_BUS_ISA; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + mp_save_irq(&mp_irq); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL); } return 0; @@ -177,7 +175,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; mp_save_irq(&mp_irq); - mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL); } return 0; } @@ -436,6 +434,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) struct devs_id *dev = NULL; int num, i, ret; int polarity; + struct irq_alloc_info info; sb = (struct sfi_table_simple *)table; num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); @@ -469,9 +468,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) polarity = 1; } - ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE); - if (ret == 0) - ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC); + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, polarity); + ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC, &info); WARN_ON(ret < 0); } diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index 2a8a74f3bd76..6c7111bbd1e9 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -25,8 +25,8 @@ #include <linux/init.h> #include <linux/sfi.h> #include <linux/io.h> -#include <linux/irqdomain.h> +#include <asm/irqdomain.h> #include <asm/io_apic.h> #include <asm/mpspec.h> #include <asm/setup.h> @@ -71,9 +71,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC -static struct irq_domain_ops sfi_ioapic_irqdomain_ops = { - .map = mp_irqdomain_map, -}; static int __init sfi_parse_ioapic(struct sfi_table_header *table) { @@ -82,7 +79,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) int i, num; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_STRICT, - .ops = &sfi_ioapic_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; sb = (struct sfi_table_simple *)table; diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 0ce673645432..8570abe68be1 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -13,22 +13,37 @@ #include <linux/slab.h> #include <linux/irq.h> +#include <asm/irqdomain.h> #include <asm/apic.h> #include <asm/uv/uv_irq.h> #include <asm/uv/uv_hub.h> /* MMR offset and pnode of hub sourcing interrupts for a given irq */ -struct uv_irq_2_mmr_pnode{ - struct rb_node list; +struct uv_irq_2_mmr_pnode { unsigned long offset; int pnode; - int irq; }; -static DEFINE_SPINLOCK(uv_irq_lock); -static struct rb_root uv_irq_root; +static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = cfg->dest_apicid; -static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); + uv_write_global_mmr64(info->pnode, info->offset, mmr_value); +} static void uv_noop(struct irq_data *data) { } @@ -37,6 +52,23 @@ static void uv_ack_apic(struct irq_data *data) ack_APIC_irq(); } +static int +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + uv_program_mmr(cfg, data->chip_data); + send_cleanup_vector(cfg); + } + + return ret; +} + static struct irq_chip uv_irq_chip = { .name = "UV-CORE", .irq_mask = uv_noop, @@ -45,189 +77,99 @@ static struct irq_chip uv_irq_chip = { .irq_set_affinity = uv_set_irq_affinity, }; -/* - * Add offset and pnode information of the hub sourcing interrupts to the - * rb tree for a specific irq. - */ -static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct rb_node **link = &uv_irq_root.rb_node; - struct rb_node *parent = NULL; - struct uv_irq_2_mmr_pnode *n; - struct uv_irq_2_mmr_pnode *e; - unsigned long irqflags; - - n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, - uv_blade_to_memory_nid(blade)); - if (!n) + struct uv_irq_2_mmr_pnode *chip_data; + struct irq_alloc_info *info = arg; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + int ret; + + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV) + return -EINVAL; + + chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL, + irq_data->node); + if (!chip_data) return -ENOMEM; - n->irq = irq; - n->offset = offset; - n->pnode = uv_blade_to_pnode(blade); - spin_lock_irqsave(&uv_irq_lock, irqflags); - /* Find the right place in the rbtree: */ - while (*link) { - parent = *link; - e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); - - if (unlikely(irq == e->irq)) { - /* irq entry exists */ - e->pnode = uv_blade_to_pnode(blade); - e->offset = offset; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - kfree(n); - return 0; - } - - if (irq < e->irq) - link = &(*link)->rb_left; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret >= 0) { + if (info->uv_limit == UV_AFFINITY_CPU) + irq_set_status_flags(virq, IRQ_NO_BALANCING); else - link = &(*link)->rb_right; + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + + chip_data->pnode = uv_blade_to_pnode(info->uv_blade); + chip_data->offset = info->uv_offset; + irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data, + handle_percpu_irq, NULL, info->uv_name); + } else { + kfree(chip_data); } - /* Insert the node into the rbtree. */ - rb_link_node(&n->list, parent, link); - rb_insert_color(&n->list, &uv_irq_root); - - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; + return ret; } -/* Retrieve offset and pnode information from the rb tree for a specific irq */ -int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +static void uv_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - - if (e->irq == irq) { - *offset = e->offset; - *pnode = e->pnode; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; - } - - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return -1; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + BUG_ON(nr_irqs != 1); + kfree(irq_data->chip_data); + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_clear_status_flags(virq, IRQ_NO_BALANCING); + irq_domain_free_irqs_top(domain, virq, nr_irqs); } /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. */ -static int -arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int limit) +static void uv_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode, err; - unsigned int dest; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); - if (err != 0) - return err; - - if (limit == UV_AFFINITY_CPU) - irq_set_status_flags(irq, IRQ_NO_BALANCING); - else - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - - irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); } /* * Disable the specified MMR located on the specified blade so that MSIs are * longer allowed to be sent. */ -static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +static void uv_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) { unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); } -static int -uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest; - unsigned long mmr_value, mmr_offset; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - if (apic_set_affinity(data, mask, &dest)) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +static const struct irq_domain_ops uv_domain_ops = { + .alloc = uv_domain_alloc, + .free = uv_domain_free, + .activate = uv_domain_activate, + .deactivate = uv_domain_deactivate, +}; - if (cfg->move_in_progress) - send_cleanup_vector(cfg); +static struct irq_domain *uv_get_irq_domain(void) +{ + static struct irq_domain *uv_domain; + static DEFINE_MUTEX(uv_lock); + + mutex_lock(&uv_lock); + if (uv_domain == NULL) { + uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL); + if (uv_domain) + uv_domain->parent = x86_vector_domain; + } + mutex_unlock(&uv_lock); - return IRQ_SET_MASK_OK_NOCOPY; + return uv_domain; } /* @@ -238,19 +180,21 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { - int ret, irq = irq_alloc_hwirq(uv_blade_to_memory_nid(mmr_blade)); + struct irq_alloc_info info; + struct irq_domain *domain = uv_get_irq_domain(); - if (!irq) - return -EBUSY; + if (!domain) + return -ENOMEM; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, - limit); - if (ret == irq) - uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); - else - irq_free_hwirq(irq); + init_irq_alloc_info(&info, cpumask_of(cpu)); + info.type = X86_IRQ_ALLOC_TYPE_UV; + info.uv_limit = limit; + info.uv_blade = mmr_blade; + info.uv_offset = mmr_offset; + info.uv_name = irq_name; - return ret; + return irq_domain_alloc_irqs(domain, 1, + uv_blade_to_memory_nid(mmr_blade), &info); } EXPORT_SYMBOL_GPL(uv_setup_irq); @@ -263,26 +207,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); */ void uv_teardown_irq(unsigned int irq) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - if (e->irq == irq) { - arch_disable_uv_irq(e->pnode, e->offset); - rb_erase(n, &uv_irq_root); - kfree(e); - break; - } - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - irq_free_hwirq(irq); + irq_domain_free_irqs(irq, 1); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 3c4469a7a929..e2386cb4e0c3 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -78,9 +78,9 @@ ENTRY(restore_image) /* code below has been relocated to a safe page */ ENTRY(core_restore_code) -loop: +.Lloop: testq %rdx, %rdx - jz done + jz .Ldone /* get addresses from the pbe and copy the page */ movq pbe_address(%rdx), %rsi @@ -91,8 +91,8 @@ loop: /* progress to the next pbe */ movq pbe_next(%rdx), %rdx - jmp loop -done: + jmp .Lloop +.Ldone: /* jump to the restore_registers address from the image header */ jmpq *%rax /* diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 275a3a8b78af..e97032069f88 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..fe969ac1c65e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1181,10 +1181,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tscp = native_read_tscp, .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, +#else + .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 985fc3ee0973..04529e620559 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,8 @@ #include <asm/percpu.h> #include <asm/processor-flags.h> #include <asm/segment.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> #include <xen/interface/xen.h> @@ -47,29 +49,13 @@ ENTRY(xen_iret) ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) -/* - * sysexit is not used for 64-bit processes, so it's only ever used to - * return to 32-bit compat userspace. - */ -ENTRY(xen_sysexit) - pushq $__USER32_DS - pushq %rcx - pushq $X86_EFLAGS_IF - pushq $__USER32_CS - pushq %rdx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysexit) -RELOC(xen_sysexit, 1b+1) - ENTRY(xen_sysret64) /* * We're already on the usermode stack at this point, but * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -88,7 +74,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..c20fe29e65f4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); +#ifdef CONFIG_X86_32 __visible void xen_sysexit(void); +#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 172a02a6ad14..ba78ccf651e7 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt, return -EINVAL; } +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + return NULL; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ +} + #endif /* _XTENSA_DMA_MAPPING_H */ |