summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig36
-rw-r--r--arch/x86/boot/compressed/eboot.c3
-rw-r--r--arch/x86/boot/compressed/head_64.S2
-rw-r--r--arch/x86/boot/header.S3
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S4
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c12
-rw-r--r--arch/x86/ia32/ia32_signal.c8
-rw-r--r--arch/x86/include/asm/efi.h100
-rw-r--r--arch/x86/include/asm/elf.h35
-rw-r--r--arch/x86/include/asm/espfix.h16
-rw-r--r--arch/x86/include/asm/fixmap.h11
-rw-r--r--arch/x86/include/asm/fpu-internal.h10
-rw-r--r--arch/x86/include/asm/io_apic.h2
-rw-r--r--arch/x86/include/asm/irq_remapping.h3
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mmu.h2
-rw-r--r--arch/x86/include/asm/pgtable-2level.h59
-rw-r--r--arch/x86/include/asm/pgtable.h20
-rw-r--r--arch/x86/include/asm/pgtable_64.h8
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h66
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/setup.h2
-rw-r--r--arch/x86/include/asm/signal.h6
-rw-r--r--arch/x86/include/asm/swiotlb.h7
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/vdso.h78
-rw-r--r--arch/x86/include/asm/vdso32.h11
-rw-r--r--arch/x86/include/asm/vvar.h20
-rw-r--r--arch/x86/include/uapi/asm/vsyscall.h7
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c130
-rw-r--r--arch/x86/kernel/cpu/common.c33
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c5
-rw-r--r--arch/x86/kernel/devicetree.c12
-rw-r--r--arch/x86/kernel/early-quirks.c46
-rw-r--r--arch/x86/kernel/entry_32.S17
-rw-r--r--arch/x86/kernel/entry_64.S285
-rw-r--r--arch/x86/kernel/espfix_64.c209
-rw-r--r--arch/x86/kernel/ftrace.c56
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/hpet.c8
-rw-r--r--arch/x86/kernel/irq.c16
-rw-r--r--arch/x86/kernel/ldt.c10
-rw-r--r--arch/x86/kernel/mcount_64.S217
-rw-r--r--arch/x86/kernel/pci-dma.c11
-rw-r--r--arch/x86/kernel/pci-swiotlb.c9
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/smpboot.c12
-rw-r--r--arch/x86/kernel/vsyscall_64.c15
-rw-r--r--arch/x86/mm/dump_pagetables.c44
-rw-r--r--arch/x86/mm/fault.c5
-rw-r--r--arch/x86/mm/hugetlbpage.c10
-rw-r--r--arch/x86/mm/init_64.c59
-rw-r--r--arch/x86/mm/ioremap.c6
-rw-r--r--arch/x86/mm/numa.c6
-rw-r--r--arch/x86/mm/pageattr-test.c2
-rw-r--r--arch/x86/mm/pgtable.c6
-rw-r--r--arch/x86/pci/sta2x11-fixup.c6
-rw-r--r--arch/x86/platform/efi/efi.c51
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S81
-rw-r--r--arch/x86/platform/intel-mid/device_libs/Makefile1
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_wdt.c72
-rw-r--r--arch/x86/platform/uv/bios_uv.c2
-rw-r--r--arch/x86/platform/uv/uv_irq.c10
-rw-r--r--arch/x86/platform/uv/uv_nmi.c2
-rw-r--r--arch/x86/syscalls/syscall_64.tbl6
-rw-r--r--arch/x86/um/vdso/vma.c2
-rw-r--r--arch/x86/vdso/.gitignore5
-rw-r--r--arch/x86/vdso/Makefile91
-rw-r--r--arch/x86/vdso/vclock_gettime.c26
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S40
-rw-r--r--arch/x86/vdso/vdso.S3
-rw-r--r--arch/x86/vdso/vdso.lds.S7
-rw-r--r--arch/x86/vdso/vdso2c.c175
-rw-r--r--arch/x86/vdso/vdso2c.h163
-rw-r--r--arch/x86/vdso/vdso32-setup.c211
-rw-r--r--arch/x86/vdso/vdso32.S9
-rw-r--r--arch/x86/vdso/vdso32/vdso32.lds.S15
-rw-r--r--arch/x86/vdso/vdsox32.S3
-rw-r--r--arch/x86/vdso/vdsox32.lds.S7
-rw-r--r--arch/x86/vdso/vma.c236
-rw-r--r--arch/x86/xen/mmu.c8
-rw-r--r--arch/x86/xen/setup.c11
86 files changed, 1675 insertions, 1349 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7d5feb5908dd..b660088c220d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,7 +26,7 @@ config X86
select ARCH_MIGHT_HAVE_PC_SERIO
select HAVE_AOUT if X86_32
select HAVE_UNSTABLE_SCHED_CLOCK
- select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
select ARCH_SUPPORTS_INT128 if X86_64
select ARCH_WANTS_PROT_NUMA_PROT_NONE
select HAVE_IDE
@@ -41,7 +41,7 @@ config X86
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
- select HAVE_DMA_CONTIGUOUS if !SWIOTLB
+ select HAVE_DMA_CONTIGUOUS
select HAVE_KRETPROBES
select GENERIC_EARLY_IOREMAP
select HAVE_OPTPROBES
@@ -105,7 +105,7 @@ config X86
select HAVE_ARCH_SECCOMP_FILTER
select BUILDTIME_EXTABLE_SORT
select GENERIC_CMOS_UPDATE
- select HAVE_ARCH_SOFT_DIRTY
+ select HAVE_ARCH_SOFT_DIRTY if X86_64
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
select ARCH_CLOCKSOURCE_DATA
@@ -833,6 +833,7 @@ config X86_LOCAL_APIC
config X86_IO_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
+ select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
bool "Reroute for broken boot IRQs"
@@ -911,10 +912,27 @@ config VM86
default y
depends on X86_32
---help---
- This option is required by programs like DOSEMU to run 16-bit legacy
- code on X86 processors. It also may be needed by software like
- XFree86 to initialize some video cards via BIOS. Disabling this
- option saves about 6k.
+ This option is required by programs like DOSEMU to run
+ 16-bit real mode legacy code on x86 processors. It also may
+ be needed by software like XFree86 to initialize some video
+ cards via BIOS. Disabling this option saves about 6K.
+
+config X86_16BIT
+ bool "Enable support for 16-bit segments" if EXPERT
+ default y
+ ---help---
+ This option is required by programs like Wine to run 16-bit
+ protected mode legacy code on x86 processors. Disabling
+ this option saves about 300 bytes on i386, or around 6K text
+ plus 16K runtime memory on x86-64,
+
+config X86_ESPFIX32
+ def_bool y
+ depends on X86_16BIT && X86_32
+
+config X86_ESPFIX64
+ def_bool y
+ depends on X86_16BIT && X86_64
config TOSHIBA
tristate "Toshiba Laptop support"
@@ -1873,6 +1891,10 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y
depends on X86_64 || X86_PAE
+config ARCH_ENABLE_HUGEPAGE_MIGRATION
+ def_bool y
+ depends on X86_64 && HUGETLB_PAGE && MIGRATION
+
menu "Power management and ACPI options"
config ARCH_HIBERNATION_HEADER
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 4703a6c4b8e3..0331d765c2bb 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1087,8 +1087,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
- cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
- &options_size);
+ cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
if (!cmdline_ptr)
goto fail;
hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 0d558ee899ae..2884e0c3e8a5 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -452,7 +452,7 @@ efi32_config:
.global efi64_config
efi64_config:
.fill 11,8,0
- .quad efi_call6
+ .quad efi_call
.byte 1
#endif /* CONFIG_EFI_STUB */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 0ca9a5c362bc..84c223479e3c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -375,8 +375,7 @@ xloadflags:
# define XLF0 0
#endif
-#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) && \
- !defined(CONFIG_EFI_MIXED)
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
/* kernel/boot_param/ramdisk could be loaded above 4g */
# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
#else
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 185fad49d86f..5d1e0075ac24 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -92,7 +92,7 @@ __clmul_gf128mul_ble:
ret
ENDPROC(__clmul_gf128mul_ble)
-/* void clmul_ghash_mul(char *dst, const be128 *shash) */
+/* void clmul_ghash_mul(char *dst, const u128 *shash) */
ENTRY(clmul_ghash_mul)
movups (%rdi), DATA
movups (%rsi), SHASH
@@ -106,7 +106,7 @@ ENDPROC(clmul_ghash_mul)
/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- * const be128 *shash);
+ * const u128 *shash);
*/
ENTRY(clmul_ghash_update)
cmp $16, %rdx
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index d785cf2c529c..88bb7ba8b175 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -25,17 +25,17 @@
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
-void clmul_ghash_mul(char *dst, const be128 *shash);
+void clmul_ghash_mul(char *dst, const u128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- const be128 *shash);
+ const u128 *shash);
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
struct ghash_ctx {
- be128 shash;
+ u128 shash;
};
struct ghash_desc_ctx {
@@ -68,11 +68,11 @@ static int ghash_setkey(struct crypto_shash *tfm,
a = be64_to_cpu(x->a);
b = be64_to_cpu(x->b);
- ctx->shash.a = (__be64)((b << 1) | (a >> 63));
- ctx->shash.b = (__be64)((a << 1) | (b >> 63));
+ ctx->shash.a = (b << 1) | (a >> 63);
+ ctx->shash.b = (a << 1) | (b >> 63);
if (a >> 63)
- ctx->shash.b ^= cpu_to_be64(0xc2);
+ ctx->shash.b ^= ((u64)0xc2) << 56;
return 0;
}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 220675795e08..f9e181aaba97 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -383,8 +383,8 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
} else {
/* Return stub is in 32bit vsyscall page */
if (current->mm->context.vdso)
- restorer = VDSO32_SYMBOL(current->mm->context.vdso,
- sigreturn);
+ restorer = current->mm->context.vdso +
+ selected_vdso32->sym___kernel_sigreturn;
else
restorer = &frame->retcode;
}
@@ -462,8 +462,8 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
else
- restorer = VDSO32_SYMBOL(current->mm->context.vdso,
- rt_sigreturn);
+ restorer = current->mm->context.vdso +
+ selected_vdso32->sym___kernel_rt_sigreturn;
put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
/*
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0869434eaf72..1eb5f6433ad8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_EFI_H
#define _ASM_X86_EFI_H
+#include <asm/i387.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
* with preserved alignment on virtual addresses starting from -4G down
@@ -27,91 +28,58 @@
extern unsigned long asmlinkage efi_call_phys(void *, ...);
-#define efi_call_phys0(f) efi_call_phys(f)
-#define efi_call_phys1(f, a1) efi_call_phys(f, a1)
-#define efi_call_phys2(f, a1, a2) efi_call_phys(f, a1, a2)
-#define efi_call_phys3(f, a1, a2, a3) efi_call_phys(f, a1, a2, a3)
-#define efi_call_phys4(f, a1, a2, a3, a4) \
- efi_call_phys(f, a1, a2, a3, a4)
-#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
- efi_call_phys(f, a1, a2, a3, a4, a5)
-#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
- efi_call_phys(f, a1, a2, a3, a4, a5, a6)
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
*/
+/* Use this macro if your virtual returns a non-void value */
#define efi_call_virt(f, args...) \
- ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args)
-
-#define efi_call_virt0(f) efi_call_virt(f)
-#define efi_call_virt1(f, a1) efi_call_virt(f, a1)
-#define efi_call_virt2(f, a1, a2) efi_call_virt(f, a1, a2)
-#define efi_call_virt3(f, a1, a2, a3) efi_call_virt(f, a1, a2, a3)
-#define efi_call_virt4(f, a1, a2, a3, a4) \
- efi_call_virt(f, a1, a2, a3, a4)
-#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
- efi_call_virt(f, a1, a2, a3, a4, a5)
-#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
- efi_call_virt(f, a1, a2, a3, a4, a5, a6)
+({ \
+ efi_status_t __s; \
+ kernel_fpu_begin(); \
+ __s = ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
+ kernel_fpu_end(); \
+ __s; \
+})
+
+/* Use this macro if your virtual call does not return any value */
+#define __efi_call_virt(f, args...) \
+({ \
+ kernel_fpu_begin(); \
+ ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
+ kernel_fpu_end(); \
+})
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
#else /* !CONFIG_X86_32 */
-extern u64 efi_call0(void *fp);
-extern u64 efi_call1(void *fp, u64 arg1);
-extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
-extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3);
-extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4);
-extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3,
- u64 arg4, u64 arg5);
-extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
- u64 arg4, u64 arg5, u64 arg6);
-
-#define efi_call_phys0(f) \
- efi_call0((f))
-#define efi_call_phys1(f, a1) \
- efi_call1((f), (u64)(a1))
-#define efi_call_phys2(f, a1, a2) \
- efi_call2((f), (u64)(a1), (u64)(a2))
-#define efi_call_phys3(f, a1, a2, a3) \
- efi_call3((f), (u64)(a1), (u64)(a2), (u64)(a3))
-#define efi_call_phys4(f, a1, a2, a3, a4) \
- efi_call4((f), (u64)(a1), (u64)(a2), (u64)(a3), \
- (u64)(a4))
-#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
- efi_call5((f), (u64)(a1), (u64)(a2), (u64)(a3), \
- (u64)(a4), (u64)(a5))
-#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
- efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
- (u64)(a4), (u64)(a5), (u64)(a6))
-
-#define _efi_call_virtX(x, f, ...) \
+#define EFI_LOADER_SIGNATURE "EL64"
+
+extern u64 asmlinkage efi_call(void *fp, ...);
+
+#define efi_call_phys(f, args...) efi_call((f), args)
+
+#define efi_call_virt(f, ...) \
({ \
efi_status_t __s; \
\
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
- __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \
+ __kernel_fpu_begin(); \
+ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
+ __kernel_fpu_end(); \
preempt_enable(); \
__s; \
})
-#define efi_call_virt0(f) \
- _efi_call_virtX(0, f)
-#define efi_call_virt1(f, a1) \
- _efi_call_virtX(1, f, (u64)(a1))
-#define efi_call_virt2(f, a1, a2) \
- _efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
-#define efi_call_virt3(f, a1, a2, a3) \
- _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
-#define efi_call_virt4(f, a1, a2, a3, a4) \
- _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
-#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
- _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
-#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
- _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
+/*
+ * All X86_64 virt calls return non-void values. Thus, use non-void call for
+ * virt calls that would be void on X86_32.
+ */
+#define __efi_call_virt(f, args...) efi_call_virt(f, args)
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 2c71182d30ef..1a055c81d864 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -75,7 +75,12 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#include <asm/vdso.h>
-extern unsigned int vdso_enabled;
+#ifdef CONFIG_X86_64
+extern unsigned int vdso64_enabled;
+#endif
+#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
+extern unsigned int vdso32_enabled;
+#endif
/*
* This is used to ensure we don't load something for the wrong architecture.
@@ -269,9 +274,9 @@ extern int force_personality32;
struct task_struct;
-#define ARCH_DLINFO_IA32(vdso_enabled) \
+#define ARCH_DLINFO_IA32 \
do { \
- if (vdso_enabled) { \
+ if (vdso32_enabled) { \
NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
} \
@@ -281,7 +286,7 @@ do { \
#define STACK_RND_MASK (0x7ff)
-#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
+#define ARCH_DLINFO ARCH_DLINFO_IA32
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
@@ -292,16 +297,17 @@ do { \
#define ARCH_DLINFO \
do { \
- if (vdso_enabled) \
+ if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
- (unsigned long)current->mm->context.vdso); \
+ (unsigned long __force)current->mm->context.vdso); \
} while (0)
+/* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */
#define ARCH_DLINFO_X32 \
do { \
- if (vdso_enabled) \
+ if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
- (unsigned long)current->mm->context.vdso); \
+ (unsigned long __force)current->mm->context.vdso); \
} while (0)
#define AT_SYSINFO 32
@@ -310,7 +316,7 @@ do { \
if (test_thread_flag(TIF_X32)) \
ARCH_DLINFO_X32; \
else \
- ARCH_DLINFO_IA32(sysctl_vsyscall32)
+ ARCH_DLINFO_IA32
#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
@@ -319,18 +325,17 @@ else \
#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
#define VDSO_ENTRY \
- ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
+ ((unsigned long)current->mm->context.vdso + \
+ selected_vdso32->sym___kernel_vsyscall)
struct linux_binprm;
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
-extern int x32_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp);
-
-extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
-#define compat_arch_setup_additional_pages syscall32_setup_pages
+extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp);
+#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
extern unsigned long arch_randomize_brk(struct mm_struct *mm);
#define arch_randomize_brk arch_randomize_brk
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
new file mode 100644
index 000000000000..99efebb2f69d
--- /dev/null
+++ b/arch/x86/include/asm/espfix.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_ESPFIX_H
+#define _ASM_X86_ESPFIX_H
+
+#ifdef CONFIG_X86_64
+
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
+DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
+
+extern void init_espfix_bsp(void);
+extern void init_espfix_ap(void);
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 43f482a0db37..b0910f97a3ea 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -24,7 +24,7 @@
#include <linux/threads.h>
#include <asm/kmap_types.h>
#else
-#include <asm/vsyscall.h>
+#include <uapi/asm/vsyscall.h>
#endif
/*
@@ -41,7 +41,8 @@
extern unsigned long __FIXADDR_TOP;
#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
#else
-#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
+#define FIXADDR_TOP (round_up(VSYSCALL_ADDR + PAGE_SIZE, 1<<PMD_SHIFT) - \
+ PAGE_SIZE)
#endif
@@ -68,11 +69,7 @@ enum fixed_addresses {
#ifdef CONFIG_X86_32
FIX_HOLE,
#else
- VSYSCALL_LAST_PAGE,
- VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
- + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
- VVAR_PAGE,
- VSYSCALL_HPET,
+ VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
#ifdef CONFIG_PARAVIRT_CLOCK
PVCLOCK_FIXMAP_BEGIN,
PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index cea1c76d49bf..115e3689cd53 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -87,22 +87,22 @@ static inline int is_x32_frame(void)
static __always_inline __pure bool use_eager_fpu(void)
{
- return static_cpu_has(X86_FEATURE_EAGER_FPU);
+ return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
}
static __always_inline __pure bool use_xsaveopt(void)
{
- return static_cpu_has(X86_FEATURE_XSAVEOPT);
+ return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
- return static_cpu_has(X86_FEATURE_XSAVE);
+ return static_cpu_has_safe(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
- return static_cpu_has(X86_FEATURE_FXSR);
+ return static_cpu_has_safe(X86_FEATURE_FXSR);
}
static inline void fx_finit(struct i387_fxsave_struct *fx)
@@ -293,7 +293,7 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. "m" is a random variable that should be in L1 */
- if (unlikely(static_cpu_has(X86_FEATURE_FXSAVE_LEAK))) {
+ if (unlikely(static_cpu_has_safe(X86_FEATURE_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 459e50a424d1..90f97b4b9347 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -168,8 +168,6 @@ extern int save_ioapic_entries(void);
extern void mask_ioapic_entries(void);
extern int restore_ioapic_entries(void);
-extern int get_nr_irqs_gsi(void);
-
extern void setup_ioapic_ids_from_mpc(void);
extern void setup_ioapic_ids_from_mpc_nocheck(void);
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index d806b228d2c0..b7747c4c2cf2 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -103,4 +103,7 @@ static inline bool setup_remapped_irq(int irq,
}
#endif /* CONFIG_IRQ_REMAP */
+#define dmar_alloc_hwirq() irq_alloc_hwirq(-1)
+#define dmar_free_hwirq irq_free_hwirq
+
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6e4ce2df87cf..958b90f761e5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -176,8 +176,6 @@ int mce_available(struct cpuinfo_x86 *c);
DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
-extern atomic_t mce_entry;
-
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5f55e6962769..876e74e8eec7 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -18,7 +18,7 @@ typedef struct {
#endif
struct mutex lock;
- void *vdso;
+ void __user *vdso;
} mm_context_t;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 0d193e234647..206a87fdd22d 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -62,66 +62,14 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
return ((value >> rightshift) & mask) << leftshift;
}
-#ifdef CONFIG_MEM_SOFT_DIRTY
-
-/*
- * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
- * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
- * into this range.
- */
-#define PTE_FILE_MAX_BITS 28
-#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
-#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
-#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
-#define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1)
-#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
-#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
-#define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
-
-#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
-#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
-#define PTE_FILE_MASK3 ((1U << PTE_FILE_BITS3) - 1)
-
-#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
-#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
-#define PTE_FILE_LSHIFT4 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)
-
-static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
-{
- return (pgoff_t)
- (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT4, -1UL, PTE_FILE_LSHIFT4));
-}
-
-static __always_inline pte_t pgoff_to_pte(pgoff_t off)
-{
- return (pte_t){
- .pte_low =
- pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) +
- pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) +
- pte_bitop(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) +
- pte_bitop(off, PTE_FILE_LSHIFT4, -1UL, PTE_FILE_SHIFT4) +
- _PAGE_FILE,
- };
-}
-
-#else /* CONFIG_MEM_SOFT_DIRTY */
-
/*
* Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
* split up the 29 bits of offset into this range.
*/
#define PTE_FILE_MAX_BITS 29
#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
-#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
-#else
-#define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1)
-#define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1)
-#endif
#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
@@ -150,16 +98,9 @@ static __always_inline pte_t pgoff_to_pte(pgoff_t off)
};
}
-#endif /* CONFIG_MEM_SOFT_DIRTY */
-
/* Encode and de-code a swap entry */
-#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
-#else
-#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
-#endif
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b459ddf27d64..0ec056012618 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -131,7 +131,8 @@ static inline int pte_exec(pte_t pte)
static inline int pte_special(pte_t pte)
{
- return pte_flags(pte) & _PAGE_SPECIAL;
+ return (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_SPECIAL)) ==
+ (_PAGE_PRESENT|_PAGE_SPECIAL);
}
static inline unsigned long pte_pfn(pte_t pte)
@@ -296,6 +297,7 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_PRESENT);
}
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
return pte_flags(pte) & _PAGE_SOFT_DIRTY;
@@ -331,6 +333,8 @@ static inline int pte_file_soft_dirty(pte_t pte)
return pte_flags(pte) & _PAGE_SOFT_DIRTY;
}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
/*
* Mask out unsupported bits in a present pgprot. Non-present pgprots
* can use those bits for other purposes, so leave them be.
@@ -452,6 +456,12 @@ static inline int pte_present(pte_t a)
_PAGE_NUMA);
}
+#define pte_present_nonuma pte_present_nonuma
+static inline int pte_present_nonuma(pte_t a)
+{
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
#define pte_accessible pte_accessible
static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
{
@@ -858,23 +868,25 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
{
}
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present(pte));
+ VM_BUG_ON(pte_present_nonuma(pte));
return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
static inline int pte_swp_soft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present(pte));
+ VM_BUG_ON(pte_present_nonuma(pte));
return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
}
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present(pte));
+ VM_BUG_ON(pte_present_nonuma(pte));
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
+#endif
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e22c1dbf7feb..5be9063545d2 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,12 +143,12 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_unmap(pte) ((void)(pte))/* NOP */
/* Encode and de-code a swap entry */
-#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
+#ifdef CONFIG_NUMA_BALANCING
+/* Automatic NUMA balancing needs to be distinguishable from swap entries */
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
#else
-#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
#endif
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index c883bf726398..7166e25ecb57 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t;
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
+#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
#define EARLY_DYNAMIC_PAGE_TABLES 64
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index eb3d44945133..f216963760e5 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -16,15 +16,26 @@
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
-#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
-#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */
+#define _PAGE_BIT_SOFTW1 9 /* available for programmer */
+#define _PAGE_BIT_SOFTW2 10 /* " */
+#define _PAGE_BIT_SOFTW3 11 /* " */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
-#define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */
+#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
+#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
+#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
+#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
+#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
+#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
+/*
+ * Swap offsets on configurations that allow automatic NUMA balancing use the
+ * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from
+ * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the
+ * maximum possible swap space from 16TB to 8TB.
+ */
+#define _PAGE_BIT_NUMA (_PAGE_BIT_GLOBAL+1)
+
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
@@ -40,7 +51,7 @@
#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
+#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
@@ -61,8 +72,6 @@
* they do not conflict with each other.
*/
-#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN
-
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
#else
@@ -70,6 +79,21 @@
#endif
/*
+ * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page
+ * that is not present. The hinting fault gathers numa placement statistics
+ * (see pte_numa()). The bit is always zero when the PTE is not present.
+ *
+ * The bit picked must be always zero when the pmd is present and not
+ * present, so that we don't lose information when we set it while
+ * atomically clearing the present bit.
+ */
+#ifdef CONFIG_NUMA_BALANCING
+#define _PAGE_NUMA (_AT(pteval_t, 1) << _PAGE_BIT_NUMA)
+#else
+#define _PAGE_NUMA (_AT(pteval_t, 0))
+#endif
+
+/*
* Tracking soft dirty bit when a page goes to a swap is tricky.
* We need a bit which can be stored in pte _and_ not conflict
* with swap entry format. On x86 bits 6 and 7 are *not* involved
@@ -94,26 +118,6 @@
#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-/*
- * _PAGE_NUMA indicates that this page will trigger a numa hinting
- * minor page fault to gather numa placement statistics (see
- * pte_numa()). The bit picked (8) is within the range between
- * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
- * require changes to the swp entry format because that bit is always
- * zero when the pte is not present.
- *
- * The bit picked must be always zero when the pmd is present and not
- * present, so that we don't lose information when we set it while
- * atomically clearing the present bit.
- *
- * Because we shared the same bit (8) with _PAGE_PROTNONE this can be
- * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
- * couldn't reach, like handle_mm_fault() (see access_error in
- * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
- * handle_mm_fault() to be invoked).
- */
-#define _PAGE_NUMA _PAGE_PROTNONE
-
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
@@ -122,8 +126,8 @@
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+ _PAGE_SOFT_DIRTY | _PAGE_NUMA)
+#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)
#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
#define _PAGE_CACHE_WB (0)
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6fd3fd769796..a90f8972dad5 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -12,8 +12,6 @@ void ia32_syscall(void);
void ia32_cstar_target(void);
void ia32_sysenter_target(void);
-void syscall32_cpu_init(void);
-
void x86_configure_nx(void);
void x86_report_nx(void);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 9264f04a4c55..ff4e7b236e21 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -59,6 +59,8 @@ static inline void x86_ce4100_early_setup(void) { }
#ifndef _SETUP
+#include <asm/espfix.h>
+
/*
* This is set up by the setup-routine at boot-time
*/
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 35e67a457182..31eab867e6d3 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -92,12 +92,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
? __const_sigismember((set), (sig)) \
: __gen_sigismember((set), (sig)))
-static inline int sigfindinword(unsigned long word)
-{
- asm("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc");
- return word;
-}
-
struct pt_regs;
#else /* __i386__ */
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 977f1761a25d..ab05d73e2bb7 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)
static inline void dma_mark_clean(void *addr, size_t size) {}
+extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs);
+extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs);
+
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 3f556c6a0157..2b19caa4081c 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -41,7 +41,6 @@
# define __ARCH_WANT_SYS_OLD_GETRLIMIT
# define __ARCH_WANT_SYS_OLD_UNAME
# define __ARCH_WANT_SYS_PAUSE
-# define __ARCH_WANT_SYS_SGETMASK
# define __ARCH_WANT_SYS_SIGNAL
# define __ARCH_WANT_SYS_SIGPENDING
# define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index d1dc55404ff1..30be253dd283 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -3,63 +3,51 @@
#include <asm/page_types.h>
#include <linux/linkage.h>
+#include <linux/init.h>
-#ifdef __ASSEMBLER__
+#ifndef __ASSEMBLER__
-#define DEFINE_VDSO_IMAGE(symname, filename) \
-__PAGE_ALIGNED_DATA ; \
- .globl symname##_start, symname##_end ; \
- .align PAGE_SIZE ; \
- symname##_start: ; \
- .incbin filename ; \
- symname##_end: ; \
- .align PAGE_SIZE /* extra data here leaks to userspace. */ ; \
- \
-.previous ; \
- \
- .globl symname##_pages ; \
- .bss ; \
- .align 8 ; \
- .type symname##_pages, @object ; \
- symname##_pages: ; \
- .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \
- .size symname##_pages, .-symname##_pages
+#include <linux/mm_types.h>
-#else
+struct vdso_image {
+ void *data;
+ unsigned long size; /* Always a multiple of PAGE_SIZE */
-#define DECLARE_VDSO_IMAGE(symname) \
- extern char symname##_start[], symname##_end[]; \
- extern struct page *symname##_pages[]
+ /* text_mapping.pages is big enough for data/size page pointers */
+ struct vm_special_mapping text_mapping;
-#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+ unsigned long alt, alt_len;
-#include <asm/vdso32.h>
+ unsigned long sym_end_mapping; /* Total size of the mapping */
-DECLARE_VDSO_IMAGE(vdso32_int80);
-#ifdef CONFIG_COMPAT
-DECLARE_VDSO_IMAGE(vdso32_syscall);
+ unsigned long sym_vvar_page;
+ unsigned long sym_hpet_page;
+ unsigned long sym_VDSO32_NOTE_MASK;
+ unsigned long sym___kernel_sigreturn;
+ unsigned long sym___kernel_rt_sigreturn;
+ unsigned long sym___kernel_vsyscall;
+ unsigned long sym_VDSO32_SYSENTER_RETURN;
+};
+
+#ifdef CONFIG_X86_64
+extern const struct vdso_image vdso_image_64;
+#endif
+
+#ifdef CONFIG_X86_X32
+extern const struct vdso_image vdso_image_x32;
#endif
-DECLARE_VDSO_IMAGE(vdso32_sysenter);
-/*
- * Given a pointer to the vDSO image, find the pointer to VDSO32_name
- * as that symbol is defined in the vDSO sources or linker script.
- */
-#define VDSO32_SYMBOL(base, name) \
-({ \
- extern const char VDSO32_##name[]; \
- (void __user *)(VDSO32_##name + (unsigned long)(base)); \
-})
+#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+extern const struct vdso_image vdso_image_32_int80;
+#ifdef CONFIG_COMPAT
+extern const struct vdso_image vdso_image_32_syscall;
#endif
+extern const struct vdso_image vdso_image_32_sysenter;
-/*
- * These symbols are defined with the addresses in the vsyscall page.
- * See vsyscall-sigreturn.S.
- */
-extern void __user __kernel_sigreturn;
-extern void __user __kernel_rt_sigreturn;
+extern const struct vdso_image *selected_vdso32;
+#endif
-void __init patch_vdso32(void *vdso, size_t len);
+extern void __init init_vdso_image(const struct vdso_image *image);
#endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
deleted file mode 100644
index 7efb7018406e..000000000000
--- a/arch/x86/include/asm/vdso32.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_X86_VDSO32_H
-#define _ASM_X86_VDSO32_H
-
-#define VDSO_BASE_PAGE 0
-#define VDSO_VVAR_PAGE 1
-#define VDSO_HPET_PAGE 2
-#define VDSO_PAGES 3
-#define VDSO_PREV_PAGES 2
-#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
-
-#endif
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 081d909bc495..5d2b9ad2c6d2 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -29,31 +29,13 @@
#else
-#ifdef BUILD_VDSO32
+extern char __vvar_page;
#define DECLARE_VVAR(offset, type, name) \
extern type vvar_ ## name __attribute__((visibility("hidden")));
#define VVAR(name) (vvar_ ## name)
-#else
-
-extern char __vvar_page;
-
-/* Base address of vvars. This is not ABI. */
-#ifdef CONFIG_X86_64
-#define VVAR_ADDRESS (-10*1024*1024 - 4096)
-#else
-#define VVAR_ADDRESS (&__vvar_page)
-#endif
-
-#define DECLARE_VVAR(offset, type, name) \
- static type const * const vvaraddr_ ## name = \
- (void *)(VVAR_ADDRESS + (offset));
-
-#define VVAR(name) (*vvaraddr_ ## name)
-#endif
-
#define DEFINE_VVAR(type, name) \
type name \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
diff --git a/arch/x86/include/uapi/asm/vsyscall.h b/arch/x86/include/uapi/asm/vsyscall.h
index 85dc1b3825ab..b97dd6e263d2 100644
--- a/arch/x86/include/uapi/asm/vsyscall.h
+++ b/arch/x86/include/uapi/asm/vsyscall.h
@@ -7,11 +7,6 @@ enum vsyscall_num {
__NR_vgetcpu,
};
-#define VSYSCALL_START (-10UL << 20)
-#define VSYSCALL_SIZE 1024
-#define VSYSCALL_END (-2UL << 20)
-#define VSYSCALL_MAPPED_PAGES 1
-#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
-
+#define VSYSCALL_ADDR (-10UL << 20)
#endif /* _UAPI_ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f4d96000d33a..047f9ff2e36c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,9 +26,11 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
+obj-$(CONFIG_X86_64) += mcount_64.o
obj-y += syscall_$(BITS).o vsyscall_gtod.o
obj-$(CONFIG_X86_64) += vsyscall_64.o
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
+obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b574b295a2f9..8e3842fc8bea 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, struct dma_attrs *attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
- free_pages((unsigned long)vaddr, get_order(size));
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 992060e09897..9d0a9795a0f8 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -206,9 +206,6 @@ int __init arch_early_irq_init(void)
count = ARRAY_SIZE(irq_cfgx);
node = cpu_to_node(0);
- /* Make sure the legacy interrupts are marked in the bitmap */
- irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
-
for (i = 0; i < count; i++) {
irq_set_chip_data(i, &cfg[i]);
zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
@@ -281,18 +278,6 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
return cfg;
}
-static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
-{
- return irq_alloc_descs_from(from, count, node);
-}
-
-static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
-{
- free_irq_cfg(at, cfg);
- irq_free_desc(at);
-}
-
-
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -2916,98 +2901,39 @@ static int __init ioapic_init_ops(void)
device_initcall(ioapic_init_ops);
/*
- * Dynamic irq allocate and deallocation
+ * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
*/
-unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
+int arch_setup_hwirq(unsigned int irq, int node)
{
- struct irq_cfg **cfg;
+ struct irq_cfg *cfg;
unsigned long flags;
- int irq, i;
-
- if (from < nr_irqs_gsi)
- from = nr_irqs_gsi;
+ int ret;
- cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
+ cfg = alloc_irq_cfg(irq, node);
if (!cfg)
- return 0;
-
- irq = alloc_irqs_from(from, count, node);
- if (irq < 0)
- goto out_cfgs;
-
- for (i = 0; i < count; i++) {
- cfg[i] = alloc_irq_cfg(irq + i, node);
- if (!cfg[i])
- goto out_irqs;
- }
+ return -ENOMEM;
raw_spin_lock_irqsave(&vector_lock, flags);
- for (i = 0; i < count; i++)
- if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
- goto out_vecs;
- raw_spin_unlock_irqrestore(&vector_lock, flags);
-
- for (i = 0; i < count; i++) {
- irq_set_chip_data(irq + i, cfg[i]);
- irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
- }
-
- kfree(cfg);
- return irq;
-
-out_vecs:
- for (i--; i >= 0; i--)
- __clear_irq_vector(irq + i, cfg[i]);
+ ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
raw_spin_unlock_irqrestore(&vector_lock, flags);
-out_irqs:
- for (i = 0; i < count; i++)
- free_irq_at(irq + i, cfg[i]);
-out_cfgs:
- kfree(cfg);
- return 0;
-}
-unsigned int create_irq_nr(unsigned int from, int node)
-{
- return __create_irqs(from, 1, node);
-}
-
-int create_irq(void)
-{
- int node = cpu_to_node(0);
- unsigned int irq_want;
- int irq;
-
- irq_want = nr_irqs_gsi;
- irq = create_irq_nr(irq_want, node);
-
- if (irq == 0)
- irq = -1;
-
- return irq;
+ if (!ret)
+ irq_set_chip_data(irq, cfg);
+ else
+ free_irq_cfg(irq, cfg);
+ return ret;
}
-void destroy_irq(unsigned int irq)
+void arch_teardown_hwirq(unsigned int irq)
{
struct irq_cfg *cfg = irq_get_chip_data(irq);
unsigned long flags;
- irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
-
free_remapped_irq(irq);
-
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
- free_irq_at(irq, cfg);
-}
-
-void destroy_irqs(unsigned int irq, unsigned int count)
-{
- unsigned int i;
-
- for (i = 0; i < count; i++)
- destroy_irq(irq + i);
+ free_irq_cfg(irq, cfg);
}
/*
@@ -3136,8 +3062,8 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- unsigned int irq, irq_want;
struct msi_desc *msidesc;
+ unsigned int irq;
int node, ret;
/* Multiple MSI vectors only supported with interrupt remapping */
@@ -3145,28 +3071,25 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return 1;
node = dev_to_node(&dev->dev);
- irq_want = nr_irqs_gsi;
+
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want, node);
- if (irq == 0)
+ irq = irq_alloc_hwirq(node);
+ if (!irq)
return -ENOSPC;
- irq_want = irq + 1;
-
ret = setup_msi_irq(dev, msidesc, irq, 0);
- if (ret < 0)
- goto error;
+ if (ret < 0) {
+ irq_free_hwirq(irq);
+ return ret;
+ }
+
}
return 0;
-
-error:
- destroy_irq(irq);
- return ret;
}
void native_teardown_msi_irq(unsigned int irq)
{
- destroy_irq(irq);
+ irq_free_hwirq(irq);
}
#ifdef CONFIG_DMAR_TABLE
@@ -3420,11 +3343,6 @@ static void __init probe_nr_irqs_gsi(void)
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}
-int get_nr_irqs_gsi(void)
-{
- return nr_irqs_gsi;
-}
-
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
return from < nr_irqs_gsi ? nr_irqs_gsi : from;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a135239badb7..2cbbf88d8f2c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -20,6 +20,7 @@
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/sections.h>
+#include <asm/vsyscall.h>
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <asm/pgtable.h>
@@ -953,6 +954,38 @@ static void vgetcpu_set_mode(void)
else
vgetcpu_mode = VGETCPU_LSL;
}
+
+/* May not be __init: called during resume */
+static void syscall32_cpu_init(void)
+{
+ /* Load these always in case some future AMD CPU supports
+ SYSENTER from compat mode too. */
+ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+
+ wrmsrl(MSR_CSTAR, ia32_cstar_target);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+void enable_sep_cpu(void)
+{
+ int cpu = get_cpu();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+
+ if (!boot_cpu_has(X86_FEATURE_SEP)) {
+ put_cpu();
+ return;
+ }
+
+ tss->x86_tss.ss1 = __KERNEL_CS;
+ tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+ wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
+ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
+ put_cpu();
+}
#endif
void __init identify_boot_cpu(void)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6cc800381d14..bb92f38153b2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define SPINUNIT 100 /* 100ns */
-atomic_t mce_entry;
-
DEFINE_PER_CPU(unsigned, mce_exception_count);
struct mce_bank *mce_banks __read_mostly;
@@ -1040,8 +1038,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
- atomic_inc(&mce_entry);
-
this_cpu_inc(mce_exception_count);
if (!cfg->banks)
@@ -1171,7 +1167,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_report_event(regs);
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
out:
- atomic_dec(&mce_entry);
sync_core();
}
EXPORT_SYMBOL_GPL(do_machine_check);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index d35078ea1446..7db54b5d5f86 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -206,23 +206,21 @@ static void __init dtb_apic_setup(void)
static void __init x86_flattree_get_config(void)
{
u32 size, map_len;
- struct boot_param_header *dt;
+ void *dt;
if (!initial_dtb)
return;
- map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK),
- (u64)sizeof(struct boot_param_header));
+ map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
- dt = early_memremap(initial_dtb, map_len);
- size = be32_to_cpu(dt->totalsize);
+ initial_boot_params = dt = early_memremap(initial_dtb, map_len);
+ size = of_get_flat_dt_size();
if (map_len < size) {
early_iounmap(dt, map_len);
- dt = early_memremap(initial_dtb, size);
+ initial_boot_params = dt = early_memremap(initial_dtb, size);
map_len = size;
}
- initial_boot_params = dt;
unflatten_and_copy_device_tree();
early_iounmap(dt, map_len);
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6cda0baeac9d..2e1a6853e00c 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -419,7 +419,7 @@ static size_t __init gen6_stolen_size(int num, int slot, int func)
return gmch_ctrl << 25; /* 32 MB units */
}
-static size_t gen8_stolen_size(int num, int slot, int func)
+static size_t __init gen8_stolen_size(int num, int slot, int func)
{
u16 gmch_ctrl;
@@ -429,48 +429,73 @@ static size_t gen8_stolen_size(int num, int slot, int func)
return gmch_ctrl << 25; /* 32 MB units */
}
+static size_t __init chv_stolen_size(int num, int slot, int func)
+{
+ u16 gmch_ctrl;
+
+ gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
+ gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
+ gmch_ctrl &= SNB_GMCH_GMS_MASK;
+
+ /*
+ * 0x0 to 0x10: 32MB increments starting at 0MB
+ * 0x11 to 0x16: 4MB increments starting at 8MB
+ * 0x17 to 0x1d: 4MB increments start at 36MB
+ */
+ if (gmch_ctrl < 0x11)
+ return gmch_ctrl << 25;
+ else if (gmch_ctrl < 0x17)
+ return (gmch_ctrl - 0x11 + 2) << 22;
+ else
+ return (gmch_ctrl - 0x17 + 9) << 22;
+}
struct intel_stolen_funcs {
size_t (*size)(int num, int slot, int func);
u32 (*base)(int num, int slot, int func, size_t size);
};
-static const struct intel_stolen_funcs i830_stolen_funcs = {
+static const struct intel_stolen_funcs i830_stolen_funcs __initconst = {
.base = i830_stolen_base,
.size = i830_stolen_size,
};
-static const struct intel_stolen_funcs i845_stolen_funcs = {
+static const struct intel_stolen_funcs i845_stolen_funcs __initconst = {
.base = i845_stolen_base,
.size = i830_stolen_size,
};
-static const struct intel_stolen_funcs i85x_stolen_funcs = {
+static const struct intel_stolen_funcs i85x_stolen_funcs __initconst = {
.base = i85x_stolen_base,
.size = gen3_stolen_size,
};
-static const struct intel_stolen_funcs i865_stolen_funcs = {
+static const struct intel_stolen_funcs i865_stolen_funcs __initconst = {
.base = i865_stolen_base,
.size = gen3_stolen_size,
};
-static const struct intel_stolen_funcs gen3_stolen_funcs = {
+static const struct intel_stolen_funcs gen3_stolen_funcs __initconst = {
.base = intel_stolen_base,
.size = gen3_stolen_size,
};
-static const struct intel_stolen_funcs gen6_stolen_funcs = {
+static const struct intel_stolen_funcs gen6_stolen_funcs __initconst = {
.base = intel_stolen_base,
.size = gen6_stolen_size,
};
-static const struct intel_stolen_funcs gen8_stolen_funcs = {
+static const struct intel_stolen_funcs gen8_stolen_funcs __initconst = {
.base = intel_stolen_base,
.size = gen8_stolen_size,
};
-static struct pci_device_id intel_stolen_ids[] __initdata = {
+static const struct intel_stolen_funcs chv_stolen_funcs __initconst = {
+ .base = intel_stolen_base,
+ .size = chv_stolen_size,
+};
+
+static const struct pci_device_id intel_stolen_ids[] __initconst = {
INTEL_I830_IDS(&i830_stolen_funcs),
INTEL_I845G_IDS(&i845_stolen_funcs),
INTEL_I85X_IDS(&i85x_stolen_funcs),
@@ -496,7 +521,8 @@ static struct pci_device_id intel_stolen_ids[] __initdata = {
INTEL_HSW_D_IDS(&gen6_stolen_funcs),
INTEL_HSW_M_IDS(&gen6_stolen_funcs),
INTEL_BDW_M_IDS(&gen8_stolen_funcs),
- INTEL_BDW_D_IDS(&gen8_stolen_funcs)
+ INTEL_BDW_D_IDS(&gen8_stolen_funcs),
+ INTEL_CHV_IDS(&chv_stolen_funcs),
};
static void __init intel_graphics_stolen(int num, int slot, int func)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a2a4f4697889..98313ffaae6a 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -527,6 +527,7 @@ syscall_exit:
restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
+#ifdef CONFIG_X86_ESPFIX32
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
# are returning to the kernel.
@@ -537,6 +538,7 @@ restore_all_notrace:
cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
+#endif
restore_nocheck:
RESTORE_REGS 4 # skip orig_eax/error_code
irq_return:
@@ -549,13 +551,9 @@ ENTRY(iret_exc)
.previous
_ASM_EXTABLE(irq_return,iret_exc)
+#ifdef CONFIG_X86_ESPFIX32
CFI_RESTORE_STATE
ldt_ss:
- larl PT_OLDSS(%esp), %eax
- jnz restore_nocheck
- testl $0x00400000, %eax # returning to 32bit stack?
- jnz restore_nocheck # allright, normal return
-
#ifdef CONFIG_PARAVIRT
/*
* The kernel can't run on a non-flat stack if paravirt mode
@@ -597,6 +595,7 @@ ldt_ss:
lss (%esp), %esp /* switch to espfix segment */
CFI_ADJUST_CFA_OFFSET -8
jmp restore_nocheck
+#endif
CFI_ENDPROC
ENDPROC(system_call)
@@ -704,6 +703,7 @@ END(syscall_badsys)
* the high word of the segment base from the GDT and swiches to the
* normal stack and adjusts ESP with the matching offset.
*/
+#ifdef CONFIG_X86_ESPFIX32
/* fixup the stack */
mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
@@ -713,8 +713,10 @@ END(syscall_badsys)
pushl_cfi %eax
lss (%esp), %esp /* switch to the normal stack segment */
CFI_ADJUST_CFA_OFFSET -8
+#endif
.endm
.macro UNWIND_ESPFIX_STACK
+#ifdef CONFIG_X86_ESPFIX32
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
@@ -725,6 +727,7 @@ END(syscall_badsys)
/* switch to normal stack */
FIXUP_ESPFIX_STACK
27:
+#endif
.endm
/*
@@ -1355,11 +1358,13 @@ END(debug)
ENTRY(nmi)
RING0_INT_FRAME
ASM_CLAC
+#ifdef CONFIG_X86_ESPFIX32
pushl_cfi %eax
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl_cfi %eax
je nmi_espfix_stack
+#endif
cmpl $ia32_sysenter_target,(%esp)
je nmi_stack_fixup
pushl_cfi %eax
@@ -1399,6 +1404,7 @@ nmi_debug_stack_check:
FIX_STACK 24, nmi_stack_correct, 1
jmp nmi_stack_correct
+#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
/* We have a RING0_INT_FRAME here.
*
@@ -1420,6 +1426,7 @@ nmi_espfix_stack:
lss 12+4(%esp), %esp # back to espfix stack
CFI_ADJUST_CFA_OFFSET -24
jmp irq_return
+#endif
CFI_ENDPROC
END(nmi)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index be846d2468f7..48a2644a082a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -53,11 +53,11 @@
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
-#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
#include <asm/context_tracking.h>
#include <asm/smap.h>
+#include <asm/pgtable_types.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -69,209 +69,6 @@
.code64
.section .entry.text, "ax"
-#ifdef CONFIG_FUNCTION_TRACER
-
-#ifdef CC_USING_FENTRY
-# define function_hook __fentry__
-#else
-# define function_hook mcount
-#endif
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-ENTRY(function_hook)
- retq
-END(function_hook)
-
-/* skip is set if stack has been adjusted */
-.macro ftrace_caller_setup skip=0
- MCOUNT_SAVE_FRAME \skip
-
- /* Load the ftrace_ops into the 3rd parameter */
- movq function_trace_op(%rip), %rdx
-
- /* Load ip into the first parameter */
- movq RIP(%rsp), %rdi
- subq $MCOUNT_INSN_SIZE, %rdi
- /* Load the parent_ip into the second parameter */
-#ifdef CC_USING_FENTRY
- movq SS+16(%rsp), %rsi
-#else
- movq 8(%rbp), %rsi
-#endif
-.endm
-
-ENTRY(ftrace_caller)
- /* Check if tracing was disabled (quick check) */
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
- ftrace_caller_setup
- /* regs go into 4th parameter (but make it NULL) */
- movq $0, %rcx
-
-GLOBAL(ftrace_call)
- call ftrace_stub
-
- MCOUNT_RESTORE_FRAME
-ftrace_return:
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call)
- jmp ftrace_stub
-#endif
-
-GLOBAL(ftrace_stub)
- retq
-END(ftrace_caller)
-
-ENTRY(ftrace_regs_caller)
- /* Save the current flags before compare (in SS location)*/
- pushfq
-
- /* Check if tracing was disabled (quick check) */
- cmpl $0, function_trace_stop
- jne ftrace_restore_flags
-
- /* skip=8 to skip flags saved in SS */
- ftrace_caller_setup 8
-
- /* Save the rest of pt_regs */
- movq %r15, R15(%rsp)
- movq %r14, R14(%rsp)
- movq %r13, R13(%rsp)
- movq %r12, R12(%rsp)
- movq %r11, R11(%rsp)
- movq %r10, R10(%rsp)
- movq %rbp, RBP(%rsp)
- movq %rbx, RBX(%rsp)
- /* Copy saved flags */
- movq SS(%rsp), %rcx
- movq %rcx, EFLAGS(%rsp)
- /* Kernel segments */
- movq $__KERNEL_DS, %rcx
- movq %rcx, SS(%rsp)
- movq $__KERNEL_CS, %rcx
- movq %rcx, CS(%rsp)
- /* Stack - skipping return address */
- leaq SS+16(%rsp), %rcx
- movq %rcx, RSP(%rsp)
-
- /* regs go into 4th parameter */
- leaq (%rsp), %rcx
-
-GLOBAL(ftrace_regs_call)
- call ftrace_stub
-
- /* Copy flags back to SS, to restore them */
- movq EFLAGS(%rsp), %rax
- movq %rax, SS(%rsp)
-
- /* Handlers can change the RIP */
- movq RIP(%rsp), %rax
- movq %rax, SS+8(%rsp)
-
- /* restore the rest of pt_regs */
- movq R15(%rsp), %r15
- movq R14(%rsp), %r14
- movq R13(%rsp), %r13
- movq R12(%rsp), %r12
- movq R10(%rsp), %r10
- movq RBP(%rsp), %rbp
- movq RBX(%rsp), %rbx
-
- /* skip=8 to skip flags saved in SS */
- MCOUNT_RESTORE_FRAME 8
-
- /* Restore flags */
- popfq
-
- jmp ftrace_return
-ftrace_restore_flags:
- popfq
- jmp ftrace_stub
-
-END(ftrace_regs_caller)
-
-
-#else /* ! CONFIG_DYNAMIC_FTRACE */
-
-ENTRY(function_hook)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
- cmpq $ftrace_stub, ftrace_trace_function
- jnz trace
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- cmpq $ftrace_stub, ftrace_graph_return
- jnz ftrace_graph_caller
-
- cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
- jnz ftrace_graph_caller
-#endif
-
-GLOBAL(ftrace_stub)
- retq
-
-trace:
- MCOUNT_SAVE_FRAME
-
- movq RIP(%rsp), %rdi
-#ifdef CC_USING_FENTRY
- movq SS+16(%rsp), %rsi
-#else
- movq 8(%rbp), %rsi
-#endif
- subq $MCOUNT_INSN_SIZE, %rdi
-
- call *ftrace_trace_function
-
- MCOUNT_RESTORE_FRAME
-
- jmp ftrace_stub
-END(function_hook)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
- MCOUNT_SAVE_FRAME
-
-#ifdef CC_USING_FENTRY
- leaq SS+16(%rsp), %rdi
- movq $0, %rdx /* No framepointers needed */
-#else
- leaq 8(%rbp), %rdi
- movq (%rbp), %rdx
-#endif
- movq RIP(%rsp), %rsi
- subq $MCOUNT_INSN_SIZE, %rsi
-
- call prepare_ftrace_return
-
- MCOUNT_RESTORE_FRAME
-
- retq
-END(ftrace_graph_caller)
-
-GLOBAL(return_to_handler)
- subq $24, %rsp
-
- /* Save the return values */
- movq %rax, (%rsp)
- movq %rdx, 8(%rsp)
- movq %rbp, %rdi
-
- call ftrace_return_to_handler
-
- movq %rax, %rdi
- movq 8(%rsp), %rdx
- movq (%rsp), %rax
- addq $24, %rsp
- jmp *%rdi
-#endif
-
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
@@ -1040,8 +837,18 @@ restore_args:
RESTORE_ARGS 1,8,1
irq_return:
+ /*
+ * Are we returning to a stack segment from the LDT? Note: in
+ * 64-bit mode SS:RSP on the exception stack is always valid.
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ testb $4,(SS-RIP)(%rsp)
+ jnz irq_return_ldt
+#endif
+
+irq_return_iret:
INTERRUPT_RETURN
- _ASM_EXTABLE(irq_return, bad_iret)
+ _ASM_EXTABLE(irq_return_iret, bad_iret)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
@@ -1049,6 +856,32 @@ ENTRY(native_iret)
_ASM_EXTABLE(native_iret, bad_iret)
#endif
+#ifdef CONFIG_X86_ESPFIX64
+irq_return_ldt:
+ pushq_cfi %rax
+ pushq_cfi %rdi
+ SWAPGS
+ movq PER_CPU_VAR(espfix_waddr),%rdi
+ movq %rax,(0*8)(%rdi) /* RAX */
+ movq (2*8)(%rsp),%rax /* RIP */
+ movq %rax,(1*8)(%rdi)
+ movq (3*8)(%rsp),%rax /* CS */
+ movq %rax,(2*8)(%rdi)
+ movq (4*8)(%rsp),%rax /* RFLAGS */
+ movq %rax,(3*8)(%rdi)
+ movq (6*8)(%rsp),%rax /* SS */
+ movq %rax,(5*8)(%rdi)
+ movq (5*8)(%rsp),%rax /* RSP */
+ movq %rax,(4*8)(%rdi)
+ andl $0xffff0000,%eax
+ popq_cfi %rdi
+ orq PER_CPU_VAR(espfix_stack),%rax
+ SWAPGS
+ movq %rax,%rsp
+ popq_cfi %rax
+ jmp irq_return_iret
+#endif
+
.section .fixup,"ax"
bad_iret:
/*
@@ -1110,9 +943,45 @@ ENTRY(retint_kernel)
call preempt_schedule_irq
jmp exit_intr
#endif
-
CFI_ENDPROC
END(common_interrupt)
+
+ /*
+ * If IRET takes a fault on the espfix stack, then we
+ * end up promoting it to a doublefault. In that case,
+ * modify the stack to make it look like we just entered
+ * the #GP handler from user space, similar to bad_iret.
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ ALIGN
+__do_double_fault:
+ XCPT_FRAME 1 RDI+8
+ movq RSP(%rdi),%rax /* Trap on the espfix stack? */
+ sarq $PGDIR_SHIFT,%rax
+ cmpl $ESPFIX_PGD_ENTRY,%eax
+ jne do_double_fault /* No, just deliver the fault */
+ cmpl $__KERNEL_CS,CS(%rdi)
+ jne do_double_fault
+ movq RIP(%rdi),%rax
+ cmpq $irq_return_iret,%rax
+#ifdef CONFIG_PARAVIRT
+ je 1f
+ cmpq $native_iret,%rax
+#endif
+ jne do_double_fault /* This shouldn't happen... */
+1:
+ movq PER_CPU_VAR(kernel_stack),%rax
+ subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
+ movq %rax,RSP(%rdi)
+ movq $0,(%rax) /* Missing (lost) #GP error code */
+ movq $general_protection,RIP(%rdi)
+ retq
+ CFI_ENDPROC
+END(__do_double_fault)
+#else
+# define __do_double_fault do_double_fault
+#endif
+
/*
* End of kprobes section
*/
@@ -1289,7 +1158,7 @@ idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
idtentry invalid_op do_invalid_op has_error_code=0
idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=1
+idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
idtentry invalid_TSS do_invalid_TSS has_error_code=1
idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1576,7 +1445,7 @@ error_sti:
*/
error_kernelspace:
incl %ebx
- leaq irq_return(%rip),%rcx
+ leaq irq_return_iret(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
movl %ecx,%eax /* zero extend */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
new file mode 100644
index 000000000000..6afbb16e9b79
--- /dev/null
+++ b/arch/x86/kernel/espfix_64.c
@@ -0,0 +1,209 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2014 Intel Corporation; author: H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * The IRET instruction, when returning to a 16-bit segment, only
+ * restores the bottom 16 bits of the user space stack pointer. This
+ * causes some 16-bit software to break, but it also leaks kernel state
+ * to user space.
+ *
+ * This works around this by creating percpu "ministacks", each of which
+ * is mapped 2^16 times 64K apart. When we detect that the return SS is
+ * on the LDT, we copy the IRET frame to the ministack and use the
+ * relevant alias to return to userspace. The ministacks are mapped
+ * readonly, so if the IRET fault we promote #GP to #DF which is an IST
+ * vector and thus has its own stack; we then do the fixup in the #DF
+ * handler.
+ *
+ * This file sets up the ministacks and the related page tables. The
+ * actual ministack invocation is in entry_64.S.
+ */
+
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/random.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/setup.h>
+#include <asm/espfix.h>
+
+/*
+ * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
+ * it up to a cache line to avoid unnecessary sharing.
+ */
+#define ESPFIX_STACK_SIZE (8*8UL)
+#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE)
+
+/* There is address space for how many espfix pages? */
+#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16))
+
+#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE)
+#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS
+# error "Need more than one PGD for the ESPFIX hack"
+#endif
+
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
+/* This contains the *bottom* address of the espfix stack */
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
+
+/* Initialization mutex - should this be a spinlock? */
+static DEFINE_MUTEX(espfix_init_mutex);
+
+/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */
+#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE)
+static void *espfix_pages[ESPFIX_MAX_PAGES];
+
+static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD]
+ __aligned(PAGE_SIZE);
+
+static unsigned int page_random, slot_random;
+
+/*
+ * This returns the bottom address of the espfix stack for a specific CPU.
+ * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case
+ * we have to account for some amount of padding at the end of each page.
+ */
+static inline unsigned long espfix_base_addr(unsigned int cpu)
+{
+ unsigned long page, slot;
+ unsigned long addr;
+
+ page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random;
+ slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE;
+ addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE);
+ addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16);
+ addr += ESPFIX_BASE_ADDR;
+ return addr;
+}
+
+#define PTE_STRIDE (65536/PAGE_SIZE)
+#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE)
+#define ESPFIX_PMD_CLONES PTRS_PER_PMD
+#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES))
+
+#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX)
+
+static void init_espfix_random(void)
+{
+ unsigned long rand;
+
+ /*
+ * This is run before the entropy pools are initialized,
+ * but this is hopefully better than nothing.
+ */
+ if (!arch_get_random_long(&rand)) {
+ /* The constant is an arbitrary large prime */
+ rdtscll(rand);
+ rand *= 0xc345c6b72fd16123UL;
+ }
+
+ slot_random = rand % ESPFIX_STACKS_PER_PAGE;
+ page_random = (rand / ESPFIX_STACKS_PER_PAGE)
+ & (ESPFIX_PAGE_SPACE - 1);
+}
+
+void __init init_espfix_bsp(void)
+{
+ pgd_t *pgd_p;
+ pteval_t ptemask;
+
+ ptemask = __supported_pte_mask;
+
+ /* Install the espfix pud into the kernel page directory */
+ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+
+ /* Randomize the locations */
+ init_espfix_random();
+
+ /* The rest is the same as for any other processor */
+ init_espfix_ap();
+}
+
+void init_espfix_ap(void)
+{
+ unsigned int cpu, page;
+ unsigned long addr;
+ pud_t pud, *pud_p;
+ pmd_t pmd, *pmd_p;
+ pte_t pte, *pte_p;
+ int n;
+ void *stack_page;
+ pteval_t ptemask;
+
+ /* We only have to do this once... */
+ if (likely(this_cpu_read(espfix_stack)))
+ return; /* Already initialized */
+
+ cpu = smp_processor_id();
+ addr = espfix_base_addr(cpu);
+ page = cpu/ESPFIX_STACKS_PER_PAGE;
+
+ /* Did another CPU already set this up? */
+ stack_page = ACCESS_ONCE(espfix_pages[page]);
+ if (likely(stack_page))
+ goto done;
+
+ mutex_lock(&espfix_init_mutex);
+
+ /* Did we race on the lock? */
+ stack_page = ACCESS_ONCE(espfix_pages[page]);
+ if (stack_page)
+ goto unlock_done;
+
+ ptemask = __supported_pte_mask;
+
+ pud_p = &espfix_pud_page[pud_index(addr)];
+ pud = *pud_p;
+ if (!pud_present(pud)) {
+ pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+ pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
+ paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
+ for (n = 0; n < ESPFIX_PUD_CLONES; n++)
+ set_pud(&pud_p[n], pud);
+ }
+
+ pmd_p = pmd_offset(&pud, addr);
+ pmd = *pmd_p;
+ if (!pmd_present(pmd)) {
+ pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+ pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
+ paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
+ for (n = 0; n < ESPFIX_PMD_CLONES; n++)
+ set_pmd(&pmd_p[n], pmd);
+ }
+
+ pte_p = pte_offset_kernel(&pmd, addr);
+ stack_page = (void *)__get_free_page(GFP_KERNEL);
+ pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
+ paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
+ for (n = 0; n < ESPFIX_PTE_CLONES; n++)
+ set_pte(&pte_p[n*PTE_STRIDE], pte);
+
+ /* Job is done for this CPU and any CPU which shares this page */
+ ACCESS_ONCE(espfix_pages[page]) = stack_page;
+
+unlock_done:
+ mutex_unlock(&espfix_init_mutex);
+done:
+ this_cpu_write(espfix_stack, addr);
+ this_cpu_write(espfix_waddr, (unsigned long)stack_page
+ + (addr & ~PAGE_MASK));
+}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 52819e816f87..cbc4a91b131e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -297,16 +297,7 @@ int ftrace_int3_handler(struct pt_regs *regs)
static int ftrace_write(unsigned long ip, const char *val, int size)
{
- /*
- * On x86_64, kernel text mappings are mapped read-only with
- * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
- * of the kernel text mapping to modify the kernel text.
- *
- * For 32bit kernels, these mappings are same and we can use
- * kernel identity mapping to modify code.
- */
- if (within(ip, (unsigned long)_text, (unsigned long)_etext))
- ip = (unsigned long)__va(__pa_symbol(ip));
+ ip = text_ip_addr(ip);
if (probe_kernel_write((void *)ip, val, size))
return -EPERM;
@@ -349,40 +340,14 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
return add_break(rec->ip, old);
}
-/*
- * If the record has the FTRACE_FL_REGS set, that means that it
- * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
- * is not not set, then it wants to convert to the normal callback.
- */
-static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
-{
- if (rec->flags & FTRACE_FL_REGS)
- return (unsigned long)FTRACE_REGS_ADDR;
- else
- return (unsigned long)FTRACE_ADDR;
-}
-
-/*
- * The FTRACE_FL_REGS_EN is set when the record already points to
- * a function that saves all the regs. Basically the '_EN' version
- * represents the current state of the function.
- */
-static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
-{
- if (rec->flags & FTRACE_FL_REGS_EN)
- return (unsigned long)FTRACE_REGS_ADDR;
- else
- return (unsigned long)FTRACE_ADDR;
-}
-
static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
unsigned long ftrace_addr;
int ret;
- ret = ftrace_test_record(rec, enable);
+ ftrace_addr = ftrace_get_addr_curr(rec);
- ftrace_addr = get_ftrace_addr(rec);
+ ret = ftrace_test_record(rec, enable);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
@@ -392,10 +357,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
/* converting nop to call */
return add_brk_on_nop(rec);
- case FTRACE_UPDATE_MODIFY_CALL_REGS:
case FTRACE_UPDATE_MODIFY_CALL:
- ftrace_addr = get_ftrace_old_addr(rec);
- /* fall through */
case FTRACE_UPDATE_MAKE_NOP:
/* converting a call to a nop */
return add_brk_on_call(rec, ftrace_addr);
@@ -440,14 +402,14 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
* If not, don't touch the breakpoint, we make just create
* a disaster.
*/
- ftrace_addr = get_ftrace_addr(rec);
+ ftrace_addr = ftrace_get_addr_new(rec);
nop = ftrace_call_replace(ip, ftrace_addr);
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
goto update;
/* Check both ftrace_addr and ftrace_old_addr */
- ftrace_addr = get_ftrace_old_addr(rec);
+ ftrace_addr = ftrace_get_addr_curr(rec);
nop = ftrace_call_replace(ip, ftrace_addr);
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
@@ -491,13 +453,12 @@ static int add_update(struct dyn_ftrace *rec, int enable)
ret = ftrace_test_record(rec, enable);
- ftrace_addr = get_ftrace_addr(rec);
+ ftrace_addr = ftrace_get_addr_new(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
- case FTRACE_UPDATE_MODIFY_CALL_REGS:
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
@@ -538,13 +499,12 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
ret = ftrace_update_record(rec, enable);
- ftrace_addr = get_ftrace_addr(rec);
+ ftrace_addr = ftrace_get_addr_new(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
- case FTRACE_UPDATE_MODIFY_CALL_REGS:
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
@@ -621,8 +581,8 @@ void ftrace_replace_code(int enable)
return;
remove_breakpoints:
+ pr_warn("Failed on %s (%d):\n", report, count);
ftrace_bug(ret, rec ? rec->ip : 0);
- printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
for_ftrace_rec_iter(iter) {
rec = ftrace_rec_iter_record(iter);
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 068054f4bf20..eda1a865641e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -172,7 +172,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
*/
load_ucode_bsp();
- if (console_loglevel == 10)
+ if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
early_printk("Kernel alive\n");
clear_page(init_level4_pgt);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 4177bfbc80b0..319bcb9372fe 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -74,9 +74,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
static inline void hpet_set_mapping(void)
{
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
-#ifdef CONFIG_X86_64
- __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE);
-#endif
}
static inline void hpet_clear_mapping(void)
@@ -479,7 +476,7 @@ static int hpet_msi_next_event(unsigned long delta,
static int hpet_setup_msi_irq(unsigned int irq)
{
if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
- destroy_irq(irq);
+ irq_free_hwirq(irq);
return -EINVAL;
}
return 0;
@@ -487,9 +484,8 @@ static int hpet_setup_msi_irq(unsigned int irq)
static int hpet_assign_irq(struct hpet_dev *dev)
{
- unsigned int irq;
+ unsigned int irq = irq_alloc_hwirq(-1);
- irq = create_irq_nr(0, -1);
if (!irq)
return -EINVAL;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 283a76a9cc40..11ccfb0a63e7 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -17,6 +17,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
+#include <asm/desc.h>
#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>
@@ -334,10 +335,17 @@ int check_irq_vectors_for_cpu_disable(void)
for_each_online_cpu(cpu) {
if (cpu == this_cpu)
continue;
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
- vector++) {
- if (per_cpu(vector_irq, cpu)[vector] < 0)
- count++;
+ /*
+ * We scan from FIRST_EXTERNAL_VECTOR to first system
+ * vector. If the vector is marked in the used vectors
+ * bitmap or an irq is assigned to it, we don't count
+ * it as available.
+ */
+ for (vector = FIRST_EXTERNAL_VECTOR;
+ vector < first_system_vector; vector++) {
+ if (!test_bit(vector, used_vectors) &&
+ per_cpu(vector_irq, cpu)[vector] < 0)
+ count++;
}
}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index dcbbaa165bde..c37886d759cc 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,8 +20,6 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
-int sysctl_ldt16 = 0;
-
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
{
@@ -231,16 +229,10 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
}
}
- /*
- * On x86-64 we do not support 16-bit segments due to
- * IRET leaking the high bits of the kernel stack address.
- */
-#ifdef CONFIG_X86_64
- if (!ldt_info.seg_32bit && !sysctl_ldt16) {
+ if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
error = -EINVAL;
goto out_unlock;
}
-#endif
fill_ldt(&ldt, &ldt_info);
if (oldmode)
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
new file mode 100644
index 000000000000..c050a0153168
--- /dev/null
+++ b/arch/x86/kernel/mcount_64.S
@@ -0,0 +1,217 @@
+/*
+ * linux/arch/x86_64/mcount_64.S
+ *
+ * Copyright (C) 2014 Steven Rostedt, Red Hat Inc
+ */
+
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm/ftrace.h>
+
+
+ .code64
+ .section .entry.text, "ax"
+
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook __fentry__
+#else
+# define function_hook mcount
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(function_hook)
+ retq
+END(function_hook)
+
+/* skip is set if stack has been adjusted */
+.macro ftrace_caller_setup skip=0
+ MCOUNT_SAVE_FRAME \skip
+
+ /* Load the ftrace_ops into the 3rd parameter */
+ movq function_trace_op(%rip), %rdx
+
+ /* Load ip into the first parameter */
+ movq RIP(%rsp), %rdi
+ subq $MCOUNT_INSN_SIZE, %rdi
+ /* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
+ movq 8(%rbp), %rsi
+#endif
+.endm
+
+ENTRY(ftrace_caller)
+ /* Check if tracing was disabled (quick check) */
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ ftrace_caller_setup
+ /* regs go into 4th parameter (but make it NULL) */
+ movq $0, %rcx
+
+GLOBAL(ftrace_call)
+ call ftrace_stub
+
+ MCOUNT_RESTORE_FRAME
+ftrace_return:
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+GLOBAL(ftrace_graph_call)
+ jmp ftrace_stub
+#endif
+
+GLOBAL(ftrace_stub)
+ retq
+END(ftrace_caller)
+
+ENTRY(ftrace_regs_caller)
+ /* Save the current flags before compare (in SS location)*/
+ pushfq
+
+ /* Check if tracing was disabled (quick check) */
+ cmpl $0, function_trace_stop
+ jne ftrace_restore_flags
+
+ /* skip=8 to skip flags saved in SS */
+ ftrace_caller_setup 8
+
+ /* Save the rest of pt_regs */
+ movq %r15, R15(%rsp)
+ movq %r14, R14(%rsp)
+ movq %r13, R13(%rsp)
+ movq %r12, R12(%rsp)
+ movq %r11, R11(%rsp)
+ movq %r10, R10(%rsp)
+ movq %rbp, RBP(%rsp)
+ movq %rbx, RBX(%rsp)
+ /* Copy saved flags */
+ movq SS(%rsp), %rcx
+ movq %rcx, EFLAGS(%rsp)
+ /* Kernel segments */
+ movq $__KERNEL_DS, %rcx
+ movq %rcx, SS(%rsp)
+ movq $__KERNEL_CS, %rcx
+ movq %rcx, CS(%rsp)
+ /* Stack - skipping return address */
+ leaq SS+16(%rsp), %rcx
+ movq %rcx, RSP(%rsp)
+
+ /* regs go into 4th parameter */
+ leaq (%rsp), %rcx
+
+GLOBAL(ftrace_regs_call)
+ call ftrace_stub
+
+ /* Copy flags back to SS, to restore them */
+ movq EFLAGS(%rsp), %rax
+ movq %rax, SS(%rsp)
+
+ /* Handlers can change the RIP */
+ movq RIP(%rsp), %rax
+ movq %rax, SS+8(%rsp)
+
+ /* restore the rest of pt_regs */
+ movq R15(%rsp), %r15
+ movq R14(%rsp), %r14
+ movq R13(%rsp), %r13
+ movq R12(%rsp), %r12
+ movq R10(%rsp), %r10
+ movq RBP(%rsp), %rbp
+ movq RBX(%rsp), %rbx
+
+ /* skip=8 to skip flags saved in SS */
+ MCOUNT_RESTORE_FRAME 8
+
+ /* Restore flags */
+ popfq
+
+ jmp ftrace_return
+ftrace_restore_flags:
+ popfq
+ jmp ftrace_stub
+
+END(ftrace_regs_caller)
+
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(function_hook)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ cmpq $ftrace_stub, ftrace_trace_function
+ jnz trace
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ cmpq $ftrace_stub, ftrace_graph_return
+ jnz ftrace_graph_caller
+
+ cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
+ jnz ftrace_graph_caller
+#endif
+
+GLOBAL(ftrace_stub)
+ retq
+
+trace:
+ MCOUNT_SAVE_FRAME
+
+ movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
+ movq 8(%rbp), %rsi
+#endif
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+ call *ftrace_trace_function
+
+ MCOUNT_RESTORE_FRAME
+
+ jmp ftrace_stub
+END(function_hook)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ MCOUNT_SAVE_FRAME
+
+#ifdef CC_USING_FENTRY
+ leaq SS+16(%rsp), %rdi
+ movq $0, %rdx /* No framepointers needed */
+#else
+ leaq 8(%rbp), %rdi
+ movq (%rbp), %rdx
+#endif
+ movq RIP(%rsp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rsi
+
+ call prepare_ftrace_return
+
+ MCOUNT_RESTORE_FRAME
+
+ retq
+END(ftrace_graph_caller)
+
+GLOBAL(return_to_handler)
+ subq $24, %rsp
+
+ /* Save the return values */
+ movq %rax, (%rsp)
+ movq %rdx, 8(%rsp)
+ movq %rbp, %rdi
+
+ call ftrace_return_to_handler
+
+ movq %rax, %rdi
+ movq 8(%rsp), %rdx
+ movq (%rsp), %rax
+ addq $24, %rsp
+ jmp *%rdi
+#endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f7d0672481fd..a25e202bb319 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -97,12 +97,17 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_mask = dma_alloc_coherent_mask(dev, flag);
- flag |= __GFP_ZERO;
+ flag &= ~__GFP_ZERO;
again:
page = NULL;
/* CMA can be used only in the context which permits sleeping */
- if (flag & __GFP_WAIT)
+ if (flag & __GFP_WAIT) {
page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ if (page && page_to_phys(page) + size > dma_mask) {
+ dma_release_from_contiguous(dev, page, count);
+ page = NULL;
+ }
+ }
/* fallback */
if (!page)
page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
@@ -120,7 +125,7 @@ again:
return NULL;
}
-
+ memset(page_address(page), 0, size);
*dma_addr = addr;
return page_address(page);
}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 6c483ba98b9c..77dd0ad58be4 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -14,7 +14,7 @@
#include <asm/iommu_table.h>
int swiotlb __read_mostly;
-static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
@@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
-static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
struct dma_attrs *attrs)
{
- swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ else
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static struct dma_map_ops swiotlb_dma_ops = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 09c76d265550..78a0e6298922 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1119,7 +1119,7 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
memblock_set_current_limit(get_max_mapped());
- dma_contiguous_reserve(0);
+ dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
/*
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 9e5de6813e1f..a0da58db43a8 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -298,7 +298,8 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
}
if (current->mm->context.vdso)
- restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
+ restorer = current->mm->context.vdso +
+ selected_vdso32->sym___kernel_sigreturn;
else
restorer = &frame->retcode;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
@@ -361,7 +362,8 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. */
- restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
+ restorer = current->mm->context.vdso +
+ selected_vdso32->sym___kernel_sigreturn;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
put_user_ex(restorer, &frame->pretcode);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 34826934d4a7..5492798930ef 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -244,6 +244,13 @@ static void notrace start_secondary(void *unused)
check_tsc_sync_target();
/*
+ * Enable the espfix hack for this CPU
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ init_espfix_ap();
+#endif
+
+ /*
* We need to hold vector_lock so there the set of online cpus
* does not change while we are assigning vectors to cpus. Holding
* this lock ensures we don't half assign or remove an irq from a cpu.
@@ -859,9 +866,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
/* was set by cpu_init() */
cpumask_clear_cpu(cpu, cpu_initialized_mask);
-
- set_cpu_present(cpu, false);
- per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
}
/* mark "stuck" area as not stuck */
@@ -921,7 +925,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
- pr_debug("do_boot_cpu failed %d\n", err);
+ pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
return -EIO;
}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8b3b3eb3cead..ea5b5709aa76 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -91,7 +91,7 @@ static int addr_to_vsyscall_nr(unsigned long addr)
{
int nr;
- if ((addr & ~0xC00UL) != VSYSCALL_START)
+ if ((addr & ~0xC00UL) != VSYSCALL_ADDR)
return -EINVAL;
nr = (addr & 0xC00UL) >> 10;
@@ -330,24 +330,17 @@ void __init map_vsyscall(void)
{
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
- __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
+ __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
vsyscall_mode == NATIVE
? PAGE_KERNEL_VSYSCALL
: PAGE_KERNEL_VVAR);
- BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
- (unsigned long)VSYSCALL_START);
-
- __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
- BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
- (unsigned long)VVAR_ADDRESS);
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+ (unsigned long)VSYSCALL_ADDR);
}
static int __init vsyscall_init(void)
{
- BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE));
-
cpu_notifier_register_begin();
on_each_cpu(cpu_vsyscall_init, NULL, 1);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 20621d753d5f..167ffcac16ed 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -30,12 +30,14 @@ struct pg_state {
unsigned long start_address;
unsigned long current_address;
const struct addr_marker *marker;
+ unsigned long lines;
bool to_dmesg;
};
struct addr_marker {
unsigned long start_address;
const char *name;
+ unsigned long max_lines;
};
/* indices for address_markers; keep sync'd w/ address_markers below */
@@ -46,6 +48,7 @@ enum address_markers_idx {
LOW_KERNEL_NR,
VMALLOC_START_NR,
VMEMMAP_START_NR,
+ ESPFIX_START_NR,
HIGH_KERNEL_NR,
MODULES_VADDR_NR,
MODULES_END_NR,
@@ -68,6 +71,7 @@ static struct addr_marker address_markers[] = {
{ PAGE_OFFSET, "Low Kernel Mapping" },
{ VMALLOC_START, "vmalloc() Area" },
{ VMEMMAP_START, "Vmemmap" },
+ { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
{ __START_KERNEL_map, "High Kernel Mapping" },
{ MODULES_VADDR, "Modules" },
{ MODULES_END, "End Modules" },
@@ -182,7 +186,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
pgprot_t new_prot, int level)
{
pgprotval_t prot, cur;
- static const char units[] = "KMGTPE";
+ static const char units[] = "BKMGTPE";
/*
* If we have a "break" in the series, we need to flush the state that
@@ -197,6 +201,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
st->current_prot = new_prot;
st->level = level;
st->marker = address_markers;
+ st->lines = 0;
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
st->marker->name);
} else if (prot != cur || level != st->level ||
@@ -208,17 +213,24 @@ static void note_page(struct seq_file *m, struct pg_state *st,
/*
* Now print the actual finished series
*/
- pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ",
- width, st->start_address,
- width, st->current_address);
-
- delta = (st->current_address - st->start_address) >> 10;
- while (!(delta & 1023) && unit[1]) {
- delta >>= 10;
- unit++;
+ if (!st->marker->max_lines ||
+ st->lines < st->marker->max_lines) {
+ pt_dump_seq_printf(m, st->to_dmesg,
+ "0x%0*lx-0x%0*lx ",
+ width, st->start_address,
+ width, st->current_address);
+
+ delta = st->current_address - st->start_address;
+ while (!(delta & 1023) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+ pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
+ delta, *unit);
+ printk_prot(m, st->current_prot, st->level,
+ st->to_dmesg);
}
- pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit);
- printk_prot(m, st->current_prot, st->level, st->to_dmesg);
+ st->lines++;
/*
* We print markers for special areas of address space,
@@ -226,7 +238,17 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* This helps in the interpretation.
*/
if (st->current_address >= st->marker[1].start_address) {
+ if (st->marker->max_lines &&
+ st->lines > st->marker->max_lines) {
+ unsigned long nskip =
+ st->lines - st->marker->max_lines;
+ pt_dump_seq_printf(m, st->to_dmesg,
+ "... %lu entr%s skipped ... \n",
+ nskip,
+ nskip == 1 ? "y" : "ies");
+ }
st->marker++;
+ st->lines = 0;
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
st->marker->name);
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8e5722992677..858b47b5221b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,7 +18,8 @@
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
-#include <asm/fixmap.h> /* VSYSCALL_START */
+#include <asm/fixmap.h> /* VSYSCALL_ADDR */
+#include <asm/vsyscall.h> /* emulate_vsyscall */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -771,7 +772,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
* emulation.
*/
if (unlikely((error_code & PF_INSTR) &&
- ((address & ~0xfff) == VSYSCALL_START))) {
+ ((address & ~0xfff) == VSYSCALL_ADDR))) {
if (emulate_vsyscall(regs, address))
return;
}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8c9f647ff9e1..8b977ebf9388 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -58,11 +58,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
{
return NULL;
}
-
-int pmd_huge_support(void)
-{
- return 0;
-}
#else
struct page *
@@ -80,11 +75,6 @@ int pud_huge(pud_t pud)
{
return !!(pud_val(pud) & _PAGE_PSE);
}
-
-int pmd_huge_support(void)
-{
- return 1;
-}
#endif
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f35c66c5959a..df1a9927ad29 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1055,8 +1055,8 @@ void __init mem_init(void)
after_bootmem = 1;
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
- VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
+ PAGE_SIZE, KCORE_OTHER);
mem_init_print_info(NULL);
}
@@ -1185,11 +1185,19 @@ int kern_addr_valid(unsigned long addr)
* covers the 64bit vsyscall page now. 32bit has a real VMA now and does
* not need special handling anymore:
*/
+static const char *gate_vma_name(struct vm_area_struct *vma)
+{
+ return "[vsyscall]";
+}
+static struct vm_operations_struct gate_vma_ops = {
+ .name = gate_vma_name,
+};
static struct vm_area_struct gate_vma = {
- .vm_start = VSYSCALL_START,
- .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
+ .vm_start = VSYSCALL_ADDR,
+ .vm_end = VSYSCALL_ADDR + PAGE_SIZE,
.vm_page_prot = PAGE_READONLY_EXEC,
- .vm_flags = VM_READ | VM_EXEC
+ .vm_flags = VM_READ | VM_EXEC,
+ .vm_ops = &gate_vma_ops,
};
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
@@ -1218,29 +1226,46 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr)
*/
int in_gate_area_no_mm(unsigned long addr)
{
- return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
+ return (addr & PAGE_MASK) == VSYSCALL_ADDR;
}
-const char *arch_vma_name(struct vm_area_struct *vma)
+static unsigned long probe_memory_block_size(void)
{
- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
- return "[vdso]";
- if (vma == &gate_vma)
- return "[vsyscall]";
- return NULL;
-}
+ /* start from 2g */
+ unsigned long bz = 1UL<<31;
#ifdef CONFIG_X86_UV
-unsigned long memory_block_size_bytes(void)
-{
if (is_uv_system()) {
printk(KERN_INFO "UV: memory block size 2GB\n");
return 2UL * 1024 * 1024 * 1024;
}
- return MIN_MEMORY_BLOCK_SIZE;
-}
#endif
+ /* less than 64g installed */
+ if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
+ return MIN_MEMORY_BLOCK_SIZE;
+
+ /* get the tail size */
+ while (bz > MIN_MEMORY_BLOCK_SIZE) {
+ if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
+ break;
+ bz >>= 1;
+ }
+
+ printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+
+ return bz;
+}
+
+static unsigned long memory_block_size_probed;
+unsigned long memory_block_size_bytes(void)
+{
+ if (!memory_block_size_probed)
+ memory_block_size_probed = probe_memory_block_size();
+
+ return memory_block_size_probed;
+}
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* Initialise the sparsemem vmemmap using huge-pages at the PMD level.
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index bc7527e109c8..baff1da354e0 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -367,6 +367,12 @@ void __init early_ioremap_init(void)
{
pmd_t *pmd;
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
+#else
+ WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
+#endif
+
early_ioremap_setup();
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1d045f9c390f..a32b706c401a 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -559,7 +559,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
int i, nid;
nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
unsigned long start, end;
- struct memblock_type *type = &memblock.reserved;
+ struct memblock_region *r;
/*
* At this time, all memory regions reserved by memblock are
@@ -573,8 +573,8 @@ static void __init numa_clear_kernel_node_hotplug(void)
}
/* Mark all kernel nodes. */
- for (i = 0; i < type->cnt; i++)
- node_set(type->regions[i].nid, numa_kernel_nodes);
+ for_each_memblock(reserved, r)
+ node_set(r->nid, numa_kernel_nodes);
/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
for (i = 0; i < numa_meminfo.nr_blks; i++) {
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 461bc8289024..6629f397b467 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -35,7 +35,7 @@ enum {
static int pte_testbit(pte_t pte)
{
- return pte_flags(pte) & _PAGE_UNUSED1;
+ return pte_flags(pte) & _PAGE_SOFTW1;
}
struct split_state {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 0004ac72dbdd..6fb6927f9e76 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -456,9 +456,9 @@ void __init reserve_top_address(unsigned long reserve)
{
#ifdef CONFIG_X86_32
BUG_ON(fixmaps_set > 0);
- printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
- (int)-reserve);
- __FIXADDR_TOP = -reserve - PAGE_SIZE;
+ __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
+ printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
+ -reserve, __FIXADDR_TOP + PAGE_SIZE);
#endif
}
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 9d8a509c9730..5ceda85b8687 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
{
void *vaddr;
- vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
- if (!vaddr)
- vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
return vaddr;
}
@@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
static struct dma_map_ops sta2x11_dma_ops = {
.alloc = sta2x11_swiotlb_alloc_coherent,
- .free = swiotlb_free_coherent,
+ .free = x86_swiotlb_free_coherent,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.map_sg = swiotlb_map_sg_attrs,
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 3781dd39e8bd..87fc96bcc13c 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -110,7 +110,7 @@ static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt2(get_time, tm, tc);
+ status = efi_call_virt(get_time, tm, tc);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -121,7 +121,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt1(set_time, tm);
+ status = efi_call_virt(set_time, tm);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -134,8 +134,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt3(get_wakeup_time,
- enabled, pending, tm);
+ status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -146,8 +145,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
efi_status_t status;
spin_lock_irqsave(&rtc_lock, flags);
- status = efi_call_virt2(set_wakeup_time,
- enabled, tm);
+ status = efi_call_virt(set_wakeup_time, enabled, tm);
spin_unlock_irqrestore(&rtc_lock, flags);
return status;
}
@@ -158,17 +156,17 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
unsigned long *data_size,
void *data)
{
- return efi_call_virt5(get_variable,
- name, vendor, attr,
- data_size, data);
+ return efi_call_virt(get_variable,
+ name, vendor, attr,
+ data_size, data);
}
static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
- return efi_call_virt3(get_next_variable,
- name_size, name, vendor);
+ return efi_call_virt(get_next_variable,
+ name_size, name, vendor);
}
static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -177,9 +175,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
unsigned long data_size,
void *data)
{
- return efi_call_virt5(set_variable,
- name, vendor, attr,
- data_size, data);
+ return efi_call_virt(set_variable,
+ name, vendor, attr,
+ data_size, data);
}
static efi_status_t virt_efi_query_variable_info(u32 attr,
@@ -190,13 +188,13 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt4(query_variable_info, attr, storage_space,
- remaining_space, max_variable_size);
+ return efi_call_virt(query_variable_info, attr, storage_space,
+ remaining_space, max_variable_size);
}
static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
{
- return efi_call_virt1(get_next_high_mono_count, count);
+ return efi_call_virt(get_next_high_mono_count, count);
}
static void virt_efi_reset_system(int reset_type,
@@ -204,8 +202,8 @@ static void virt_efi_reset_system(int reset_type,
unsigned long data_size,
efi_char16_t *data)
{
- efi_call_virt4(reset_system, reset_type, status,
- data_size, data);
+ __efi_call_virt(reset_system, reset_type, status,
+ data_size, data);
}
static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
@@ -215,7 +213,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt3(update_capsule, capsules, count, sg_list);
+ return efi_call_virt(update_capsule, capsules, count, sg_list);
}
static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
@@ -226,8 +224,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
- return efi_call_virt4(query_capsule_caps, capsules, count, max_size,
- reset_type);
+ return efi_call_virt(query_capsule_caps, capsules, count, max_size,
+ reset_type);
}
static efi_status_t __init phys_efi_set_virtual_address_map(
@@ -239,9 +237,9 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
efi_status_t status;
efi_call_phys_prelog();
- status = efi_call_phys4(efi_phys.set_virtual_address_map,
- memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
+ status = efi_call_phys(efi_phys.set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
efi_call_phys_epilog();
return status;
}
@@ -919,6 +917,9 @@ static void __init save_runtime_map(void)
void *tmp, *p, *q = NULL;
int count = 0;
+ if (efi_enabled(EFI_OLD_MEMMAP))
+ return;
+
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index e0984ef0374b..5fcda7272550 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -73,84 +73,7 @@
2:
.endm
-ENTRY(efi_call0)
- SAVE_XMM
- subq $32, %rsp
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call0)
-
-ENTRY(efi_call1)
- SAVE_XMM
- subq $32, %rsp
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call1)
-
-ENTRY(efi_call2)
- SAVE_XMM
- subq $32, %rsp
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call2)
-
-ENTRY(efi_call3)
- SAVE_XMM
- subq $32, %rsp
- mov %rcx, %r8
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call3)
-
-ENTRY(efi_call4)
- SAVE_XMM
- subq $32, %rsp
- mov %r8, %r9
- mov %rcx, %r8
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $32, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call4)
-
-ENTRY(efi_call5)
- SAVE_XMM
- subq $48, %rsp
- mov %r9, 32(%rsp)
- mov %r8, %r9
- mov %rcx, %r8
- mov %rsi, %rcx
- SWITCH_PGT
- call *%rdi
- RESTORE_PGT
- addq $48, %rsp
- RESTORE_XMM
- ret
-ENDPROC(efi_call5)
-
-ENTRY(efi_call6)
+ENTRY(efi_call)
SAVE_XMM
mov (%rsp), %rax
mov 8(%rax), %rax
@@ -166,7 +89,7 @@ ENTRY(efi_call6)
addq $48, %rsp
RESTORE_XMM
ret
-ENDPROC(efi_call6)
+ENDPROC(efi_call)
#ifdef CONFIG_EFI_MIXED
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 097e7a7940d8..af9307f2cc28 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -20,3 +20,4 @@ obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o
obj-$(subst m,y,$(CONFIG_SERIAL_MRST_MAX3110)) += platform_max3111.o
# MISC Devices
obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
new file mode 100644
index 000000000000..973cf3bfa9fd
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
@@ -0,0 +1,72 @@
+/*
+ * platform_wdt.c: Watchdog platform library file
+ *
+ * (C) Copyright 2014 Intel Corporation
+ * Author: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/intel-mid_wdt.h>
+#include <asm/intel-mid.h>
+#include <asm/io_apic.h>
+
+#define TANGIER_EXT_TIMER0_MSI 15
+
+static struct platform_device wdt_dev = {
+ .name = "intel_mid_wdt",
+ .id = -1,
+};
+
+static int tangier_probe(struct platform_device *pdev)
+{
+ int ioapic;
+ int irq;
+ struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
+ struct io_apic_irq_attr irq_attr = { 0 };
+
+ if (!pdata)
+ return -EINVAL;
+
+ irq = pdata->irq;
+ ioapic = mp_find_ioapic(irq);
+ if (ioapic >= 0) {
+ int ret;
+ irq_attr.ioapic = ioapic;
+ irq_attr.ioapic_pin = irq;
+ irq_attr.trigger = 1;
+ /* irq_attr.polarity = 0; -> Active high */
+ ret = io_apic_set_pci_routing(NULL, irq, &irq_attr);
+ if (ret)
+ return ret;
+ } else {
+ dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
+ irq);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct intel_mid_wdt_pdata tangier_pdata = {
+ .irq = TANGIER_EXT_TIMER0_MSI,
+ .probe = tangier_probe,
+};
+
+static int __init register_mid_wdt(void)
+{
+ if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) {
+ wdt_dev.dev.platform_data = &tangier_pdata;
+ return platform_device_register(&wdt_dev);
+ }
+
+ return -ENODEV;
+}
+
+rootfs_initcall(register_mid_wdt);
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 766612137a62..1584cbed0dce 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -39,7 +39,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
*/
return BIOS_STATUS_UNIMPLEMENTED;
- ret = efi_call6((void *)__va(tab->function), (u64)which,
+ ret = efi_call((void *)__va(tab->function), (u64)which,
a1, a2, a3, a4, a5);
return ret;
}
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index acf7752da952..b233681af4de 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -238,11 +238,9 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
unsigned long mmr_offset, int limit)
{
- int irq, ret;
+ int ret, irq = irq_alloc_hwirq(uv_blade_to_memory_nid(mmr_blade));
- irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
-
- if (irq <= 0)
+ if (!irq)
return -EBUSY;
ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
@@ -250,7 +248,7 @@ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
if (ret == irq)
uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
else
- destroy_irq(irq);
+ irq_free_hwirq(irq);
return ret;
}
@@ -285,6 +283,6 @@ void uv_teardown_irq(unsigned int irq)
n = n->rb_right;
}
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
- destroy_irq(irq);
+ irq_free_hwirq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index be27da60dc8f..c89c93320c12 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -85,7 +85,7 @@ static cpumask_var_t uv_nmi_cpu_mask;
* Default is all stack dumps go to the console and buffer.
* Lower level to send to log buffer only.
*/
-static int uv_nmi_loglevel = 7;
+static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
/*
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 04376ac3d9ef..ec255a1646d2 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -212,10 +212,10 @@
203 common sched_setaffinity sys_sched_setaffinity
204 common sched_getaffinity sys_sched_getaffinity
205 64 set_thread_area
-206 common io_setup sys_io_setup
+206 64 io_setup sys_io_setup
207 common io_destroy sys_io_destroy
208 common io_getevents sys_io_getevents
-209 common io_submit sys_io_submit
+209 64 io_submit sys_io_submit
210 common io_cancel sys_io_cancel
211 64 get_thread_area
212 common lookup_dcookie sys_lookup_dcookie
@@ -359,3 +359,5 @@
540 x32 process_vm_writev compat_sys_process_vm_writev
541 x32 setsockopt compat_sys_setsockopt
542 x32 getsockopt compat_sys_getsockopt
+543 x32 io_setup compat_sys_io_setup
+544 x32 io_submit compat_sys_io_submit
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index af91901babb8..916cda4cd5b4 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -12,7 +12,7 @@
#include <asm/page.h>
#include <linux/init.h>
-unsigned int __read_mostly vdso_enabled = 1;
+static unsigned int __read_mostly vdso_enabled = 1;
unsigned long um_vdso_addr;
extern unsigned long task_size;
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
index 3282874bc61d..aae8ffdd5880 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/vdso/.gitignore
@@ -1,8 +1,7 @@
vdso.lds
-vdso-syms.lds
vdsox32.lds
-vdsox32-syms.lds
-vdso32-syms.lds
vdso32-syscall-syms.lds
vdso32-sysenter-syms.lds
vdso32-int80-syms.lds
+vdso-image-*.c
+vdso2c
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index c580d1210ffe..9769df094035 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -24,15 +24,30 @@ vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
# files to link into kernel
obj-y += vma.o
-obj-$(VDSO64-y) += vdso.o
-obj-$(VDSOX32-y) += vdsox32.o
-obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
+
+# vDSO images to build
+vdso_img-$(VDSO64-y) += 64
+vdso_img-$(VDSOX32-y) += x32
+vdso_img-$(VDSO32-y) += 32-int80
+vdso_img-$(CONFIG_COMPAT) += 32-syscall
+vdso_img-$(VDSO32-y) += 32-sysenter
+
+obj-$(VDSO32-y) += vdso32-setup.o
vobjs := $(foreach F,$(vobj64s),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
-targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
+targets += vdso.lds $(vobjs-y)
+
+# Build the vDSO image C files and link them in.
+vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
+vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
+vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
+obj-y += $(vdso_img_objs)
+targets += $(vdso_img_cfiles)
+targets += $(vdso_img_sodbg)
+.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c)
export CPPFLAGS_vdso.lds += -P -C
@@ -41,14 +56,19 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
$(DISABLE_LTO)
-$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
-
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+hostprogs-y += vdso2c
+
+quiet_cmd_vdso2c = VDSO2C $@
+define cmd_vdso2c
+ $(obj)/vdso2c $< $@
+endef
+
+$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE
+ $(call if_changed,vdso2c)
#
# Don't omit frame pointers for ease of userspace debugging, but do
@@ -68,22 +88,6 @@ CFLAGS_REMOVE_vclock_gettime.o = -pg
CFLAGS_REMOVE_vgetcpu.o = -pg
CFLAGS_REMOVE_vvar.o = -pg
-targets += vdso-syms.lds
-obj-$(VDSO64-y) += vdso-syms.lds
-
-#
-# Match symbols in the DSO that look like VDSO*; produce a file of constants.
-#
-sed-vdsosym := -e 's/^00*/0/' \
- -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
-quiet_cmd_vdsosym = VDSOSYM $@
-define cmd_vdsosym
- $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
-endef
-
-$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
- $(call if_changed,vdsosym)
-
#
# X32 processes use x32 vDSO to access 64bit kernel data.
#
@@ -94,9 +98,6 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
# so that it can reach 64bit address space with 64bit pointers.
#
-targets += vdsox32-syms.lds
-obj-$(VDSOX32-y) += vdsox32-syms.lds
-
CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
-Wl,-soname=linux-vdso.so.1 \
@@ -113,9 +114,7 @@ quiet_cmd_x32 = X32 $@
$(obj)/%-x32.o: $(obj)/%.o FORCE
$(call if_changed,x32)
-targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y)
-
-$(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so
+targets += vdsox32.lds $(vobjx32s-y)
$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)
@@ -123,7 +122,6 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
#
# Build multiple 32-bit vDSO images to choose from at boot time.
#
-obj-$(VDSO32-y) += vdso32-syms.lds
vdso32.so-$(VDSO32-y) += int80
vdso32.so-$(CONFIG_COMPAT) += syscall
vdso32.so-$(VDSO32-y) += sysenter
@@ -138,10 +136,8 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
-targets += $(vdso32-images) $(vdso32-images:=.dbg)
targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
-
-extra-y += $(vdso32-images)
+targets += vdso32/vclock_gettime.o
$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
@@ -166,27 +162,6 @@ $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
$(obj)/vdso32/%.o
$(call if_changed,vdso)
-# Make vdso32-*-syms.lds from each image, and then make sure they match.
-# The only difference should be that some do not define VDSO32_SYSENTER_RETURN.
-
-targets += vdso32-syms.lds $(vdso32.so-y:%=vdso32-%-syms.lds)
-
-quiet_cmd_vdso32sym = VDSOSYM $@
-define cmd_vdso32sym
- if LC_ALL=C sort -u $(filter-out FORCE,$^) > $(@D)/.tmp_$(@F) && \
- $(foreach H,$(filter-out FORCE,$^),\
- if grep -q VDSO32_SYSENTER_RETURN $H; \
- then diff -u $(@D)/.tmp_$(@F) $H; \
- else sed /VDSO32_SYSENTER_RETURN/d $(@D)/.tmp_$(@F) | \
- diff -u - $H; fi &&) : ;\
- then mv -f $(@D)/.tmp_$(@F) $@; \
- else rm -f $(@D)/.tmp_$(@F); exit 1; \
- fi
-endef
-
-$(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE
- $(call if_changed,vdso32sym)
-
#
# The DSO images are built using a special linker script.
#
@@ -197,7 +172,7 @@ quiet_cmd_vdso = VDSO $@
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
- $(LTO_CFLAGS)
+ -Wl,-Bsymbolic $(LTO_CFLAGS)
GCOV_PROFILE := n
#
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 16d686171e9a..b2e4f493e5b0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -30,9 +30,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
#ifdef CONFIG_HPET_TIMER
-static inline u32 read_hpet_counter(const volatile void *addr)
+extern u8 hpet_page
+ __attribute__((visibility("hidden")));
+
+static notrace cycle_t vread_hpet(void)
{
- return *(const volatile u32 *) (addr + HPET_COUNTER);
+ return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
}
#endif
@@ -43,11 +46,6 @@ static inline u32 read_hpet_counter(const volatile void *addr)
#include <asm/fixmap.h>
#include <asm/pvclock.h>
-static notrace cycle_t vread_hpet(void)
-{
- return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
-}
-
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
@@ -137,16 +135,6 @@ static notrace cycle_t vread_pvclock(int *mode)
#else
-extern u8 hpet_page
- __attribute__((visibility("hidden")));
-
-#ifdef CONFIG_HPET_TIMER
-static notrace cycle_t vread_hpet(void)
-{
- return read_hpet_counter((const void *)(&hpet_page));
-}
-#endif
-
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
@@ -154,7 +142,7 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
asm(
"mov %%ebx, %%edx \n"
"mov %2, %%ebx \n"
- "call VDSO32_vsyscall \n"
+ "call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
: "=a" (ret)
: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
@@ -169,7 +157,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
asm(
"mov %%ebx, %%edx \n"
"mov %2, %%ebx \n"
- "call VDSO32_vsyscall \n"
+ "call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
: "=a" (ret)
: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 9df017ab2285..2ec72f651ebf 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -1,3 +1,5 @@
+#include <asm/vdso.h>
+
/*
* Linker script for vDSO. This is an ELF shared object prelinked to
* its virtual address, and with only one read-only segment.
@@ -6,20 +8,6 @@
SECTIONS
{
-#ifdef BUILD_VDSO32
-#include <asm/vdso32.h>
-
- hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
-
- vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
-
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
-#define __VVAR_KERNEL_LDS
-#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
-#undef EMIT_VVAR
-#endif
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
@@ -60,10 +48,30 @@ SECTIONS
.text : { *(.text*) } :text =0x90909090,
/*
- * The comma above works around a bug in gold:
- * https://sourceware.org/bugzilla/show_bug.cgi?id=16804
+ * The remainder of the vDSO consists of special pages that are
+ * shared between the kernel and userspace. It needs to be at the
+ * end so that it doesn't overlap the mapping of the actual
+ * vDSO image.
*/
+ . = ALIGN(PAGE_SIZE);
+ vvar_page = .;
+
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+#undef EMIT_VVAR
+
+ . = vvar_page + PAGE_SIZE;
+
+ hpet_page = .;
+ . = . + PAGE_SIZE;
+
+ . = ALIGN(PAGE_SIZE);
+ end_mapping = .;
+
/DISCARD/ : {
*(.discard)
*(.discard.*)
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
deleted file mode 100644
index be3f23b09af5..000000000000
--- a/arch/x86/vdso/vdso.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm/vdso.h>
-
-DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index b96b2677cad8..75e3404c83b1 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -1,14 +1,11 @@
/*
* Linker script for 64-bit vDSO.
* We #include the file to define the layout details.
- * Here we only choose the prelinked virtual address.
*
* This file defines the version script giving the user-exported symbols in
- * the DSO. We can define local symbols here called VDSO* to make their
- * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ * the DSO.
*/
-#define VDSO_PRELINK 0xffffffffff700000
#include "vdso-layout.lds.S"
/*
@@ -28,5 +25,3 @@ VERSION {
local: *;
};
}
-
-VDSO64_PRELINK = VDSO_PRELINK;
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
new file mode 100644
index 000000000000..450ac6eaf613
--- /dev/null
+++ b/arch/x86/vdso/vdso2c.c
@@ -0,0 +1,175 @@
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <err.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+
+#include <tools/le_byteshift.h>
+
+#include <linux/elf.h>
+#include <linux/types.h>
+
+const char *outfilename;
+
+/* Symbols that we need in vdso2c. */
+enum {
+ sym_vvar_page,
+ sym_hpet_page,
+ sym_end_mapping,
+};
+
+const int special_pages[] = {
+ sym_vvar_page,
+ sym_hpet_page,
+};
+
+char const * const required_syms[] = {
+ [sym_vvar_page] = "vvar_page",
+ [sym_hpet_page] = "hpet_page",
+ [sym_end_mapping] = "end_mapping",
+ "VDSO32_NOTE_MASK",
+ "VDSO32_SYSENTER_RETURN",
+ "__kernel_vsyscall",
+ "__kernel_sigreturn",
+ "__kernel_rt_sigreturn",
+};
+
+__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
+static void fail(const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ fprintf(stderr, "Error: ");
+ vfprintf(stderr, format, ap);
+ unlink(outfilename);
+ exit(1);
+ va_end(ap);
+}
+
+/*
+ * Evil macros to do a little-endian read.
+ */
+#define GLE(x, bits, ifnot) \
+ __builtin_choose_expr( \
+ (sizeof(*(x)) == bits/8), \
+ (__typeof__(*(x)))get_unaligned_le##bits(x), ifnot)
+
+extern void bad_get_le(void);
+#define LAST_LE(x) \
+ __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le())
+
+#define GET_LE(x) \
+ GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x))))
+
+#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
+
+#define BITS 64
+#define GOFUNC go64
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Phdr Elf64_Phdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Dyn Elf64_Dyn
+#include "vdso2c.h"
+#undef BITS
+#undef GOFUNC
+#undef Elf_Ehdr
+#undef Elf_Shdr
+#undef Elf_Phdr
+#undef Elf_Sym
+#undef Elf_Dyn
+
+#define BITS 32
+#define GOFUNC go32
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Phdr Elf32_Phdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Dyn Elf32_Dyn
+#include "vdso2c.h"
+#undef BITS
+#undef GOFUNC
+#undef Elf_Ehdr
+#undef Elf_Shdr
+#undef Elf_Phdr
+#undef Elf_Sym
+#undef Elf_Dyn
+
+static void go(void *addr, size_t len, FILE *outfile, const char *name)
+{
+ Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr;
+
+ if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
+ go64(addr, len, outfile, name);
+ } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
+ go32(addr, len, outfile, name);
+ } else {
+ fail("unknown ELF class\n");
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int fd;
+ off_t len;
+ void *addr;
+ FILE *outfile;
+ char *name, *tmp;
+ int namelen;
+
+ if (argc != 3) {
+ printf("Usage: vdso2c INPUT OUTPUT\n");
+ return 1;
+ }
+
+ /*
+ * Figure out the struct name. If we're writing to a .so file,
+ * generate raw output insted.
+ */
+ name = strdup(argv[2]);
+ namelen = strlen(name);
+ if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
+ name = NULL;
+ } else {
+ tmp = strrchr(name, '/');
+ if (tmp)
+ name = tmp + 1;
+ tmp = strchr(name, '.');
+ if (tmp)
+ *tmp = '\0';
+ for (tmp = name; *tmp; tmp++)
+ if (*tmp == '-')
+ *tmp = '_';
+ }
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd == -1)
+ err(1, "%s", argv[1]);
+
+ len = lseek(fd, 0, SEEK_END);
+ if (len == (off_t)-1)
+ err(1, "lseek");
+
+ addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED)
+ err(1, "mmap");
+
+ outfilename = argv[2];
+ outfile = fopen(outfilename, "w");
+ if (!outfile)
+ err(1, "%s", argv[2]);
+
+ go(addr, (size_t)len, outfile, name);
+
+ munmap(addr, len);
+ fclose(outfile);
+
+ return 0;
+}
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
new file mode 100644
index 000000000000..8a074637a576
--- /dev/null
+++ b/arch/x86/vdso/vdso2c.h
@@ -0,0 +1,163 @@
+/*
+ * This file is included twice from vdso2c.c. It generates code for 32-bit
+ * and 64-bit vDSOs. We need both for 64-bit builds, since 32-bit vDSOs
+ * are built for 32-bit userspace.
+ */
+
+static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
+{
+ int found_load = 0;
+ unsigned long load_size = -1; /* Work around bogus warning */
+ unsigned long data_size;
+ Elf_Ehdr *hdr = (Elf_Ehdr *)addr;
+ int i;
+ unsigned long j;
+ Elf_Shdr *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
+ *alt_sec = NULL;
+ Elf_Dyn *dyn = 0, *dyn_end = 0;
+ const char *secstrings;
+ uint64_t syms[NSYMS] = {};
+
+ Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff));
+
+ /* Walk the segment table. */
+ for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
+ if (GET_LE(&pt[i].p_type) == PT_LOAD) {
+ if (found_load)
+ fail("multiple PT_LOAD segs\n");
+
+ if (GET_LE(&pt[i].p_offset) != 0 ||
+ GET_LE(&pt[i].p_vaddr) != 0)
+ fail("PT_LOAD in wrong place\n");
+
+ if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz))
+ fail("cannot handle memsz != filesz\n");
+
+ load_size = GET_LE(&pt[i].p_memsz);
+ found_load = 1;
+ } else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) {
+ dyn = addr + GET_LE(&pt[i].p_offset);
+ dyn_end = addr + GET_LE(&pt[i].p_offset) +
+ GET_LE(&pt[i].p_memsz);
+ }
+ }
+ if (!found_load)
+ fail("no PT_LOAD seg\n");
+ data_size = (load_size + 4095) / 4096 * 4096;
+
+ /* Walk the dynamic table */
+ for (i = 0; dyn + i < dyn_end &&
+ GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
+ typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
+ if (tag == DT_REL || tag == DT_RELSZ ||
+ tag == DT_RELENT || tag == DT_TEXTREL)
+ fail("vdso image contains dynamic relocations\n");
+ }
+
+ /* Walk the section table */
+ secstrings_hdr = addr + GET_LE(&hdr->e_shoff) +
+ GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
+ secstrings = addr + GET_LE(&secstrings_hdr->sh_offset);
+ for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
+ Elf_Shdr *sh = addr + GET_LE(&hdr->e_shoff) +
+ GET_LE(&hdr->e_shentsize) * i;
+ if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
+ symtab_hdr = sh;
+
+ if (!strcmp(secstrings + GET_LE(&sh->sh_name),
+ ".altinstructions"))
+ alt_sec = sh;
+ }
+
+ if (!symtab_hdr)
+ fail("no symbol table\n");
+
+ strtab_hdr = addr + GET_LE(&hdr->e_shoff) +
+ GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
+
+ /* Walk the symbol table */
+ for (i = 0;
+ i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
+ i++) {
+ int k;
+ Elf_Sym *sym = addr + GET_LE(&symtab_hdr->sh_offset) +
+ GET_LE(&symtab_hdr->sh_entsize) * i;
+ const char *name = addr + GET_LE(&strtab_hdr->sh_offset) +
+ GET_LE(&sym->st_name);
+ for (k = 0; k < NSYMS; k++) {
+ if (!strcmp(name, required_syms[k])) {
+ if (syms[k]) {
+ fail("duplicate symbol %s\n",
+ required_syms[k]);
+ }
+ syms[k] = GET_LE(&sym->st_value);
+ }
+ }
+ }
+
+ /* Validate mapping addresses. */
+ for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
+ if (!syms[i])
+ continue; /* The mapping isn't used; ignore it. */
+
+ if (syms[i] % 4096)
+ fail("%s must be a multiple of 4096\n",
+ required_syms[i]);
+ if (syms[i] < data_size)
+ fail("%s must be after the text mapping\n",
+ required_syms[i]);
+ if (syms[sym_end_mapping] < syms[i] + 4096)
+ fail("%s overruns end_mapping\n", required_syms[i]);
+ }
+ if (syms[sym_end_mapping] % 4096)
+ fail("end_mapping must be a multiple of 4096\n");
+
+ /* Remove sections. */
+ hdr->e_shoff = 0;
+ hdr->e_shentsize = 0;
+ hdr->e_shnum = 0;
+ hdr->e_shstrndx = htole16(SHN_UNDEF);
+
+ if (!name) {
+ fwrite(addr, load_size, 1, outfile);
+ return;
+ }
+
+ fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
+ fprintf(outfile, "#include <linux/linkage.h>\n");
+ fprintf(outfile, "#include <asm/page_types.h>\n");
+ fprintf(outfile, "#include <asm/vdso.h>\n");
+ fprintf(outfile, "\n");
+ fprintf(outfile,
+ "static unsigned char raw_data[%lu] __page_aligned_data = {",
+ data_size);
+ for (j = 0; j < load_size; j++) {
+ if (j % 10 == 0)
+ fprintf(outfile, "\n\t");
+ fprintf(outfile, "0x%02X, ", (int)((unsigned char *)addr)[j]);
+ }
+ fprintf(outfile, "\n};\n\n");
+
+ fprintf(outfile, "static struct page *pages[%lu];\n\n",
+ data_size / 4096);
+
+ fprintf(outfile, "const struct vdso_image %s = {\n", name);
+ fprintf(outfile, "\t.data = raw_data,\n");
+ fprintf(outfile, "\t.size = %lu,\n", data_size);
+ fprintf(outfile, "\t.text_mapping = {\n");
+ fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
+ fprintf(outfile, "\t\t.pages = pages,\n");
+ fprintf(outfile, "\t},\n");
+ if (alt_sec) {
+ fprintf(outfile, "\t.alt = %lu,\n",
+ (unsigned long)GET_LE(&alt_sec->sh_offset));
+ fprintf(outfile, "\t.alt_len = %lu,\n",
+ (unsigned long)GET_LE(&alt_sec->sh_size));
+ }
+ for (i = 0; i < NSYMS; i++) {
+ if (syms[i])
+ fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n",
+ required_syms[i], syms[i]);
+ }
+ fprintf(outfile, "};\n");
+}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index e1f220e3ca68..e4f7781ee162 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -8,27 +8,12 @@
#include <linux/init.h>
#include <linux/smp.h>
-#include <linux/thread_info.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include <linux/string.h>
-#include <linux/elf.h>
-#include <linux/mm.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/mm_types.h>
#include <asm/cpufeature.h>
-#include <asm/msr.h>
-#include <asm/pgtable.h>
-#include <asm/unistd.h>
-#include <asm/elf.h>
-#include <asm/tlbflush.h>
+#include <asm/processor.h>
#include <asm/vdso.h>
-#include <asm/proto.h>
-#include <asm/fixmap.h>
-#include <asm/hpet.h>
-#include <asm/vvar.h>
#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT 0
@@ -36,23 +21,17 @@
#define VDSO_DEFAULT 1
#endif
-#ifdef CONFIG_X86_64
-#define vdso_enabled sysctl_vsyscall32
-#define arch_setup_additional_pages syscall32_setup_pages
-extern int sysctl_ldt16;
-#endif
-
/*
* Should the kernel map a VDSO page into processes and pass its
* address down to glibc upon exec()?
*/
-unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
+unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT;
-static int __init vdso_setup(char *s)
+static int __init vdso32_setup(char *s)
{
- vdso_enabled = simple_strtoul(s, NULL, 0);
+ vdso32_enabled = simple_strtoul(s, NULL, 0);
- if (vdso_enabled > 1)
+ if (vdso32_enabled > 1)
pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
return 1;
@@ -63,177 +42,45 @@ static int __init vdso_setup(char *s)
* behavior on both 64-bit and 32-bit kernels.
* On 32-bit kernels, vdso=[012] means the same thing.
*/
-__setup("vdso32=", vdso_setup);
+__setup("vdso32=", vdso32_setup);
#ifdef CONFIG_X86_32
-__setup_param("vdso=", vdso32_setup, vdso_setup, 0);
-
-EXPORT_SYMBOL_GPL(vdso_enabled);
+__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
#endif
-static struct page **vdso32_pages;
-static unsigned vdso32_size;
-
#ifdef CONFIG_X86_64
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
-/* May not be __init: called during resume */
-void syscall32_cpu_init(void)
-{
- /* Load these always in case some future AMD CPU supports
- SYSENTER from compat mode too. */
- wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
- wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
- wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
-
#else /* CONFIG_X86_32 */
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
#define vdso32_syscall() (0)
-void enable_sep_cpu(void)
-{
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
- if (!boot_cpu_has(X86_FEATURE_SEP)) {
- put_cpu();
- return;
- }
-
- tss->x86_tss.ss1 = __KERNEL_CS;
- tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
- wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
- put_cpu();
-}
-
#endif /* CONFIG_X86_64 */
+#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
+const struct vdso_image *selected_vdso32;
+#endif
+
int __init sysenter_setup(void)
{
- char *vdso32_start, *vdso32_end;
- int npages, i;
-
#ifdef CONFIG_COMPAT
- if (vdso32_syscall()) {
- vdso32_start = vdso32_syscall_start;
- vdso32_end = vdso32_syscall_end;
- vdso32_pages = vdso32_syscall_pages;
- } else
+ if (vdso32_syscall())
+ selected_vdso32 = &vdso_image_32_syscall;
+ else
#endif
- if (vdso32_sysenter()) {
- vdso32_start = vdso32_sysenter_start;
- vdso32_end = vdso32_sysenter_end;
- vdso32_pages = vdso32_sysenter_pages;
- } else {
- vdso32_start = vdso32_int80_start;
- vdso32_end = vdso32_int80_end;
- vdso32_pages = vdso32_int80_pages;
- }
-
- npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE;
- vdso32_size = npages << PAGE_SHIFT;
- for (i = 0; i < npages; i++)
- vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE);
+ if (vdso32_sysenter())
+ selected_vdso32 = &vdso_image_32_sysenter;
+ else
+ selected_vdso32 = &vdso_image_32_int80;
- patch_vdso32(vdso32_start, vdso32_size);
+ init_vdso_image(selected_vdso32);
return 0;
}
-/* Setup a VMA at program startup for the vsyscall page */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr;
- int ret = 0;
- struct vm_area_struct *vma;
-
-#ifdef CONFIG_X86_X32_ABI
- if (test_thread_flag(TIF_X32))
- return x32_setup_additional_pages(bprm, uses_interp);
-#endif
-
- if (vdso_enabled != 1) /* Other values all mean "disabled" */
- return 0;
-
- down_write(&mm->mmap_sem);
-
- addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0);
- if (IS_ERR_VALUE(addr)) {
- ret = addr;
- goto up_fail;
- }
-
- addr += VDSO_OFFSET(VDSO_PREV_PAGES);
-
- current->mm->context.vdso = (void *)addr;
-
- /*
- * MAYWRITE to allow gdb to COW and set breakpoints
- */
- ret = install_special_mapping(mm,
- addr,
- vdso32_size,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso32_pages);
-
- if (ret)
- goto up_fail;
-
- vma = _install_special_mapping(mm,
- addr - VDSO_OFFSET(VDSO_PREV_PAGES),
- VDSO_OFFSET(VDSO_PREV_PAGES),
- VM_READ,
- NULL);
-
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto up_fail;
- }
-
- ret = remap_pfn_range(vma,
- addr - VDSO_OFFSET(VDSO_VVAR_PAGE),
- __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
- PAGE_SIZE,
- PAGE_READONLY);
-
- if (ret)
- goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
- if (hpet_address) {
- ret = io_remap_pfn_range(vma,
- addr - VDSO_OFFSET(VDSO_HPET_PAGE),
- hpet_address >> PAGE_SHIFT,
- PAGE_SIZE,
- pgprot_noncached(PAGE_READONLY));
-
- if (ret)
- goto up_fail;
- }
-#endif
-
- current_thread_info()->sysenter_return =
- VDSO32_SYMBOL(addr, SYSENTER_RETURN);
-
- up_fail:
- if (ret)
- current->mm->context.vdso = NULL;
-
- up_write(&mm->mmap_sem);
-
- return ret;
-}
-
#ifdef CONFIG_X86_64
subsys_initcall(sysenter_setup);
@@ -245,14 +92,7 @@ subsys_initcall(sysenter_setup);
static struct ctl_table abi_table2[] = {
{
.procname = "vsyscall32",
- .data = &sysctl_vsyscall32,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "ldt16",
- .data = &sysctl_ldt16,
+ .data = &vdso32_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
@@ -279,13 +119,6 @@ __initcall(ia32_binfmt_init);
#else /* CONFIG_X86_32 */
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
- return "[vdso]";
- return NULL;
-}
-
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
deleted file mode 100644
index 018bcd9f97b4..000000000000
--- a/arch/x86/vdso/vdso32.S
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <asm/vdso.h>
-
-DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so")
-
-#ifdef CONFIG_COMPAT
-DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so")
-#endif
-
-DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so")
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
index aadb8b9994cd..31056cf294bf 100644
--- a/arch/x86/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/vdso/vdso32/vdso32.lds.S
@@ -1,17 +1,14 @@
/*
* Linker script for 32-bit vDSO.
* We #include the file to define the layout details.
- * Here we only choose the prelinked virtual address.
*
* This file defines the version script giving the user-exported symbols in
- * the DSO. We can define local symbols here called VDSO* to make their
- * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ * the DSO.
*/
#include <asm/page.h>
#define BUILD_VDSO32
-#define VDSO_PRELINK 0
#include "../vdso-layout.lds.S"
@@ -38,13 +35,3 @@ VERSION
local: *;
};
}
-
-/*
- * Symbols we define here called VDSO* get their values into vdso32-syms.h.
- */
-VDSO32_vsyscall = __kernel_vsyscall;
-VDSO32_sigreturn = __kernel_sigreturn;
-VDSO32_rt_sigreturn = __kernel_rt_sigreturn;
-VDSO32_clock_gettime = clock_gettime;
-VDSO32_gettimeofday = gettimeofday;
-VDSO32_time = time;
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S
deleted file mode 100644
index f4aa34e7f370..000000000000
--- a/arch/x86/vdso/vdsox32.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm/vdso.h>
-
-DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so")
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S
index 62272aa2ae0a..46b991b578a8 100644
--- a/arch/x86/vdso/vdsox32.lds.S
+++ b/arch/x86/vdso/vdsox32.lds.S
@@ -1,14 +1,11 @@
/*
* Linker script for x32 vDSO.
* We #include the file to define the layout details.
- * Here we only choose the prelinked virtual address.
*
* This file defines the version script giving the user-exported symbols in
- * the DSO. We can define local symbols here called VDSO* to make their
- * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ * the DSO.
*/
-#define VDSO_PRELINK 0
#include "vdso-layout.lds.S"
/*
@@ -24,5 +21,3 @@ VERSION {
local: *;
};
}
-
-VDSOX32_PRELINK = VDSO_PRELINK;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 1ad102613127..e1513c47872a 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -15,115 +15,51 @@
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/page.h>
+#include <asm/hpet.h>
#if defined(CONFIG_X86_64)
-unsigned int __read_mostly vdso_enabled = 1;
+unsigned int __read_mostly vdso64_enabled = 1;
-DECLARE_VDSO_IMAGE(vdso);
extern unsigned short vdso_sync_cpuid;
-static unsigned vdso_size;
-
-#ifdef CONFIG_X86_X32_ABI
-DECLARE_VDSO_IMAGE(vdsox32);
-static unsigned vdsox32_size;
-#endif
#endif
-#if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \
- defined(CONFIG_COMPAT)
-void __init patch_vdso32(void *vdso, size_t len)
+void __init init_vdso_image(const struct vdso_image *image)
{
- Elf32_Ehdr *hdr = vdso;
- Elf32_Shdr *sechdrs, *alt_sec = 0;
- char *secstrings;
- void *alt_data;
int i;
+ int npages = (image->size) / PAGE_SIZE;
- BUG_ON(len < sizeof(Elf32_Ehdr));
- BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
-
- sechdrs = (void *)hdr + hdr->e_shoff;
- secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
- for (i = 1; i < hdr->e_shnum; i++) {
- Elf32_Shdr *shdr = &sechdrs[i];
- if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
- alt_sec = shdr;
- goto found;
- }
- }
-
- /* If we get here, it's probably a bug. */
- pr_warning("patch_vdso32: .altinstructions not found\n");
- return; /* nothing to patch */
+ BUG_ON(image->size % PAGE_SIZE != 0);
+ for (i = 0; i < npages; i++)
+ image->text_mapping.pages[i] =
+ virt_to_page(image->data + i*PAGE_SIZE);
-found:
- alt_data = (void *)hdr + alt_sec->sh_offset;
- apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
+ apply_alternatives((struct alt_instr *)(image->data + image->alt),
+ (struct alt_instr *)(image->data + image->alt +
+ image->alt_len));
}
-#endif
#if defined(CONFIG_X86_64)
-static void __init patch_vdso64(void *vdso, size_t len)
-{
- Elf64_Ehdr *hdr = vdso;
- Elf64_Shdr *sechdrs, *alt_sec = 0;
- char *secstrings;
- void *alt_data;
- int i;
-
- BUG_ON(len < sizeof(Elf64_Ehdr));
- BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
-
- sechdrs = (void *)hdr + hdr->e_shoff;
- secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
- for (i = 1; i < hdr->e_shnum; i++) {
- Elf64_Shdr *shdr = &sechdrs[i];
- if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
- alt_sec = shdr;
- goto found;
- }
- }
-
- /* If we get here, it's probably a bug. */
- pr_warning("patch_vdso64: .altinstructions not found\n");
- return; /* nothing to patch */
-
-found:
- alt_data = (void *)hdr + alt_sec->sh_offset;
- apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
-}
-
static int __init init_vdso(void)
{
- int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
- int i;
-
- patch_vdso64(vdso_start, vdso_end - vdso_start);
-
- vdso_size = npages << PAGE_SHIFT;
- for (i = 0; i < npages; i++)
- vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
+ init_vdso_image(&vdso_image_64);
#ifdef CONFIG_X86_X32_ABI
- patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start);
- npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE;
- vdsox32_size = npages << PAGE_SHIFT;
- for (i = 0; i < npages; i++)
- vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE);
+ init_vdso_image(&vdso_image_x32);
#endif
return 0;
}
subsys_initcall(init_vdso);
+#endif
struct linux_binprm;
/* Put the vdso above the (randomized) stack with another randomized offset.
This way there is no hole in the middle of address space.
To save memory make sure it is still in the same PTE as the stack top.
- This doesn't give that many random bits */
+ This doesn't give that many random bits.
+
+ Only used for the 64-bit and x32 vdsos. */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
unsigned long addr, end;
@@ -149,61 +85,149 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
return addr;
}
-/* Setup a VMA at program startup for the vsyscall page.
- Not called for compat tasks */
-static int setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp,
- struct page **pages,
- unsigned size)
+static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
unsigned long addr;
- int ret;
-
- if (!vdso_enabled)
- return 0;
+ int ret = 0;
+ static struct page *no_pages[] = {NULL};
+ static struct vm_special_mapping vvar_mapping = {
+ .name = "[vvar]",
+ .pages = no_pages,
+ };
+
+ if (calculate_addr) {
+ addr = vdso_addr(current->mm->start_stack,
+ image->sym_end_mapping);
+ } else {
+ addr = 0;
+ }
down_write(&mm->mmap_sem);
- addr = vdso_addr(mm->start_stack, size);
- addr = get_unmapped_area(NULL, addr, size, 0, 0);
+
+ addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
- current->mm->context.vdso = (void *)addr;
+ current->mm->context.vdso = (void __user *)addr;
- ret = install_special_mapping(mm, addr, size,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- pages);
- if (ret) {
- current->mm->context.vdso = NULL;
+ /*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ */
+ vma = _install_special_mapping(mm,
+ addr,
+ image->size,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ &image->text_mapping);
+
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto up_fail;
}
+ vma = _install_special_mapping(mm,
+ addr + image->size,
+ image->sym_end_mapping - image->size,
+ VM_READ,
+ &vvar_mapping);
+
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto up_fail;
+ }
+
+ if (image->sym_vvar_page)
+ ret = remap_pfn_range(vma,
+ addr + image->sym_vvar_page,
+ __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
+ PAGE_SIZE,
+ PAGE_READONLY);
+
+ if (ret)
+ goto up_fail;
+
+#ifdef CONFIG_HPET_TIMER
+ if (hpet_address && image->sym_hpet_page) {
+ ret = io_remap_pfn_range(vma,
+ addr + image->sym_hpet_page,
+ hpet_address >> PAGE_SHIFT,
+ PAGE_SIZE,
+ pgprot_noncached(PAGE_READONLY));
+
+ if (ret)
+ goto up_fail;
+ }
+#endif
+
up_fail:
+ if (ret)
+ current->mm->context.vdso = NULL;
+
up_write(&mm->mmap_sem);
return ret;
}
+#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
+static int load_vdso32(void)
+{
+ int ret;
+
+ if (vdso32_enabled != 1) /* Other values all mean "disabled" */
+ return 0;
+
+ ret = map_vdso(selected_vdso32, false);
+ if (ret)
+ return ret;
+
+ if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
+ current_thread_info()->sysenter_return =
+ current->mm->context.vdso +
+ selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- return setup_additional_pages(bprm, uses_interp, vdso_pages,
- vdso_size);
+ if (!vdso64_enabled)
+ return 0;
+
+ return map_vdso(&vdso_image_64, true);
}
+#ifdef CONFIG_COMPAT
+int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp)
+{
#ifdef CONFIG_X86_X32_ABI
-int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+ if (test_thread_flag(TIF_X32)) {
+ if (!vdso64_enabled)
+ return 0;
+
+ return map_vdso(&vdso_image_x32, true);
+ }
+#endif
+
+ return load_vdso32();
+}
+#endif
+#else
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- return setup_additional_pages(bprm, uses_interp, vdsox32_pages,
- vdsox32_size);
+ return load_vdso32();
}
#endif
+#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
- vdso_enabled = simple_strtoul(s, NULL, 0);
+ vdso64_enabled = simple_strtoul(s, NULL, 0);
return 0;
}
__setup("vdso=", vdso_setup);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6f6e15d28466..e8a1201c3293 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1494,7 +1494,7 @@ static int xen_pgd_alloc(struct mm_struct *mm)
page->private = (unsigned long)user_pgd;
if (user_pgd != NULL) {
- user_pgd[pgd_index(VSYSCALL_START)] =
+ user_pgd[pgd_index(VSYSCALL_ADDR)] =
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
ret = 0;
}
@@ -2062,8 +2062,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
case FIX_KMAP_BEGIN ... FIX_KMAP_END:
# endif
#else
- case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
- case VVAR_PAGE:
+ case VSYSCALL_PAGE:
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
@@ -2104,8 +2103,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#ifdef CONFIG_X86_64
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
- if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
- idx == VVAR_PAGE) {
+ if (idx == VSYSCALL_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 210426a26cc0..821a11ada590 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -525,10 +525,17 @@ char * __init xen_memory_setup(void)
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
+ /*
+ * This could be called before selected_vdso32 is initialized, so
+ * just fiddle with both possible images. vdso_image_32_syscall
+ * can't be selected, since it only exists on 64-bit systems.
+ */
u32 *mask;
- mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
+ mask = vdso_image_32_int80.data +
+ vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
- mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
+ mask = vdso_image_32_sysenter.data +
+ vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}
OpenPOWER on IntegriCloud