34 files changed, 568 insertions, 3768 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9bcd0b56ca17..01663ee5f1b7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,7 +22,7 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
 
 CFLAGS_irq.o := -I$(src)/../include/asm/trace
 
-obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
+obj-y			:= process_$(BITS).o signal.o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
@@ -31,9 +31,6 @@ obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= mcount_64.o
-obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_IA32_EMULATION)	+= syscall_32.o
-obj-$(CONFIG_X86_VSYSCALL_EMULATION)	+= vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index dcaab87da629..d8f42f902a0f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -66,7 +66,7 @@ int main(void)
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
 	DEFINE(NR_syscalls, sizeof(syscalls_64));
 
-	DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1);
+	DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
 	DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e4cf63301ff4..eb4f01269b5d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -288,7 +288,7 @@ static int nearby_node(int apicid)
  *     Assumption: Number of cores in each internal node is the same.
  * (2) AMD processors supporting compute units
  */
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 static void amd_get_topology(struct cpuinfo_x86 *c)
 {
 	u32 nodes, cores_per_cu = 1;
@@ -341,7 +341,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
  */
 static void amd_detect_cmp(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 	unsigned bits;
 	int cpu = smp_processor_id();
 
@@ -420,7 +420,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
 
 static void early_init_amd_mc(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 	unsigned bits, ecx;
 
 	/* Multi core CPU? */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6bec0b55863e..cc7f753e571d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -508,7 +508,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
 
 void detect_ht(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 	u32 eax, ebx, ecx, edx;
 	int index_msb, core_bits;
 	static bool printed;
@@ -844,7 +844,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
 	if (c->cpuid_level >= 0x00000001) {
 		c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
 #ifdef CONFIG_X86_32
-# ifdef CONFIG_X86_HT
+# ifdef CONFIG_SMP
 		c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 # else
 		c->apicid = c->initial_apicid;
@@ -1026,7 +1026,7 @@ void enable_sep_cpu(void)
 	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
 	      0);
 
-	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
 out:
 	put_cpu();
@@ -1204,10 +1204,10 @@ void syscall_init(void)
 	 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
 	 */
 	wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
-	wrmsrl(MSR_LSTAR, system_call);
+	wrmsrl(MSR_LSTAR, entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
-	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+	wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);
 	/*
 	 * This only works on Intel CPUs.
 	 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
@@ -1216,7 +1216,7 @@ void syscall_init(void)
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
 	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, ignore_sysret);
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index edcb0e28c336..be4febc58b94 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -654,7 +654,7 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 	unsigned int cpu = c->cpu_index;
 #endif
 
@@ -773,19 +773,19 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 
 	if (new_l2) {
 		l2 = new_l2;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 		per_cpu(cpu_llc_id, cpu) = l2_id;
 #endif
 	}
 
 	if (new_l3) {
 		l3 = new_l3;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 		per_cpu(cpu_llc_id, cpu) = l3_id;
 #endif
 	}
 
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 	/*
 	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
 	 * turns means that the only possibility is SMT (as indicated in
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e535533d5ab8..5b974c97e31e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			  struct pt_regs *regs)
 {
 	int i, ret = 0;
+	char *tmp;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
 		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			if (quirk_no_way_out)
 				quirk_no_way_out(i, m, regs);
 		}
-		if (mce_severity(m, mca_cfg.tolerant, msg, true) >=
-		    MCE_PANIC_SEVERITY)
+
+		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+			*msg = tmp;
 			ret = 1;
+		}
 	}
 	return ret;
 }
@@ -1047,6 +1050,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	char *msg = "Unknown";
 	u64 recover_paddr = ~0ull;
 	int flags = MF_ACTION_REQUIRED;
+	int lmce = 0;
 
 	prev_state = ist_enter(regs);
 
@@ -1074,11 +1078,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		kill_it = 1;
 
 	/*
-	 * Go through all the banks in exclusion of the other CPUs.
-	 * This way we don't report duplicated events on shared banks
-	 * because the first one to see it will clear it.
+	 * Check if this MCE is signaled to only this logical processor
 	 */
-	order = mce_start(&no_way_out);
+	if (m.mcgstatus & MCG_STATUS_LMCES)
+		lmce = 1;
+	else {
+		/*
+		 * Go through all the banks in exclusion of the other CPUs.
+		 * This way we don't report duplicated events on shared banks
+		 * because the first one to see it will clear it.
+		 * If this is a Local MCE, then no need to perform rendezvous.
+		 */
+		order = mce_start(&no_way_out);
+	}
+
 	for (i = 0; i < cfg->banks; i++) {
 		__clear_bit(i, toclear);
 		if (!test_bit(i, valid_banks))
@@ -1155,8 +1168,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * Do most of the synchronization with other CPUs.
 	 * When there's any problem use only local no_way_out state.
 	 */
-	if (mce_end(order) < 0)
-		no_way_out = worst >= MCE_PANIC_SEVERITY;
+	if (!lmce) {
+		if (mce_end(order) < 0)
+			no_way_out = worst >= MCE_PANIC_SEVERITY;
+	} else {
+		/*
+		 * Local MCE skipped calling mce_reign()
+		 * If we found a fatal error, we need to panic here.
+		 */
+		 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+			mce_panic("Machine check from unknown source",
+				NULL, NULL);
+	}
 
 	/*
 	 * At insane "tolerant" levels we take no action. Otherwise
@@ -1637,10 +1660,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 		mce_intel_feature_init(c);
 		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
-	case X86_VENDOR_AMD:
+
+	case X86_VENDOR_AMD: {
+		u32 ebx = cpuid_ebx(0x80000007);
+
 		mce_amd_feature_init(c);
-		mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
+		mce_flags.overflow_recov = !!(ebx & BIT(0));
+		mce_flags.succor	 = !!(ebx & BIT(1));
 		break;
+		}
+
 	default:
 		break;
 	}
@@ -1976,6 +2005,7 @@ void mce_disable_bank(int bank)
 /*
  * mce=off Disables machine check
  * mce=no_cmci Disables CMCI
+ * mce=no_lmce Disables LMCE
  * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
  * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
  * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -1999,6 +2029,8 @@ static int __init mcheck_enable(char *str)
 		cfg->disabled = true;
 	else if (!strcmp(str, "no_cmci"))
 		cfg->cmci_disabled = true;
+	else if (!strcmp(str, "no_lmce"))
+		cfg->lmce_disabled = true;
 	else if (!strcmp(str, "dont_log_ce"))
 		cfg->dont_log_ce = true;
 	else if (!strcmp(str, "ignore_ce"))
@@ -2008,11 +2040,8 @@ static int __init mcheck_enable(char *str)
 	else if (!strcmp(str, "bios_cmci_threshold"))
 		cfg->bios_cmci_threshold = true;
 	else if (isdigit(str[0])) {
-		get_option(&str, &(cfg->tolerant));
-		if (*str == ',') {
-			++str;
+		if (get_option(&str, &cfg->tolerant) == 2)
 			get_option(&str, &(cfg->monarch_timeout));
-		}
 	} else {
 		pr_info("mce argument %s ignored. Please use /sys\n", str);
 		return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 55ad9b37cae8..e99b15077e94 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,19 +1,13 @@
 /*
- *  (c) 2005-2012 Advanced Micro Devices, Inc.
+ *  (c) 2005-2015 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
  *
  *  Written by Jacob Shin - AMD, Inc.
- *
  *  Maintained by: Borislav Petkov <bp@alien8.de>
  *
- *  April 2006
- *     - added support for AMD Family 0x10 processors
- *  May 2012
- *     - major scrubbing
- *
- *  All MC4_MISCi registers are shared between multi-cores
+ *  All MC4_MISCi registers are shared between cores on a node.
  */
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
@@ -32,6 +26,7 @@
 #include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
 
 #define NR_BLOCKS         9
 #define THRESHOLD_MAX     0xFFF
@@ -47,6 +42,13 @@
 #define MASK_BLKPTR_LO    0xFF000000
 #define MCG_XBLK_ADDR     0xC0000400
 
+/* Deferred error settings */
+#define MSR_CU_DEF_ERR		0xC0000410
+#define MASK_DEF_LVTOFF		0x000000F0
+#define MASK_DEF_INT_TYPE	0x00000006
+#define DEF_LVT_OFF		0x2
+#define DEF_INT_TYPE_APIC	0x2
+
 static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
@@ -60,6 +62,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
 
 static void amd_threshold_interrupt(void);
+static void amd_deferred_error_interrupt(void);
+
+static void default_deferred_error_interrupt(void)
+{
+	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
+}
+void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
 
 /*
  * CPU Initialization
@@ -196,7 +205,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
 	threshold_restart_bank(&tr);
 };
 
-static int setup_APIC_mce(int reserved, int new)
+static int setup_APIC_mce_threshold(int reserved, int new)
 {
 	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
 					      APIC_EILVT_MSG_FIX, 0))
@@ -205,6 +214,39 @@ static int setup_APIC_mce(int reserved, int new)
 	return reserved;
 }
 
+static int setup_APIC_deferred_error(int reserved, int new)
+{
+	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
+					      APIC_EILVT_MSG_FIX, 0))
+		return new;
+
+	return reserved;
+}
+
+static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
+{
+	u32 low = 0, high = 0;
+	int def_offset = -1, def_new;
+
+	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
+		return;
+
+	def_new = (low & MASK_DEF_LVTOFF) >> 4;
+	if (!(low & MASK_DEF_LVTOFF)) {
+		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
+		def_new = DEF_LVT_OFF;
+		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
+	}
+
+	def_offset = setup_APIC_deferred_error(def_offset, def_new);
+	if ((def_offset == def_new) &&
+	    (deferred_error_int_vector != amd_deferred_error_interrupt))
+		deferred_error_int_vector = amd_deferred_error_interrupt;
+
+	low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+	wrmsr(MSR_CU_DEF_ERR, low, high);
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
@@ -252,7 +294,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 
 			b.interrupt_enable = 1;
 			new	= (high & MASK_LVTOFF_HI) >> 20;
-			offset  = setup_APIC_mce(offset, new);
+			offset  = setup_APIC_mce_threshold(offset, new);
 
 			if ((offset == new) &&
 			    (mce_threshold_vector != amd_threshold_interrupt))
@@ -262,6 +304,73 @@ init:
 			mce_threshold_block_init(&b, offset);
 		}
 	}
+
+	if (mce_flags.succor)
+		deferred_error_interrupt_enable(c);
+}
+
+static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+{
+	struct mce m;
+	u64 status;
+
+	rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+	if (!(status & MCI_STATUS_VAL))
+		return;
+
+	mce_setup(&m);
+
+	m.status = status;
+	m.bank = bank;
+
+	if (threshold_err)
+		m.misc = misc;
+
+	if (m.status & MCI_STATUS_ADDRV)
+		rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+
+	mce_log(&m);
+	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+}
+
+static inline void __smp_deferred_error_interrupt(void)
+{
+	inc_irq_stat(irq_deferred_error_count);
+	deferred_error_int_vector();
+}
+
+asmlinkage __visible void smp_deferred_error_interrupt(void)
+{
+	entering_irq();
+	__smp_deferred_error_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+{
+	entering_irq();
+	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
+	__smp_deferred_error_interrupt();
+	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
+	exiting_ack_irq();
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
+{
+	u64 status;
+	unsigned int bank;
+
+	for (bank = 0; bank < mca_cfg.banks; ++bank) {
+		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+
+		if (!(status & MCI_STATUS_VAL) ||
+		    !(status & MCI_STATUS_DEFERRED))
+			continue;
+
+		__log_error(bank, false, 0);
+		break;
+	}
 }
 
 /*
@@ -273,12 +382,12 @@ init:
  * the interrupt goes off when error_count reaches threshold_limit.
  * the handler will simply log mcelog w/ software defined bank number.
  */
+
 static void amd_threshold_interrupt(void)
 {
 	u32 low = 0, high = 0, address = 0;
 	int cpu = smp_processor_id();
 	unsigned int bank, block;
-	struct mce m;
 
 	/* assume first bank caused it */
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -321,15 +430,7 @@ static void amd_threshold_interrupt(void)
 	return;
 
 log:
-	mce_setup(&m);
-	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
-	if (!(m.status & MCI_STATUS_VAL))
-		return;
-	m.misc = ((u64)high << 32) | low;
-	m.bank = bank;
-	mce_log(&m);
-
-	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+	__log_error(bank, true, ((u64)high << 32) | low);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b4a41cf030ed..844f56c5616d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -91,6 +91,36 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 }
 
+static bool lmce_supported(void)
+{
+	u64 tmp;
+
+	if (mca_cfg.lmce_disabled)
+		return false;
+
+	rdmsrl(MSR_IA32_MCG_CAP, tmp);
+
+	/*
+	 * LMCE depends on recovery support in the processor. Hence both
+	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
+	 */
+	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
+		   (MCG_SER_P | MCG_LMCE_P))
+		return false;
+
+	/*
+	 * BIOS should indicate support for LMCE by setting bit 20 in
+	 * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
+	 * generate a #GP fault.
+	 */
+	rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
+	if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
+		   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
+		return true;
+
+	return false;
+}
+
 bool mce_intel_cmci_poll(void)
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,8 +435,22 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 }
 
+void intel_init_lmce(void)
+{
+	u64 val;
+
+	if (!lmce_supported())
+		return;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+
+	if (!(val & MCG_EXT_CTL_LMCE_EN))
+		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
 	intel_init_cmci();
+	intel_init_lmce();
 }
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 5f90b85ff22e..70d7c93f4550 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -98,7 +98,8 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
 			continue;
 		base = range_state[i].base_pfn;
 		if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
-		    (mtrr_state.enabled & 1)) {
+		    (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
+		    (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
 			/* Var MTRR contains UC entry below 1M? Skip it: */
 			printk(BIOS_BUG_MSG, i);
 			if (base + size <= (1<<(20-PAGE_SHIFT)))
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7d74f7b3c6ba..3b533cf37c74 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -102,59 +102,76 @@ static int check_type_overlap(u8 *prev, u8 *curr)
 	return 0;
 }
 
-/*
- * Error/Semi-error returns:
- * 0xFF - when MTRR is not enabled
- * *repeat == 1 implies [start:end] spanned across MTRR range and type returned
- *		corresponds only to [start:*partial_end].
- *		Caller has to lookup again for [*partial_end:end].
+/**
+ * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries
+ *
+ * Return the MTRR fixed memory type of 'start'.
+ *
+ * MTRR fixed entries are divided into the following ways:
+ *  0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges
+ *  0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges
+ *  0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges
+ *
+ * Return Values:
+ * MTRR_TYPE_(type)  - Matched memory type
+ * MTRR_TYPE_INVALID - Unmatched
+ */
+static u8 mtrr_type_lookup_fixed(u64 start, u64 end)
+{
+	int idx;
+
+	if (start >= 0x100000)
+		return MTRR_TYPE_INVALID;
+
+	/* 0x0 - 0x7FFFF */
+	if (start < 0x80000) {
+		idx = 0;
+		idx += (start >> 16);
+		return mtrr_state.fixed_ranges[idx];
+	/* 0x80000 - 0xBFFFF */
+	} else if (start < 0xC0000) {
+		idx = 1 * 8;
+		idx += ((start - 0x80000) >> 14);
+		return mtrr_state.fixed_ranges[idx];
+	}
+
+	/* 0xC0000 - 0xFFFFF */
+	idx = 3 * 8;
+	idx += ((start - 0xC0000) >> 12);
+	return mtrr_state.fixed_ranges[idx];
+}
+
+/**
+ * mtrr_type_lookup_variable - look up memory type in MTRR variable entries
+ *
+ * Return Value:
+ * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched)
+ *
+ * Output Arguments:
+ * repeat - Set to 1 when [start:end] spanned across MTRR range and type
+ *	    returned corresponds only to [start:*partial_end].  Caller has
+ *	    to lookup again for [*partial_end:end].
+ *
+ * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
+ *	     region is fully covered by a single MTRR entry or the default
+ *	     type.
  */
-static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
+static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
+				    int *repeat, u8 *uniform)
 {
 	int i;
 	u64 base, mask;
 	u8 prev_match, curr_match;
 
 	*repeat = 0;
-	if (!mtrr_state_set)
-		return 0xFF;
-
-	if (!mtrr_state.enabled)
-		return 0xFF;
+	*uniform = 1;
 
-	/* Make end inclusive end, instead of exclusive */
+	/* Make end inclusive instead of exclusive */
 	end--;
 
-	/* Look in fixed ranges. Just return the type as per start */
-	if (mtrr_state.have_fixed && (start < 0x100000)) {
-		int idx;
-
-		if (start < 0x80000) {
-			idx = 0;
-			idx += (start >> 16);
-			return mtrr_state.fixed_ranges[idx];
-		} else if (start < 0xC0000) {
-			idx = 1 * 8;
-			idx += ((start - 0x80000) >> 14);
-			return mtrr_state.fixed_ranges[idx];
-		} else if (start < 0x1000000) {
-			idx = 3 * 8;
-			idx += ((start - 0xC0000) >> 12);
-			return mtrr_state.fixed_ranges[idx];
-		}
-	}
-
-	/*
-	 * Look in variable ranges
-	 * Look of multiple ranges matching this address and pick type
-	 * as per MTRR precedence
-	 */
-	if (!(mtrr_state.enabled & 2))
-		return mtrr_state.def_type;
-
-	prev_match = 0xFF;
+	prev_match = MTRR_TYPE_INVALID;
 	for (i = 0; i < num_var_ranges; ++i) {
-		unsigned short start_state, end_state;
+		unsigned short start_state, end_state, inclusive;
 
 		if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
 			continue;
@@ -166,20 +183,29 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
 
 		start_state = ((start & mask) == (base & mask));
 		end_state = ((end & mask) == (base & mask));
+		inclusive = ((start < base) && (end > base));
 
-		if (start_state != end_state) {
+		if ((start_state != end_state) || inclusive) {
 			/*
 			 * We have start:end spanning across an MTRR.
-			 * We split the region into
-			 * either
-			 * (start:mtrr_end) (mtrr_end:end)
-			 * or
-			 * (start:mtrr_start) (mtrr_start:end)
+			 * We split the region into either
+			 *
+			 * - start_state:1
+			 * (start:mtrr_end)(mtrr_end:end)
+			 * - end_state:1
+			 * (start:mtrr_start)(mtrr_start:end)
+			 * - inclusive:1
+			 * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end)
+			 *
 			 * depending on kind of overlap.
-			 * Return the type for first region and a pointer to
-			 * the start of second region so that caller will
-			 * lookup again on the second region.
-			 * Note: This way we handle multiple overlaps as well.
+			 *
+			 * Return the type of the first region and a pointer
+			 * to the start of next region so that caller will be
+			 * advised to lookup again after having adjusted start
+			 * and end.
+			 *
+			 * Note: This way we handle overlaps with multiple
+			 * entries and the default type properly.
 			 */
 			if (start_state)
 				*partial_end = base + get_mtrr_size(mask);
@@ -193,59 +219,94 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
 
 			end = *partial_end - 1; /* end is inclusive */
 			*repeat = 1;
+			*uniform = 0;
 		}
 
 		if ((start & mask) != (base & mask))
 			continue;
 
 		curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
-		if (prev_match == 0xFF) {
+		if (prev_match == MTRR_TYPE_INVALID) {
 			prev_match = curr_match;
 			continue;
 		}
 
+		*uniform = 0;
 		if (check_type_overlap(&prev_match, &curr_match))
 			return curr_match;
 	}
 
-	if (mtrr_tom2) {
-		if (start >= (1ULL<<32) && (end < mtrr_tom2))
-			return MTRR_TYPE_WRBACK;
-	}
-
-	if (prev_match != 0xFF)
+	if (prev_match != MTRR_TYPE_INVALID)
 		return prev_match;
 
 	return mtrr_state.def_type;
 }
 
-/*
- * Returns the effective MTRR type for the region
- * Error return:
- * 0xFF - when MTRR is not enabled
+/**
+ * mtrr_type_lookup - look up memory type in MTRR
+ *
+ * Return Values:
+ * MTRR_TYPE_(type)  - The effective MTRR type for the region
+ * MTRR_TYPE_INVALID - MTRR is disabled
+ *
+ * Output Argument:
+ * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
+ *	     region is fully covered by a single MTRR entry or the default
+ *	     type.
  */
-u8 mtrr_type_lookup(u64 start, u64 end)
+u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
 {
-	u8 type, prev_type;
+	u8 type, prev_type, is_uniform = 1, dummy;
 	int repeat;
 	u64 partial_end;
 
-	type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
+	if (!mtrr_state_set)
+		return MTRR_TYPE_INVALID;
+
+	if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED))
+		return MTRR_TYPE_INVALID;
+
+	/*
+	 * Look up the fixed ranges first, which take priority over
+	 * the variable ranges.
+	 */
+	if ((start < 0x100000) &&
+	    (mtrr_state.have_fixed) &&
+	    (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
+		is_uniform = 0;
+		type = mtrr_type_lookup_fixed(start, end);
+		goto out;
+	}
+
+	/*
+	 * Look up the variable ranges.  Look of multiple ranges matching
+	 * this address and pick type as per MTRR precedence.
+	 */
+	type = mtrr_type_lookup_variable(start, end, &partial_end,
+					 &repeat, &is_uniform);
 
 	/*
 	 * Common path is with repeat = 0.
 	 * However, we can have cases where [start:end] spans across some
-	 * MTRR range. Do repeated lookups for that case here.
+	 * MTRR ranges and/or the default type.  Do repeated lookups for
+	 * that case here.
 	 */
 	while (repeat) {
 		prev_type = type;
 		start = partial_end;
-		type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
+		is_uniform = 0;
+		type = mtrr_type_lookup_variable(start, end, &partial_end,
+						 &repeat, &dummy);
 
 		if (check_type_overlap(&prev_type, &type))
-			return type;
+			goto out;
 	}
 
+	if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2))
+		type = MTRR_TYPE_WRBACK;
+
+out:
+	*uniform = is_uniform;
 	return type;
 }
 
@@ -347,7 +408,9 @@ static void __init print_mtrr_state(void)
 		 mtrr_attrib_to_str(mtrr_state.def_type));
 	if (mtrr_state.have_fixed) {
 		pr_debug("MTRR fixed ranges %sabled:\n",
-			 mtrr_state.enabled & 1 ? "en" : "dis");
+			((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
+			 (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ?
+			 "en" : "dis");
 		print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
 		for (i = 0; i < 2; ++i)
 			print_fixed(0x80000 + i * 0x20000, 0x04000,
@@ -360,7 +423,7 @@ static void __init print_mtrr_state(void)
 		print_fixed_last();
 	}
 	pr_debug("MTRR variable ranges %sabled:\n",
-		 mtrr_state.enabled & 2 ? "en" : "dis");
+		 mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
 	high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
 
 	for (i = 0; i < num_var_ranges; ++i) {
@@ -382,7 +445,7 @@ static void __init print_mtrr_state(void)
 }
 
 /* Grab all of the MTRR state for this CPU into *state */
-void __init get_mtrr_state(void)
+bool __init get_mtrr_state(void)
 {
 	struct mtrr_var_range *vrs;
 	unsigned long flags;
@@ -426,6 +489,8 @@ void __init get_mtrr_state(void)
 
 	post_set();
 	local_irq_restore(flags);
+
+	return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
 }
 
 /* Some BIOS's are messed up and don't set all MTRRs the same! */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ea5f363a1948..e7ed0d8ebacb 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,6 +59,12 @@
 #define MTRR_TO_PHYS_WC_OFFSET 1000
 
 u32 num_var_ranges;
+static bool __mtrr_enabled;
+
+static bool mtrr_enabled(void)
+{
+	return __mtrr_enabled;
+}
 
 unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
@@ -286,7 +292,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 	int i, replace, error;
 	mtrr_type ltype;
 
-	if (!mtrr_if)
+	if (!mtrr_enabled())
 		return -ENXIO;
 
 	error = mtrr_if->validate_add_page(base, size, type);
@@ -435,6 +441,8 @@ static int mtrr_check(unsigned long base, unsigned long size)
 int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
 	     bool increment)
 {
+	if (!mtrr_enabled())
+		return -ENODEV;
 	if (mtrr_check(base, size))
 		return -EINVAL;
 	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
@@ -463,8 +471,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 	unsigned long lbase, lsize;
 	int error = -EINVAL;
 
-	if (!mtrr_if)
-		return -ENXIO;
+	if (!mtrr_enabled())
+		return -ENODEV;
 
 	max = num_var_ranges;
 	/* No CPU hotplug when we change MTRR entries */
@@ -523,6 +531,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
  */
 int mtrr_del(int reg, unsigned long base, unsigned long size)
 {
+	if (!mtrr_enabled())
+		return -ENODEV;
 	if (mtrr_check(base, size))
 		return -EINVAL;
 	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
@@ -538,6 +548,9 @@ EXPORT_SYMBOL(mtrr_del);
  * attempts to add a WC MTRR covering size bytes starting at base and
  * logs an error if this fails.
  *
+ * The called should provide a power of two size on an equivalent
+ * power of two boundary.
+ *
  * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
  * but drivers should not try to interpret that return value.
  */
@@ -545,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size)
 {
 	int ret;
 
-	if (pat_enabled)
+	if (pat_enabled() || !mtrr_enabled())
 		return 0;  /* Success!  (We don't need to do anything.) */
 
 	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
@@ -577,7 +590,7 @@ void arch_phys_wc_del(int handle)
 EXPORT_SYMBOL(arch_phys_wc_del);
 
 /*
- * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
+ * arch_phys_wc_index - translates arch_phys_wc_add's return value
  * @handle: Return value from arch_phys_wc_add
  *
  * This will turn the return value from arch_phys_wc_add into an mtrr
@@ -587,14 +600,14 @@ EXPORT_SYMBOL(arch_phys_wc_del);
  * in printk line.  Alas there is an illegitimate use in some ancient
  * drm ioctls.
  */
-int phys_wc_to_mtrr_index(int handle)
+int arch_phys_wc_index(int handle)
 {
 	if (handle < MTRR_TO_PHYS_WC_OFFSET)
 		return -1;
 	else
 		return handle - MTRR_TO_PHYS_WC_OFFSET;
 }
-EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
+EXPORT_SYMBOL_GPL(arch_phys_wc_index);
 
 /*
  * HACK ALERT!
@@ -734,10 +747,12 @@ void __init mtrr_bp_init(void)
 	}
 
 	if (mtrr_if) {
+		__mtrr_enabled = true;
 		set_num_var_ranges();
 		init_table();
 		if (use_intel()) {
-			get_mtrr_state();
+			/* BIOS may override */
+			__mtrr_enabled = get_mtrr_state();
 
 			if (mtrr_cleanup(phys_addr)) {
 				changed_by_mtrr_cleanup = 1;
@@ -745,10 +760,16 @@ void __init mtrr_bp_init(void)
 			}
 		}
 	}
+
+	if (!mtrr_enabled())
+		pr_info("MTRR: Disabled\n");
 }
 
 void mtrr_ap_init(void)
 {
+	if (!mtrr_enabled())
+		return;
+
 	if (!use_intel() || mtrr_aps_delayed_init)
 		return;
 	/*
@@ -774,6 +795,9 @@ void mtrr_save_state(void)
 {
 	int first_cpu;
 
+	if (!mtrr_enabled())
+		return;
+
 	get_online_cpus();
 	first_cpu = cpumask_first(cpu_online_mask);
 	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
@@ -782,6 +806,8 @@ void mtrr_save_state(void)
 
 void set_mtrr_aps_delayed_init(void)
 {
+	if (!mtrr_enabled())
+		return;
 	if (!use_intel())
 		return;
 
@@ -793,7 +819,7 @@ void set_mtrr_aps_delayed_init(void)
  */
 void mtrr_aps_init(void)
 {
-	if (!use_intel())
+	if (!use_intel() || !mtrr_enabled())
 		return;
 
 	/*
@@ -810,7 +836,7 @@ void mtrr_aps_init(void)
 
 void mtrr_bp_restore(void)
 {
-	if (!use_intel())
+	if (!use_intel() || !mtrr_enabled())
 		return;
 
 	mtrr_if->set_all();
@@ -818,7 +844,7 @@ void mtrr_bp_restore(void)
 
 static int __init mtrr_init_finialize(void)
 {
-	if (!mtrr_if)
+	if (!mtrr_enabled())
 		return 0;
 
 	if (use_intel()) {
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index df5e41f31a27..951884dcc433 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -51,7 +51,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
 void fill_mtrr_var_range(unsigned int index,
 		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
-void get_mtrr_state(void);
+bool get_mtrr_state(void);
 
 extern void set_mtrr_ops(const struct mtrr_ops *ops);
 
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c76d3e37c6e1..e068d6683dba 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -22,6 +22,7 @@
 #include <linux/elfcore.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index fe9f0b79a18b..5cb9a4d6f623 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -627,8 +627,12 @@ static struct chipset early_qrk[] __initdata = {
 	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
 	  QFLAG_APPLY_ONCE, intel_graphics_stolen },
 	/*
-	 * HPET on current version of Baytrail platform has accuracy
-	 * problems, disable it for now:
+	 * HPET on the current version of the Baytrail platform has accuracy
+	 * problems: it will halt in deep idle state - so we disable it.
+	 *
+	 * More details can be found in section 18.10.1.3 of the datasheet:
+	 *
+	 *    http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/atom-z8000-datasheet-vol-1.pdf
 	 */
 	{ PCI_VENDOR_ID_INTEL, 0x0f00,
 		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
deleted file mode 100644
index 1c309763e321..000000000000
--- a/arch/x86/kernel/entry_32.S
+++ /dev/null
@@ -1,1401 +0,0 @@
-/*
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-/*
- * entry.S contains the system-call and fault low-level handling routines.
- * This also contains the timer-interrupt handler, as well as all interrupts
- * and faults that can result in a task-switch.
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after a timer-interrupt and after each system call.
- *
- * I changed all the .align's to 4 (16 byte alignment), as that's faster
- * on a 486.
- *
- * Stack layout in 'syscall_exit':
- * 	ptrace needs to have all regs on the stack.
- *	if the order here is changed, it needs to be
- *	updated in fork.c:copy_process, signal.c:do_signal,
- *	ptrace.c and ptrace.h
- *
- *	 0(%esp) - %ebx
- *	 4(%esp) - %ecx
- *	 8(%esp) - %edx
- *       C(%esp) - %esi
- *	10(%esp) - %edi
- *	14(%esp) - %ebp
- *	18(%esp) - %eax
- *	1C(%esp) - %ds
- *	20(%esp) - %es
- *	24(%esp) - %fs
- *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
- *	2C(%esp) - orig_eax
- *	30(%esp) - %eip
- *	34(%esp) - %cs
- *	38(%esp) - %eflags
- *	3C(%esp) - %oldesp
- *	40(%esp) - %oldss
- *
- * "current" is in register %ebx during any slow entries.
- */
-
-#include <linux/linkage.h>
-#include <linux/err.h>
-#include <asm/thread_info.h>
-#include <asm/irqflags.h>
-#include <asm/errno.h>
-#include <asm/segment.h>
-#include <asm/smp.h>
-#include <asm/page_types.h>
-#include <asm/percpu.h>
-#include <asm/dwarf2.h>
-#include <asm/processor-flags.h>
-#include <asm/ftrace.h>
-#include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
-#include <asm/alternative-asm.h>
-#include <asm/asm.h>
-#include <asm/smap.h>
-
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_LE	   0x40000000
-
-#ifndef CONFIG_AUDITSYSCALL
-#define sysenter_audit	syscall_trace_entry
-#define sysexit_audit	syscall_exit_work
-#endif
-
-	.section .entry.text, "ax"
-
-/*
- * We use macros for low-level operations which need to be overridden
- * for paravirtualization.  The following will never clobber any registers:
- *   INTERRUPT_RETURN (aka. "iret")
- *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
- *
- * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
- * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
- * Allowing a register to be clobbered can shrink the paravirt replacement
- * enough to patch inline, increasing performance.
- */
-
-#ifdef CONFIG_PREEMPT
-#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
-#else
-#define preempt_stop(clobbers)
-#define resume_kernel		restore_all
-#endif
-
-.macro TRACE_IRQS_IRET
-#ifdef CONFIG_TRACE_IRQFLAGS
-	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
-	jz 1f
-	TRACE_IRQS_ON
-1:
-#endif
-.endm
-
-/*
- * User gs save/restore
- *
- * %gs is used for userland TLS and kernel only uses it for stack
- * canary which is required to be at %gs:20 by gcc.  Read the comment
- * at the top of stackprotector.h for more info.
- *
- * Local labels 98 and 99 are used.
- */
-#ifdef CONFIG_X86_32_LAZY_GS
-
- /* unfortunately push/pop can't be no-op */
-.macro PUSH_GS
-	pushl_cfi $0
-.endm
-.macro POP_GS pop=0
-	addl $(4 + \pop), %esp
-	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
-.endm
-.macro POP_GS_EX
-.endm
-
- /* all the rest are no-op */
-.macro PTGS_TO_GS
-.endm
-.macro PTGS_TO_GS_EX
-.endm
-.macro GS_TO_REG reg
-.endm
-.macro REG_TO_PTGS reg
-.endm
-.macro SET_KERNEL_GS reg
-.endm
-
-#else	/* CONFIG_X86_32_LAZY_GS */
-
-.macro PUSH_GS
-	pushl_cfi %gs
-	/*CFI_REL_OFFSET gs, 0*/
-.endm
-
-.macro POP_GS pop=0
-98:	popl_cfi %gs
-	/*CFI_RESTORE gs*/
-  .if \pop <> 0
-	add $\pop, %esp
-	CFI_ADJUST_CFA_OFFSET -\pop
-  .endif
-.endm
-.macro POP_GS_EX
-.pushsection .fixup, "ax"
-99:	movl $0, (%esp)
-	jmp 98b
-.popsection
-	_ASM_EXTABLE(98b,99b)
-.endm
-
-.macro PTGS_TO_GS
-98:	mov PT_GS(%esp), %gs
-.endm
-.macro PTGS_TO_GS_EX
-.pushsection .fixup, "ax"
-99:	movl $0, PT_GS(%esp)
-	jmp 98b
-.popsection
-	_ASM_EXTABLE(98b,99b)
-.endm
-
-.macro GS_TO_REG reg
-	movl %gs, \reg
-	/*CFI_REGISTER gs, \reg*/
-.endm
-.macro REG_TO_PTGS reg
-	movl \reg, PT_GS(%esp)
-	/*CFI_REL_OFFSET gs, PT_GS*/
-.endm
-.macro SET_KERNEL_GS reg
-	movl $(__KERNEL_STACK_CANARY), \reg
-	movl \reg, %gs
-.endm
-
-#endif	/* CONFIG_X86_32_LAZY_GS */
-
-.macro SAVE_ALL
-	cld
-	PUSH_GS
-	pushl_cfi %fs
-	/*CFI_REL_OFFSET fs, 0;*/
-	pushl_cfi %es
-	/*CFI_REL_OFFSET es, 0;*/
-	pushl_cfi %ds
-	/*CFI_REL_OFFSET ds, 0;*/
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
-	movl $(__USER_DS), %edx
-	movl %edx, %ds
-	movl %edx, %es
-	movl $(__KERNEL_PERCPU), %edx
-	movl %edx, %fs
-	SET_KERNEL_GS %edx
-.endm
-
-.macro RESTORE_INT_REGS
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebp
-	CFI_RESTORE ebp
-	popl_cfi %eax
-	CFI_RESTORE eax
-.endm
-
-.macro RESTORE_REGS pop=0
-	RESTORE_INT_REGS
-1:	popl_cfi %ds
-	/*CFI_RESTORE ds;*/
-2:	popl_cfi %es
-	/*CFI_RESTORE es;*/
-3:	popl_cfi %fs
-	/*CFI_RESTORE fs;*/
-	POP_GS \pop
-.pushsection .fixup, "ax"
-4:	movl $0, (%esp)
-	jmp 1b
-5:	movl $0, (%esp)
-	jmp 2b
-6:	movl $0, (%esp)
-	jmp 3b
-.popsection
-	_ASM_EXTABLE(1b,4b)
-	_ASM_EXTABLE(2b,5b)
-	_ASM_EXTABLE(3b,6b)
-	POP_GS_EX
-.endm
-
-.macro RING0_INT_FRAME
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA esp, 3*4
-	/*CFI_OFFSET cs, -2*4;*/
-	CFI_OFFSET eip, -3*4
-.endm
-
-.macro RING0_EC_FRAME
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA esp, 4*4
-	/*CFI_OFFSET cs, -2*4;*/
-	CFI_OFFSET eip, -3*4
-.endm
-
-.macro RING0_PTREGS_FRAME
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
-	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
-	CFI_OFFSET eip, PT_EIP-PT_OLDESP
-	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
-	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
-	CFI_OFFSET eax, PT_EAX-PT_OLDESP
-	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
-	CFI_OFFSET edi, PT_EDI-PT_OLDESP
-	CFI_OFFSET esi, PT_ESI-PT_OLDESP
-	CFI_OFFSET edx, PT_EDX-PT_OLDESP
-	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
-	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
-.endm
-
-ENTRY(ret_from_fork)
-	CFI_STARTPROC
-	pushl_cfi %eax
-	call schedule_tail
-	GET_THREAD_INFO(%ebp)
-	popl_cfi %eax
-	pushl_cfi $0x0202		# Reset kernel eflags
-	popfl_cfi
-	jmp syscall_exit
-	CFI_ENDPROC
-END(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
-	CFI_STARTPROC
-	pushl_cfi %eax
-	call schedule_tail
-	GET_THREAD_INFO(%ebp)
-	popl_cfi %eax
-	pushl_cfi $0x0202		# Reset kernel eflags
-	popfl_cfi
-	movl PT_EBP(%esp),%eax
-	call *PT_EBX(%esp)
-	movl $0,PT_EAX(%esp)
-	jmp syscall_exit
-	CFI_ENDPROC
-ENDPROC(ret_from_kernel_thread)
-
-/*
- * Return to user mode is not as complex as all this looks,
- * but we want the default path for a system call return to
- * go as quickly as possible which is why some of this is
- * less clear than it otherwise should be.
- */
-
-	# userspace resumption stub bypassing syscall exit tracing
-	ALIGN
-	RING0_PTREGS_FRAME
-ret_from_exception:
-	preempt_stop(CLBR_ANY)
-ret_from_intr:
-	GET_THREAD_INFO(%ebp)
-#ifdef CONFIG_VM86
-	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
-	movb PT_CS(%esp), %al
-	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
-#else
-	/*
-	 * We can be coming here from child spawned by kernel_thread().
-	 */
-	movl PT_CS(%esp), %eax
-	andl $SEGMENT_RPL_MASK, %eax
-#endif
-	cmpl $USER_RPL, %eax
-	jb resume_kernel		# not returning to v8086 or userspace
-
-ENTRY(resume_userspace)
-	LOCKDEP_SYS_EXIT
- 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
-					# setting need_resched or sigpending
-					# between sampling and the iret
-	TRACE_IRQS_OFF
-	movl TI_flags(%ebp), %ecx
-	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
-					# int/exception return?
-	jne work_pending
-	jmp restore_all
-END(ret_from_exception)
-
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
-	DISABLE_INTERRUPTS(CLBR_ANY)
-need_resched:
-	cmpl $0,PER_CPU_VAR(__preempt_count)
-	jnz restore_all
-	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
-	jz restore_all
-	call preempt_schedule_irq
-	jmp need_resched
-END(resume_kernel)
-#endif
-	CFI_ENDPROC
-
-/* SYSENTER_RETURN points to after the "sysenter" instruction in
-   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
-
-	# sysenter call handler stub
-ENTRY(ia32_sysenter_target)
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA esp, 0
-	CFI_REGISTER esp, ebp
-	movl TSS_sysenter_sp0(%esp),%esp
-sysenter_past_esp:
-	/*
-	 * Interrupts are disabled here, but we can't trace it until
-	 * enough kernel state to call TRACE_IRQS_OFF can be called - but
-	 * we immediately enable interrupts at that point anyway.
-	 */
-	pushl_cfi $__USER_DS
-	/*CFI_REL_OFFSET ss, 0*/
-	pushl_cfi %ebp
-	CFI_REL_OFFSET esp, 0
-	pushfl_cfi
-	orl $X86_EFLAGS_IF, (%esp)
-	pushl_cfi $__USER_CS
-	/*CFI_REL_OFFSET cs, 0*/
-	/*
-	 * Push current_thread_info()->sysenter_return to the stack.
-	 * A tiny bit of offset fixup is necessary: TI_sysenter_return
-	 * is relative to thread_info, which is at the bottom of the
-	 * kernel stack page.  4*4 means the 4 words pushed above;
-	 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
-	 * and THREAD_SIZE takes us to the bottom.
-	 */
-	pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
-	CFI_REL_OFFSET eip, 0
-
-	pushl_cfi %eax
-	SAVE_ALL
-	ENABLE_INTERRUPTS(CLBR_NONE)
-
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
-	cmpl $__PAGE_OFFSET-3,%ebp
-	jae syscall_fault
-	ASM_STAC
-1:	movl (%ebp),%ebp
-	ASM_CLAC
-	movl %ebp,PT_EBP(%esp)
-	_ASM_EXTABLE(1b,syscall_fault)
-
-	GET_THREAD_INFO(%ebp)
-
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
-	jnz sysenter_audit
-sysenter_do_call:
-	cmpl $(NR_syscalls), %eax
-	jae sysenter_badsys
-	call *sys_call_table(,%eax,4)
-sysenter_after_call:
-	movl %eax,PT_EAX(%esp)
-	LOCKDEP_SYS_EXIT
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
-	movl TI_flags(%ebp), %ecx
-	testl $_TIF_ALLWORK_MASK, %ecx
-	jnz sysexit_audit
-sysenter_exit:
-/* if something modifies registers it must also disable sysexit */
-	movl PT_EIP(%esp), %edx
-	movl PT_OLDESP(%esp), %ecx
-	xorl %ebp,%ebp
-	TRACE_IRQS_ON
-1:	mov  PT_FS(%esp), %fs
-	PTGS_TO_GS
-	ENABLE_INTERRUPTS_SYSEXIT
-
-#ifdef CONFIG_AUDITSYSCALL
-sysenter_audit:
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
-	jnz syscall_trace_entry
-	/* movl PT_EAX(%esp), %eax	already set, syscall number: 1st arg to audit */
-	movl PT_EBX(%esp), %edx		/* ebx/a0: 2nd arg to audit */
-	/* movl PT_ECX(%esp), %ecx	already set, a1: 3nd arg to audit */
-	pushl_cfi PT_ESI(%esp)		/* a3: 5th arg */
-	pushl_cfi PT_EDX+4(%esp)	/* a2: 4th arg */
-	call __audit_syscall_entry
-	popl_cfi %ecx /* get that remapped edx off the stack */
-	popl_cfi %ecx /* get that remapped esi off the stack */
-	movl PT_EAX(%esp),%eax		/* reload syscall number */
-	jmp sysenter_do_call
-
-sysexit_audit:
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jnz syscall_exit_work
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	movl %eax,%edx		/* second arg, syscall return value */
-	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
-	setbe %al		/* 1 if so, 0 if not */
-	movzbl %al,%eax		/* zero-extend that */
-	call __audit_syscall_exit
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
-	movl TI_flags(%ebp), %ecx
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jnz syscall_exit_work
-	movl PT_EAX(%esp),%eax	/* reload syscall return value */
-	jmp sysenter_exit
-#endif
-
-	CFI_ENDPROC
-.pushsection .fixup,"ax"
-2:	movl $0,PT_FS(%esp)
-	jmp 1b
-.popsection
-	_ASM_EXTABLE(1b,2b)
-	PTGS_TO_GS_EX
-ENDPROC(ia32_sysenter_target)
-
-	# system call handler stub
-ENTRY(system_call)
-	RING0_INT_FRAME			# can't unwind into user space anyway
-	ASM_CLAC
-	pushl_cfi %eax			# save orig_eax
-	SAVE_ALL
-	GET_THREAD_INFO(%ebp)
-					# system call tracing in operation / emulation
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
-	jnz syscall_trace_entry
-	cmpl $(NR_syscalls), %eax
-	jae syscall_badsys
-syscall_call:
-	call *sys_call_table(,%eax,4)
-syscall_after_call:
-	movl %eax,PT_EAX(%esp)		# store the return value
-syscall_exit:
-	LOCKDEP_SYS_EXIT
-	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
-					# setting need_resched or sigpending
-					# between sampling and the iret
-	TRACE_IRQS_OFF
-	movl TI_flags(%ebp), %ecx
-	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
-	jnz syscall_exit_work
-
-restore_all:
-	TRACE_IRQS_IRET
-restore_all_notrace:
-#ifdef CONFIG_X86_ESPFIX32
-	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
-	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
-	# are returning to the kernel.
-	# See comments in process.c:copy_thread() for details.
-	movb PT_OLDSS(%esp), %ah
-	movb PT_CS(%esp), %al
-	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
-	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
-	CFI_REMEMBER_STATE
-	je ldt_ss			# returning to user-space with LDT SS
-#endif
-restore_nocheck:
-	RESTORE_REGS 4			# skip orig_eax/error_code
-irq_return:
-	INTERRUPT_RETURN
-.section .fixup,"ax"
-ENTRY(iret_exc)
-	pushl $0			# no error code
-	pushl $do_iret_error
-	jmp error_code
-.previous
-	_ASM_EXTABLE(irq_return,iret_exc)
-
-#ifdef CONFIG_X86_ESPFIX32
-	CFI_RESTORE_STATE
-ldt_ss:
-#ifdef CONFIG_PARAVIRT
-	/*
-	 * The kernel can't run on a non-flat stack if paravirt mode
-	 * is active.  Rather than try to fixup the high bits of
-	 * ESP, bypass this code entirely.  This may break DOSemu
-	 * and/or Wine support in a paravirt VM, although the option
-	 * is still available to implement the setting of the high
-	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
-	 */
-	cmpl $0, pv_info+PARAVIRT_enabled
-	jne restore_nocheck
-#endif
-
-/*
- * Setup and switch to ESPFIX stack
- *
- * We're returning to userspace with a 16 bit stack. The CPU will not
- * restore the high word of ESP for us on executing iret... This is an
- * "official" bug of all the x86-compatible CPUs, which we can work
- * around to make dosemu and wine happy. We do this by preloading the
- * high word of ESP with the high word of the userspace ESP while
- * compensating for the offset by changing to the ESPFIX segment with
- * a base address that matches for the difference.
- */
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
-	mov %esp, %edx			/* load kernel esp */
-	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
-	mov %dx, %ax			/* eax: new kernel esp */
-	sub %eax, %edx			/* offset (low word is 0) */
-	shr $16, %edx
-	mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
-	mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
-	pushl_cfi $__ESPFIX_SS
-	pushl_cfi %eax			/* new kernel esp */
-	/* Disable interrupts, but do not irqtrace this section: we
-	 * will soon execute iret and the tracer was already set to
-	 * the irqstate after the iret */
-	DISABLE_INTERRUPTS(CLBR_EAX)
-	lss (%esp), %esp		/* switch to espfix segment */
-	CFI_ADJUST_CFA_OFFSET -8
-	jmp restore_nocheck
-#endif
-	CFI_ENDPROC
-ENDPROC(system_call)
-
-	# perform work that needs to be done immediately before resumption
-	ALIGN
-	RING0_PTREGS_FRAME		# can't unwind into user space anyway
-work_pending:
-	testb $_TIF_NEED_RESCHED, %cl
-	jz work_notifysig
-work_resched:
-	call schedule
-	LOCKDEP_SYS_EXIT
-	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
-					# setting need_resched or sigpending
-					# between sampling and the iret
-	TRACE_IRQS_OFF
-	movl TI_flags(%ebp), %ecx
-	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
-					# than syscall tracing?
-	jz restore_all
-	testb $_TIF_NEED_RESCHED, %cl
-	jnz work_resched
-
-work_notifysig:				# deal with pending signals and
-					# notify-resume requests
-#ifdef CONFIG_VM86
-	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
-	movl %esp, %eax
-	jnz work_notifysig_v86		# returning to kernel-space or
-					# vm86-space
-1:
-#else
-	movl %esp, %eax
-#endif
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	movb PT_CS(%esp), %bl
-	andb $SEGMENT_RPL_MASK, %bl
-	cmpb $USER_RPL, %bl
-	jb resume_kernel
-	xorl %edx, %edx
-	call do_notify_resume
-	jmp resume_userspace
-
-#ifdef CONFIG_VM86
-	ALIGN
-work_notifysig_v86:
-	pushl_cfi %ecx			# save ti_flags for do_notify_resume
-	call save_v86_state		# %eax contains pt_regs pointer
-	popl_cfi %ecx
-	movl %eax, %esp
-	jmp 1b
-#endif
-END(work_pending)
-
-	# perform syscall exit tracing
-	ALIGN
-syscall_trace_entry:
-	movl $-ENOSYS,PT_EAX(%esp)
-	movl %esp, %eax
-	call syscall_trace_enter
-	/* What it returned is what we'll actually use.  */
-	cmpl $(NR_syscalls), %eax
-	jnae syscall_call
-	jmp syscall_exit
-END(syscall_trace_entry)
-
-	# perform syscall exit tracing
-	ALIGN
-syscall_exit_work:
-	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
-	jz work_pending
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
-					# schedule() instead
-	movl %esp, %eax
-	call syscall_trace_leave
-	jmp resume_userspace
-END(syscall_exit_work)
-	CFI_ENDPROC
-
-	RING0_INT_FRAME			# can't unwind into user space anyway
-syscall_fault:
-	ASM_CLAC
-	GET_THREAD_INFO(%ebp)
-	movl $-EFAULT,PT_EAX(%esp)
-	jmp resume_userspace
-END(syscall_fault)
-
-syscall_badsys:
-	movl $-ENOSYS,%eax
-	jmp syscall_after_call
-END(syscall_badsys)
-
-sysenter_badsys:
-	movl $-ENOSYS,%eax
-	jmp sysenter_after_call
-END(sysenter_badsys)
-	CFI_ENDPROC
-
-.macro FIXUP_ESPFIX_STACK
-/*
- * Switch back for ESPFIX stack to the normal zerobased stack
- *
- * We can't call C functions using the ESPFIX stack. This code reads
- * the high word of the segment base from the GDT and swiches to the
- * normal stack and adjusts ESP with the matching offset.
- */
-#ifdef CONFIG_X86_ESPFIX32
-	/* fixup the stack */
-	mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
-	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
-	shl $16, %eax
-	addl %esp, %eax			/* the adjusted stack pointer */
-	pushl_cfi $__KERNEL_DS
-	pushl_cfi %eax
-	lss (%esp), %esp		/* switch to the normal stack segment */
-	CFI_ADJUST_CFA_OFFSET -8
-#endif
-.endm
-.macro UNWIND_ESPFIX_STACK
-#ifdef CONFIG_X86_ESPFIX32
-	movl %ss, %eax
-	/* see if on espfix stack */
-	cmpw $__ESPFIX_SS, %ax
-	jne 27f
-	movl $__KERNEL_DS, %eax
-	movl %eax, %ds
-	movl %eax, %es
-	/* switch to normal stack */
-	FIXUP_ESPFIX_STACK
-27:
-#endif
-.endm
-
-/*
- * Build the entry stubs with some assembler magic.
- * We pack 1 stub into every 8-byte block.
- */
-	.align 8
-ENTRY(irq_entries_start)
-	RING0_INT_FRAME
-    vector=FIRST_EXTERNAL_VECTOR
-    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
-	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-    vector=vector+1
-	jmp	common_interrupt
-	CFI_ADJUST_CFA_OFFSET -4
-	.align	8
-    .endr
-END(irq_entries_start)
-
-/*
- * the CPU automatically disables interrupts when executing an IRQ vector,
- * so IRQ-flags tracing has to follow that:
- */
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
-	ASM_CLAC
-	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	movl %esp,%eax
-	call do_IRQ
-	jmp ret_from_intr
-ENDPROC(common_interrupt)
-	CFI_ENDPROC
-
-#define BUILD_INTERRUPT3(name, nr, fn)	\
-ENTRY(name)				\
-	RING0_INT_FRAME;		\
-	ASM_CLAC;			\
-	pushl_cfi $~(nr);		\
-	SAVE_ALL;			\
-	TRACE_IRQS_OFF			\
-	movl %esp,%eax;			\
-	call fn;			\
-	jmp ret_from_intr;		\
-	CFI_ENDPROC;			\
-ENDPROC(name)
-
-
-#ifdef CONFIG_TRACING
-#define TRACE_BUILD_INTERRUPT(name, nr)		\
-	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
-#else
-#define TRACE_BUILD_INTERRUPT(name, nr)
-#endif
-
-#define BUILD_INTERRUPT(name, nr) \
-	BUILD_INTERRUPT3(name, nr, smp_##name); \
-	TRACE_BUILD_INTERRUPT(name, nr)
-
-/* The include is where all of the SMP etc. interrupts come from */
-#include <asm/entry_arch.h>
-
-ENTRY(coprocessor_error)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-	pushl_cfi $do_coprocessor_error
-	jmp error_code
-	CFI_ENDPROC
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-#ifdef CONFIG_X86_INVD_BUG
-	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-	ALTERNATIVE "pushl_cfi $do_general_protection",	\
-		    "pushl $do_simd_coprocessor_error", \
-		    X86_FEATURE_XMM
-#else
-	pushl_cfi $do_simd_coprocessor_error
-#endif
-	jmp error_code
-	CFI_ENDPROC
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $-1			# mark this as an int
-	pushl_cfi $do_device_not_available
-	jmp error_code
-	CFI_ENDPROC
-END(device_not_available)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
-	iret
-	_ASM_EXTABLE(native_iret, iret_exc)
-END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
-	sti
-	sysexit
-END(native_irq_enable_sysexit)
-#endif
-
-ENTRY(overflow)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-	pushl_cfi $do_overflow
-	jmp error_code
-	CFI_ENDPROC
-END(overflow)
-
-ENTRY(bounds)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-	pushl_cfi $do_bounds
-	jmp error_code
-	CFI_ENDPROC
-END(bounds)
-
-ENTRY(invalid_op)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-	pushl_cfi $do_invalid_op
-	jmp error_code
-	CFI_ENDPROC
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-	pushl_cfi $do_coprocessor_segment_overrun
-	jmp error_code
-	CFI_ENDPROC
-END(coprocessor_segment_overrun)
-
-ENTRY(invalid_TSS)
-	RING0_EC_FRAME
-	ASM_CLAC
-	pushl_cfi $do_invalid_TSS
-	jmp error_code
-	CFI_ENDPROC
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
-	RING0_EC_FRAME
-	ASM_CLAC
-	pushl_cfi $do_segment_not_present
-	jmp error_code
-	CFI_ENDPROC
-END(segment_not_present)
-
-ENTRY(stack_segment)
-	RING0_EC_FRAME
-	ASM_CLAC
-	pushl_cfi $do_stack_segment
-	jmp error_code
-	CFI_ENDPROC
-END(stack_segment)
-
-ENTRY(alignment_check)
-	RING0_EC_FRAME
-	ASM_CLAC
-	pushl_cfi $do_alignment_check
-	jmp error_code
-	CFI_ENDPROC
-END(alignment_check)
-
-ENTRY(divide_error)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0			# no error code
-	pushl_cfi $do_divide_error
-	jmp error_code
-	CFI_ENDPROC
-END(divide_error)
-
-#ifdef CONFIG_X86_MCE
-ENTRY(machine_check)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-	pushl_cfi machine_check_vector
-	jmp error_code
-	CFI_ENDPROC
-END(machine_check)
-#endif
-
-ENTRY(spurious_interrupt_bug)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $0
-	pushl_cfi $do_spurious_interrupt_bug
-	jmp error_code
-	CFI_ENDPROC
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_XEN
-/* Xen doesn't set %esp to be precisely what the normal sysenter
-   entrypoint expects, so fix it up before using the normal path. */
-ENTRY(xen_sysenter_target)
-	RING0_INT_FRAME
-	addl $5*4, %esp		/* remove xen-provided frame */
-	CFI_ADJUST_CFA_OFFSET -5*4
-	jmp sysenter_past_esp
-	CFI_ENDPROC
-
-ENTRY(xen_hypervisor_callback)
-	CFI_STARTPROC
-	pushl_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	TRACE_IRQS_OFF
-
-	/* Check to see if we got the event in the critical
-	   region in xen_iret_direct, after we've reenabled
-	   events and checked for pending events.  This simulates
-	   iret instruction's behaviour where it delivers a
-	   pending interrupt when enabling interrupts. */
-	movl PT_EIP(%esp),%eax
-	cmpl $xen_iret_start_crit,%eax
-	jb   1f
-	cmpl $xen_iret_end_crit,%eax
-	jae  1f
-
-	jmp  xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1:	mov %esp, %eax
-	call xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPT
-	call xen_maybe_preempt_hcall
-#endif
-	jmp  ret_from_intr
-	CFI_ENDPROC
-ENDPROC(xen_hypervisor_callback)
-
-# Hypervisor uses this for application faults while it executes.
-# We get here for two reasons:
-#  1. Fault while reloading DS, ES, FS or GS
-#  2. Fault while executing IRET
-# Category 1 we fix up by reattempting the load, and zeroing the segment
-# register if the load fails.
-# Category 2 we fix up by jumping to do_iret_error. We cannot use the
-# normal Linux return path in this case because if we use the IRET hypercall
-# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
-# We distinguish between categories by maintaining a status value in EAX.
-ENTRY(xen_failsafe_callback)
-	CFI_STARTPROC
-	pushl_cfi %eax
-	movl $1,%eax
-1:	mov 4(%esp),%ds
-2:	mov 8(%esp),%es
-3:	mov 12(%esp),%fs
-4:	mov 16(%esp),%gs
-	/* EAX == 0 => Category 1 (Bad segment)
-	   EAX != 0 => Category 2 (Bad IRET) */
-	testl %eax,%eax
-	popl_cfi %eax
-	lea 16(%esp),%esp
-	CFI_ADJUST_CFA_OFFSET -16
-	jz 5f
-	jmp iret_exc
-5:	pushl_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	jmp ret_from_exception
-	CFI_ENDPROC
-
-.section .fixup,"ax"
-6:	xorl %eax,%eax
-	movl %eax,4(%esp)
-	jmp 1b
-7:	xorl %eax,%eax
-	movl %eax,8(%esp)
-	jmp 2b
-8:	xorl %eax,%eax
-	movl %eax,12(%esp)
-	jmp 3b
-9:	xorl %eax,%eax
-	movl %eax,16(%esp)
-	jmp 4b
-.previous
-	_ASM_EXTABLE(1b,6b)
-	_ASM_EXTABLE(2b,7b)
-	_ASM_EXTABLE(3b,8b)
-	_ASM_EXTABLE(4b,9b)
-ENDPROC(xen_failsafe_callback)
-
-BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
-		xen_evtchn_do_upcall)
-
-#endif	/* CONFIG_XEN */
-
-#if IS_ENABLED(CONFIG_HYPERV)
-
-BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
-	hyperv_vector_handler)
-
-#endif /* CONFIG_HYPERV */
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-ENTRY(mcount)
-	ret
-END(mcount)
-
-ENTRY(ftrace_caller)
-	pushl %eax
-	pushl %ecx
-	pushl %edx
-	pushl $0	/* Pass NULL as regs pointer */
-	movl 4*4(%esp), %eax
-	movl 0x4(%ebp), %edx
-	movl function_trace_op, %ecx
-	subl $MCOUNT_INSN_SIZE, %eax
-
-.globl ftrace_call
-ftrace_call:
-	call ftrace_stub
-
-	addl $4,%esp	/* skip NULL pointer */
-	popl %edx
-	popl %ecx
-	popl %eax
-ftrace_ret:
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
-	jmp ftrace_stub
-#endif
-
-.globl ftrace_stub
-ftrace_stub:
-	ret
-END(ftrace_caller)
-
-ENTRY(ftrace_regs_caller)
-	pushf	/* push flags before compare (in cs location) */
-
-	/*
-	 * i386 does not save SS and ESP when coming from kernel.
-	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
-	 * Unfortunately, that means eflags must be at the same location
-	 * as the current return ip is. We move the return ip into the
-	 * ip location, and move flags into the return ip location.
-	 */
-	pushl 4(%esp)	/* save return ip into ip slot */
-
-	pushl $0	/* Load 0 into orig_ax */
-	pushl %gs
-	pushl %fs
-	pushl %es
-	pushl %ds
-	pushl %eax
-	pushl %ebp
-	pushl %edi
-	pushl %esi
-	pushl %edx
-	pushl %ecx
-	pushl %ebx
-
-	movl 13*4(%esp), %eax	/* Get the saved flags */
-	movl %eax, 14*4(%esp)	/* Move saved flags into regs->flags location */
-				/* clobbering return ip */
-	movl $__KERNEL_CS,13*4(%esp)
-
-	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
-	subl $MCOUNT_INSN_SIZE, %eax	/* Adjust ip */
-	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
-	movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
-	pushl %esp		/* Save pt_regs as 4th parameter */
-
-GLOBAL(ftrace_regs_call)
-	call ftrace_stub
-
-	addl $4, %esp		/* Skip pt_regs */
-	movl 14*4(%esp), %eax	/* Move flags back into cs */
-	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
-	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
-	movl %eax, 14*4(%esp)	/* Put return ip back for ret */
-
-	popl %ebx
-	popl %ecx
-	popl %edx
-	popl %esi
-	popl %edi
-	popl %ebp
-	popl %eax
-	popl %ds
-	popl %es
-	popl %fs
-	popl %gs
-	addl $8, %esp		/* Skip orig_ax and ip */
-	popf			/* Pop flags at end (no addl to corrupt flags) */
-	jmp ftrace_ret
-
-	popf
-	jmp  ftrace_stub
-#else /* ! CONFIG_DYNAMIC_FTRACE */
-
-ENTRY(mcount)
-	cmpl $__PAGE_OFFSET, %esp
-	jb ftrace_stub		/* Paging not enabled yet? */
-
-	cmpl $ftrace_stub, ftrace_trace_function
-	jnz trace
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	cmpl $ftrace_stub, ftrace_graph_return
-	jnz ftrace_graph_caller
-
-	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
-	jnz ftrace_graph_caller
-#endif
-.globl ftrace_stub
-ftrace_stub:
-	ret
-
-	/* taken from glibc */
-trace:
-	pushl %eax
-	pushl %ecx
-	pushl %edx
-	movl 0xc(%esp), %eax
-	movl 0x4(%ebp), %edx
-	subl $MCOUNT_INSN_SIZE, %eax
-
-	call *ftrace_trace_function
-
-	popl %edx
-	popl %ecx
-	popl %eax
-	jmp ftrace_stub
-END(mcount)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
-	pushl %eax
-	pushl %ecx
-	pushl %edx
-	movl 0xc(%esp), %eax
-	lea 0x4(%ebp), %edx
-	movl (%ebp), %ecx
-	subl $MCOUNT_INSN_SIZE, %eax
-	call prepare_ftrace_return
-	popl %edx
-	popl %ecx
-	popl %eax
-	ret
-END(ftrace_graph_caller)
-
-.globl return_to_handler
-return_to_handler:
-	pushl %eax
-	pushl %edx
-	movl %ebp, %eax
-	call ftrace_return_to_handler
-	movl %eax, %ecx
-	popl %edx
-	popl %eax
-	jmp *%ecx
-#endif
-
-#ifdef CONFIG_TRACING
-ENTRY(trace_page_fault)
-	RING0_EC_FRAME
-	ASM_CLAC
-	pushl_cfi $trace_do_page_fault
-	jmp error_code
-	CFI_ENDPROC
-END(trace_page_fault)
-#endif
-
-ENTRY(page_fault)
-	RING0_EC_FRAME
-	ASM_CLAC
-	pushl_cfi $do_page_fault
-	ALIGN
-error_code:
-	/* the function address is in %gs's slot on the stack */
-	pushl_cfi %fs
-	/*CFI_REL_OFFSET fs, 0*/
-	pushl_cfi %es
-	/*CFI_REL_OFFSET es, 0*/
-	pushl_cfi %ds
-	/*CFI_REL_OFFSET ds, 0*/
-	pushl_cfi_reg eax
-	pushl_cfi_reg ebp
-	pushl_cfi_reg edi
-	pushl_cfi_reg esi
-	pushl_cfi_reg edx
-	pushl_cfi_reg ecx
-	pushl_cfi_reg ebx
-	cld
-	movl $(__KERNEL_PERCPU), %ecx
-	movl %ecx, %fs
-	UNWIND_ESPFIX_STACK
-	GS_TO_REG %ecx
-	movl PT_GS(%esp), %edi		# get the function address
-	movl PT_ORIG_EAX(%esp), %edx	# get the error code
-	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
-	REG_TO_PTGS %ecx
-	SET_KERNEL_GS %ecx
-	movl $(__USER_DS), %ecx
-	movl %ecx, %ds
-	movl %ecx, %es
-	TRACE_IRQS_OFF
-	movl %esp,%eax			# pt_regs pointer
-	call *%edi
-	jmp ret_from_exception
-	CFI_ENDPROC
-END(page_fault)
-
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-.macro FIX_STACK offset ok label
-	cmpw $__KERNEL_CS, 4(%esp)
-	jne \ok
-\label:
-	movl TSS_sysenter_sp0 + \offset(%esp), %esp
-	CFI_DEF_CFA esp, 0
-	CFI_UNDEFINED eip
-	pushfl_cfi
-	pushl_cfi $__KERNEL_CS
-	pushl_cfi $sysenter_past_esp
-	CFI_REL_OFFSET eip, 0
-.endm
-
-ENTRY(debug)
-	RING0_INT_FRAME
-	ASM_CLAC
-	cmpl $ia32_sysenter_target,(%esp)
-	jne debug_stack_correct
-	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
-debug_stack_correct:
-	pushl_cfi $-1			# mark this as an int
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx			# error code 0
-	movl %esp,%eax			# pt_regs pointer
-	call do_debug
-	jmp ret_from_exception
-	CFI_ENDPROC
-END(debug)
-
-/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got  an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
- */
-ENTRY(nmi)
-	RING0_INT_FRAME
-	ASM_CLAC
-#ifdef CONFIG_X86_ESPFIX32
-	pushl_cfi %eax
-	movl %ss, %eax
-	cmpw $__ESPFIX_SS, %ax
-	popl_cfi %eax
-	je nmi_espfix_stack
-#endif
-	cmpl $ia32_sysenter_target,(%esp)
-	je nmi_stack_fixup
-	pushl_cfi %eax
-	movl %esp,%eax
-	/* Do not access memory above the end of our stack page,
-	 * it might not exist.
-	 */
-	andl $(THREAD_SIZE-1),%eax
-	cmpl $(THREAD_SIZE-20),%eax
-	popl_cfi %eax
-	jae nmi_stack_correct
-	cmpl $ia32_sysenter_target,12(%esp)
-	je nmi_debug_stack_check
-nmi_stack_correct:
-	/* We have a RING0_INT_FRAME here */
-	pushl_cfi %eax
-	SAVE_ALL
-	xorl %edx,%edx		# zero error code
-	movl %esp,%eax		# pt_regs pointer
-	call do_nmi
-	jmp restore_all_notrace
-	CFI_ENDPROC
-
-nmi_stack_fixup:
-	RING0_INT_FRAME
-	FIX_STACK 12, nmi_stack_correct, 1
-	jmp nmi_stack_correct
-
-nmi_debug_stack_check:
-	/* We have a RING0_INT_FRAME here */
-	cmpw $__KERNEL_CS,16(%esp)
-	jne nmi_stack_correct
-	cmpl $debug,(%esp)
-	jb nmi_stack_correct
-	cmpl $debug_esp_fix_insn,(%esp)
-	ja nmi_stack_correct
-	FIX_STACK 24, nmi_stack_correct, 1
-	jmp nmi_stack_correct
-
-#ifdef CONFIG_X86_ESPFIX32
-nmi_espfix_stack:
-	/* We have a RING0_INT_FRAME here.
-	 *
-	 * create the pointer to lss back
-	 */
-	pushl_cfi %ss
-	pushl_cfi %esp
-	addl $4, (%esp)
-	/* copy the iret frame of 12 bytes */
-	.rept 3
-	pushl_cfi 16(%esp)
-	.endr
-	pushl_cfi %eax
-	SAVE_ALL
-	FIXUP_ESPFIX_STACK		# %eax == %esp
-	xorl %edx,%edx			# zero error code
-	call do_nmi
-	RESTORE_REGS
-	lss 12+4(%esp), %esp		# back to espfix stack
-	CFI_ADJUST_CFA_OFFSET -24
-	jmp irq_return
-#endif
-	CFI_ENDPROC
-END(nmi)
-
-ENTRY(int3)
-	RING0_INT_FRAME
-	ASM_CLAC
-	pushl_cfi $-1			# mark this as an int
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx		# zero error code
-	movl %esp,%eax		# pt_regs pointer
-	call do_int3
-	jmp ret_from_exception
-	CFI_ENDPROC
-END(int3)
-
-ENTRY(general_protection)
-	RING0_EC_FRAME
-	pushl_cfi $do_general_protection
-	jmp error_code
-	CFI_ENDPROC
-END(general_protection)
-
-#ifdef CONFIG_KVM_GUEST
-ENTRY(async_page_fault)
-	RING0_EC_FRAME
-	ASM_CLAC
-	pushl_cfi $do_async_page_fault
-	jmp error_code
-	CFI_ENDPROC
-END(async_page_fault)
-#endif
-
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
deleted file mode 100644
index 22aadc917868..000000000000
--- a/arch/x86/kernel/entry_64.S
+++ /dev/null
@@ -1,1642 +0,0 @@
-/*
- *  linux/arch/x86_64/entry.S
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
- *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
- */
-
-/*
- * entry.S contains the system-call and fault low-level handling routines.
- *
- * Some of this is documented in Documentation/x86/entry_64.txt
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after an interrupt and after each system call.
- *
- * A note on terminology:
- * - iret frame: Architecture defined interrupt frame from SS to RIP
- * at the top of the kernel process stack.
- *
- * Some macro usage:
- * - CFI macros are used to generate dwarf2 unwind information for better
- * backtraces. They don't change any code.
- * - ENTRY/END Define functions in the symbol table.
- * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
- * - idtentry - Define exception entry points.
- */
-
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/dwarf2.h>
-#include <asm/calling.h>
-#include <asm/asm-offsets.h>
-#include <asm/msr.h>
-#include <asm/unistd.h>
-#include <asm/thread_info.h>
-#include <asm/hw_irq.h>
-#include <asm/page_types.h>
-#include <asm/irqflags.h>
-#include <asm/paravirt.h>
-#include <asm/percpu.h>
-#include <asm/asm.h>
-#include <asm/context_tracking.h>
-#include <asm/smap.h>
-#include <asm/pgtable_types.h>
-#include <linux/err.h>
-
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_64BIT 0x80000000
-#define __AUDIT_ARCH_LE	   0x40000000
-
-	.code64
-	.section .entry.text, "ax"
-
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret64)
-	swapgs
-	sysretq
-ENDPROC(native_usergs_sysret64)
-#endif /* CONFIG_PARAVIRT */
-
-
-.macro TRACE_IRQS_IRETQ
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
-	jnc  1f
-	TRACE_IRQS_ON
-1:
-#endif
-.endm
-
-/*
- * When dynamic function tracer is enabled it will add a breakpoint
- * to all locations that it is about to modify, sync CPUs, update
- * all the code, sync CPUs, then remove the breakpoints. In this time
- * if lockdep is enabled, it might jump back into the debug handler
- * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
- *
- * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
- * make sure the stack pointer does not get reset back to the top
- * of the debug stack, and instead just reuses the current stack.
- */
-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
-
-.macro TRACE_IRQS_OFF_DEBUG
-	call debug_stack_set_zero
-	TRACE_IRQS_OFF
-	call debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_ON_DEBUG
-	call debug_stack_set_zero
-	TRACE_IRQS_ON
-	call debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_IRETQ_DEBUG
-	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
-	jnc  1f
-	TRACE_IRQS_ON_DEBUG
-1:
-.endm
-
-#else
-# define TRACE_IRQS_OFF_DEBUG		TRACE_IRQS_OFF
-# define TRACE_IRQS_ON_DEBUG		TRACE_IRQS_ON
-# define TRACE_IRQS_IRETQ_DEBUG		TRACE_IRQS_IRETQ
-#endif
-
-/*
- * empty frame
- */
-	.macro EMPTY_FRAME start=1 offset=0
-	.if \start
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA rsp,8+\offset
-	.else
-	CFI_DEF_CFA_OFFSET 8+\offset
-	.endif
-	.endm
-
-/*
- * initial frame state for interrupts (and exceptions without error code)
- */
-	.macro INTR_FRAME start=1 offset=0
-	EMPTY_FRAME \start, 5*8+\offset
-	/*CFI_REL_OFFSET ss, 4*8+\offset*/
-	CFI_REL_OFFSET rsp, 3*8+\offset
-	/*CFI_REL_OFFSET rflags, 2*8+\offset*/
-	/*CFI_REL_OFFSET cs, 1*8+\offset*/
-	CFI_REL_OFFSET rip, 0*8+\offset
-	.endm
-
-/*
- * initial frame state for exceptions with error code (and interrupts
- * with vector already pushed)
- */
-	.macro XCPT_FRAME start=1 offset=0
-	INTR_FRAME \start, 1*8+\offset
-	.endm
-
-/*
- * frame that enables passing a complete pt_regs to a C function.
- */
-	.macro DEFAULT_FRAME start=1 offset=0
-	XCPT_FRAME \start, ORIG_RAX+\offset
-	CFI_REL_OFFSET rdi, RDI+\offset
-	CFI_REL_OFFSET rsi, RSI+\offset
-	CFI_REL_OFFSET rdx, RDX+\offset
-	CFI_REL_OFFSET rcx, RCX+\offset
-	CFI_REL_OFFSET rax, RAX+\offset
-	CFI_REL_OFFSET r8, R8+\offset
-	CFI_REL_OFFSET r9, R9+\offset
-	CFI_REL_OFFSET r10, R10+\offset
-	CFI_REL_OFFSET r11, R11+\offset
-	CFI_REL_OFFSET rbx, RBX+\offset
-	CFI_REL_OFFSET rbp, RBP+\offset
-	CFI_REL_OFFSET r12, R12+\offset
-	CFI_REL_OFFSET r13, R13+\offset
-	CFI_REL_OFFSET r14, R14+\offset
-	CFI_REL_OFFSET r15, R15+\offset
-	.endm
-
-/*
- * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
- *
- * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
- * then loads new ss, cs, and rip from previously programmed MSRs.
- * rflags gets masked by a value from another MSR (so CLD and CLAC
- * are not needed). SYSCALL does not save anything on the stack
- * and does not change rsp.
- *
- * Registers on entry:
- * rax  system call number
- * rcx  return address
- * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
- * rdi  arg0
- * rsi  arg1
- * rdx  arg2
- * r10  arg3 (needs to be moved to rcx to conform to C ABI)
- * r8   arg4
- * r9   arg5
- * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
- *
- * Only called from user space.
- *
- * When user can change pt_regs->foo always force IRET. That is because
- * it deals with uncanonical addresses better. SYSRET has trouble
- * with them due to bugs in both AMD and Intel CPUs.
- */
-
-ENTRY(system_call)
-	CFI_STARTPROC	simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,0
-	CFI_REGISTER	rip,rcx
-	/*CFI_REGISTER	rflags,r11*/
-
-	/*
-	 * Interrupts are off on entry.
-	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-	 * it is too small to ever cause noticeable irq latency.
-	 */
-	SWAPGS_UNSAFE_STACK
-	/*
-	 * A hypervisor implementation might want to use a label
-	 * after the swapgs, so that it can do the swapgs
-	 * for the guest and jump here on syscall.
-	 */
-GLOBAL(system_call_after_swapgs)
-
-	movq	%rsp,PER_CPU_VAR(rsp_scratch)
-	movq	PER_CPU_VAR(cpu_current_top_of_stack),%rsp
-
-	/* Construct struct pt_regs on stack */
-	pushq_cfi $__USER_DS			/* pt_regs->ss */
-	pushq_cfi PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
-	/*
-	 * Re-enable interrupts.
-	 * We use 'rsp_scratch' as a scratch space, hence irq-off block above
-	 * must execute atomically in the face of possible interrupt-driven
-	 * task preemption. We must enable interrupts only after we're done
-	 * with using rsp_scratch:
-	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq_cfi	%r11			/* pt_regs->flags */
-	pushq_cfi	$__USER_CS		/* pt_regs->cs */
-	pushq_cfi	%rcx			/* pt_regs->ip */
-	CFI_REL_OFFSET rip,0
-	pushq_cfi_reg	rax			/* pt_regs->orig_ax */
-	pushq_cfi_reg	rdi			/* pt_regs->di */
-	pushq_cfi_reg	rsi			/* pt_regs->si */
-	pushq_cfi_reg	rdx			/* pt_regs->dx */
-	pushq_cfi_reg	rcx			/* pt_regs->cx */
-	pushq_cfi	$-ENOSYS		/* pt_regs->ax */
-	pushq_cfi_reg	r8			/* pt_regs->r8 */
-	pushq_cfi_reg	r9			/* pt_regs->r9 */
-	pushq_cfi_reg	r10			/* pt_regs->r10 */
-	pushq_cfi_reg	r11			/* pt_regs->r11 */
-	sub	$(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */
-	CFI_ADJUST_CFA_OFFSET 6*8
-
-	testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz tracesys
-system_call_fastpath:
-#if __SYSCALL_MASK == ~0
-	cmpq $__NR_syscall_max,%rax
-#else
-	andl $__SYSCALL_MASK,%eax
-	cmpl $__NR_syscall_max,%eax
-#endif
-	ja	1f	/* return -ENOSYS (already in pt_regs->ax) */
-	movq %r10,%rcx
-	call *sys_call_table(,%rax,8)
-	movq %rax,RAX(%rsp)
-1:
-/*
- * Syscall return path ending with SYSRET (fast path).
- * Has incompletely filled pt_regs.
- */
-	LOCKDEP_SYS_EXIT
-	/*
-	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-	 * it is too small to ever cause noticeable irq latency.
-	 */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-
-	/*
-	 * We must check ti flags with interrupts (or at least preemption)
-	 * off because we must *never* return to userspace without
-	 * processing exit work that is enqueued if we're preempted here.
-	 * In particular, returning to userspace with any of the one-shot
-	 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
-	 * very bad.
-	 */
-	testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz int_ret_from_sys_call_irqs_off	/* Go to the slow path */
-
-	CFI_REMEMBER_STATE
-
-	RESTORE_C_REGS_EXCEPT_RCX_R11
-	movq	RIP(%rsp),%rcx
-	CFI_REGISTER	rip,rcx
-	movq	EFLAGS(%rsp),%r11
-	/*CFI_REGISTER	rflags,r11*/
-	movq	RSP(%rsp),%rsp
-	/*
-	 * 64bit SYSRET restores rip from rcx,
-	 * rflags from r11 (but RF and VM bits are forced to 0),
-	 * cs and ss are loaded from MSRs.
-	 * Restoration of rflags re-enables interrupts.
-	 *
-	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
-	 * descriptor is not reinitialized.  This means that we should
-	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
-	 * exit the kernel, and re-enter using an interrupt vector.  (All
-	 * interrupt entries on x86_64 set SS to NULL.)  We prevent that
-	 * from happening by reloading SS in __switch_to.  (Actually
-	 * detecting the failure in 64-bit userspace is tricky but can be
-	 * done.)
-	 */
-	USERGS_SYSRET64
-
-	CFI_RESTORE_STATE
-
-	/* Do syscall entry tracing */
-tracesys:
-	movq %rsp, %rdi
-	movl $AUDIT_ARCH_X86_64, %esi
-	call syscall_trace_enter_phase1
-	test %rax, %rax
-	jnz tracesys_phase2		/* if needed, run the slow path */
-	RESTORE_C_REGS_EXCEPT_RAX	/* else restore clobbered regs */
-	movq ORIG_RAX(%rsp), %rax
-	jmp system_call_fastpath	/*      and return to the fast path */
-
-tracesys_phase2:
-	SAVE_EXTRA_REGS
-	movq %rsp, %rdi
-	movl $AUDIT_ARCH_X86_64, %esi
-	movq %rax,%rdx
-	call syscall_trace_enter_phase2
-
-	/*
-	 * Reload registers from stack in case ptrace changed them.
-	 * We don't reload %rax because syscall_trace_entry_phase2() returned
-	 * the value it wants us to use in the table lookup.
-	 */
-	RESTORE_C_REGS_EXCEPT_RAX
-	RESTORE_EXTRA_REGS
-#if __SYSCALL_MASK == ~0
-	cmpq $__NR_syscall_max,%rax
-#else
-	andl $__SYSCALL_MASK,%eax
-	cmpl $__NR_syscall_max,%eax
-#endif
-	ja	1f	/* return -ENOSYS (already in pt_regs->ax) */
-	movq %r10,%rcx	/* fixup for C */
-	call *sys_call_table(,%rax,8)
-	movq %rax,RAX(%rsp)
-1:
-	/* Use IRET because user could have changed pt_regs->foo */
-
-/*
- * Syscall return path ending with IRET.
- * Has correct iret frame.
- */
-GLOBAL(int_ret_from_sys_call)
-	DISABLE_INTERRUPTS(CLBR_NONE)
-int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
-	TRACE_IRQS_OFF
-	movl $_TIF_ALLWORK_MASK,%edi
-	/* edi:	mask to check */
-GLOBAL(int_with_check)
-	LOCKDEP_SYS_EXIT_IRQ
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%edx
-	andl %edi,%edx
-	jnz   int_careful
-	andl	$~TS_COMPAT,TI_status(%rcx)
-	jmp	syscall_return
-
-	/* Either reschedule or signal or syscall exit tracking needed. */
-	/* First do a reschedule test. */
-	/* edx:	work, edi: workmask */
-int_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc  int_very_careful
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq_cfi %rdi
-	SCHEDULE_USER
-	popq_cfi %rdi
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp int_with_check
-
-	/* handle signals and tracing -- both require a full pt_regs */
-int_very_careful:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_EXTRA_REGS
-	/* Check for syscall exit trace */
-	testl $_TIF_WORK_SYSCALL_EXIT,%edx
-	jz int_signal
-	pushq_cfi %rdi
-	leaq 8(%rsp),%rdi	# &ptregs -> arg1
-	call syscall_trace_leave
-	popq_cfi %rdi
-	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
-	jmp int_restore_rest
-
-int_signal:
-	testl $_TIF_DO_NOTIFY_MASK,%edx
-	jz 1f
-	movq %rsp,%rdi		# &ptregs -> arg1
-	xorl %esi,%esi		# oldset -> arg2
-	call do_notify_resume
-1:	movl $_TIF_WORK_MASK,%edi
-int_restore_rest:
-	RESTORE_EXTRA_REGS
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp int_with_check
-
-syscall_return:
-	/* The IRETQ could re-enable interrupts: */
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_IRETQ
-
-	/*
-	 * Try to use SYSRET instead of IRET if we're returning to
-	 * a completely clean 64-bit userspace context.
-	 */
-	movq RCX(%rsp),%rcx
-	movq RIP(%rsp),%r11
-	cmpq %rcx,%r11			/* RCX == RIP */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-	 * in kernel space.  This essentially lets the user take over
-	 * the kernel, since userspace controls RSP.
-	 *
-	 * If width of "canonical tail" ever becomes variable, this will need
-	 * to be updated to remain correct on both old and new CPUs.
-	 */
-	.ifne __VIRTUAL_MASK_SHIFT - 47
-	.error "virtual address width changed -- SYSRET checks need update"
-	.endif
-	/* Change top 16 bits to be the sign-extension of 47th bit */
-	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-	/* If this changed %rcx, it was not canonical */
-	cmpq	%rcx, %r11
-	jne	opportunistic_sysret_failed
-
-	cmpq $__USER_CS,CS(%rsp)	/* CS must match SYSRET */
-	jne opportunistic_sysret_failed
-
-	movq R11(%rsp),%r11
-	cmpq %r11,EFLAGS(%rsp)		/* R11 == RFLAGS */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.  This would cause an infinite loop whenever #DB happens
-	 * with register state that satisfies the opportunistic SYSRET
-	 * conditions.  For example, single-stepping this user code:
-	 *
-	 *           movq $stuck_here,%rcx
-	 *           pushfq
-	 *           popq %r11
-	 *   stuck_here:
-	 *
-	 * would never get past 'stuck_here'.
-	 */
-	testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-	jnz opportunistic_sysret_failed
-
-	/* nothing to check for RSP */
-
-	cmpq $__USER_DS,SS(%rsp)	/* SS must match SYSRET */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * We win!  This label is here just for ease of understanding
-	 * perf profiles.  Nothing jumps here.
-	 */
-syscall_return_via_sysret:
-	CFI_REMEMBER_STATE
-	/* rcx and r11 are already restored (see code above) */
-	RESTORE_C_REGS_EXCEPT_RCX_R11
-	movq RSP(%rsp),%rsp
-	USERGS_SYSRET64
-	CFI_RESTORE_STATE
-
-opportunistic_sysret_failed:
-	SWAPGS
-	jmp	restore_c_regs_and_iret
-	CFI_ENDPROC
-END(system_call)
-
-
-	.macro FORK_LIKE func
-ENTRY(stub_\func)
-	CFI_STARTPROC
-	DEFAULT_FRAME 0, 8		/* offset 8: return address */
-	SAVE_EXTRA_REGS 8
-	jmp sys_\func
-	CFI_ENDPROC
-END(stub_\func)
-	.endm
-
-	FORK_LIKE  clone
-	FORK_LIKE  fork
-	FORK_LIKE  vfork
-
-ENTRY(stub_execve)
-	CFI_STARTPROC
-	DEFAULT_FRAME 0, 8
-	call	sys_execve
-return_from_execve:
-	testl	%eax, %eax
-	jz	1f
-	/* exec failed, can use fast SYSRET code path in this case */
-	ret
-1:
-	/* must use IRET code path (pt_regs->cs may have changed) */
-	addq	$8, %rsp
-	CFI_ADJUST_CFA_OFFSET -8
-	ZERO_EXTRA_REGS
-	movq	%rax,RAX(%rsp)
-	jmp	int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_execve)
-/*
- * Remaining execve stubs are only 7 bytes long.
- * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
- */
-	.align	8
-GLOBAL(stub_execveat)
-	CFI_STARTPROC
-	DEFAULT_FRAME 0, 8
-	call	sys_execveat
-	jmp	return_from_execve
-	CFI_ENDPROC
-END(stub_execveat)
-
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
-	.align	8
-GLOBAL(stub_x32_execve)
-GLOBAL(stub32_execve)
-	CFI_STARTPROC
-	DEFAULT_FRAME 0, 8
-	call	compat_sys_execve
-	jmp	return_from_execve
-	CFI_ENDPROC
-END(stub32_execve)
-END(stub_x32_execve)
-	.align	8
-GLOBAL(stub_x32_execveat)
-GLOBAL(stub32_execveat)
-	CFI_STARTPROC
-	DEFAULT_FRAME 0, 8
-	call	compat_sys_execveat
-	jmp	return_from_execve
-	CFI_ENDPROC
-END(stub32_execveat)
-END(stub_x32_execveat)
-#endif
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
-	CFI_STARTPROC
-	DEFAULT_FRAME 0, 8
-	/*
-	 * SAVE_EXTRA_REGS result is not normally needed:
-	 * sigreturn overwrites all pt_regs->GPREGS.
-	 * But sigreturn can fail (!), and there is no easy way to detect that.
-	 * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
-	 * we SAVE_EXTRA_REGS here.
-	 */
-	SAVE_EXTRA_REGS 8
-	call sys_rt_sigreturn
-return_from_stub:
-	addq	$8, %rsp
-	CFI_ADJUST_CFA_OFFSET -8
-	RESTORE_EXTRA_REGS
-	movq %rax,RAX(%rsp)
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(stub_rt_sigreturn)
-
-#ifdef CONFIG_X86_X32_ABI
-ENTRY(stub_x32_rt_sigreturn)
-	CFI_STARTPROC
-	DEFAULT_FRAME 0, 8
-	SAVE_EXTRA_REGS 8
-	call sys32_x32_rt_sigreturn
-	jmp  return_from_stub
-	CFI_ENDPROC
-END(stub_x32_rt_sigreturn)
-#endif
-
-/*
- * A newly forked process directly context switches into this address.
- *
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
-	DEFAULT_FRAME
-
-	LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
-	pushq_cfi $0x0002
-	popfq_cfi				# reset kernel eflags
-
-	call schedule_tail			# rdi: 'prev' task parameter
-
-	RESTORE_EXTRA_REGS
-
-	testb	$3, CS(%rsp)			# from kernel_thread?
-
-	/*
-	 * By the time we get here, we have no idea whether our pt_regs,
-	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
-	 * the slow path, or one of the ia32entry paths.
-	 * Use IRET code path to return, since it can safely handle
-	 * all of the above.
-	 */
-	jnz	int_ret_from_sys_call
-
-	/* We came from kernel_thread */
-	/* nb: we depend on RESTORE_EXTRA_REGS above */
-	movq %rbp, %rdi
-	call *%rbx
-	movl $0, RAX(%rsp)
-	RESTORE_EXTRA_REGS
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * Build the entry stubs with some assembler magic.
- * We pack 1 stub into every 8-byte block.
- */
-	.align 8
-ENTRY(irq_entries_start)
-	INTR_FRAME
-    vector=FIRST_EXTERNAL_VECTOR
-    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
-	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-    vector=vector+1
-	jmp	common_interrupt
-	CFI_ADJUST_CFA_OFFSET -8
-	.align	8
-    .endr
-	CFI_ENDPROC
-END(irq_entries_start)
-
-/*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
- *
- * Entry runs with interrupts off.
- */
-
-/* 0(%rsp): ~(interrupt number) */
-	.macro interrupt func
-	cld
-	/*
-	 * Since nothing in interrupt handling code touches r12...r15 members
-	 * of "struct pt_regs", and since interrupts can nest, we can save
-	 * four stack slots and simultaneously provide
-	 * an unwind-friendly stack layout by saving "truncated" pt_regs
-	 * exactly up to rbp slot, without these members.
-	 */
-	ALLOC_PT_GPREGS_ON_STACK -RBP
-	SAVE_C_REGS -RBP
-	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
-	SAVE_EXTRA_REGS_RBP -RBP
-
-	leaq -RBP(%rsp),%rdi	/* arg1 for \func (pointer to pt_regs) */
-
-	testb	$3, CS-RBP(%rsp)
-	jz	1f
-	SWAPGS
-1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq %rsp, %rsi
-	incl PER_CPU_VAR(irq_count)
-	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-	CFI_DEF_CFA_REGISTER	rsi
-	pushq %rsi
-	/*
-	 * For debugger:
-	 * "CFA (Current Frame Address) is the value on stack + offset"
-	 */
-	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 (rsp) */, 0, \
-			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
-			0x22 /* DW_OP_plus */
-	/* We entered an interrupt context - irqs are off: */
-	TRACE_IRQS_OFF
-
-	call \func
-	.endm
-
-	/*
-	 * The interrupt stubs push (~vector+0x80) onto the stack and
-	 * then jump to common_interrupt.
-	 */
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
-	XCPT_FRAME
-	ASM_CLAC
-	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
-	interrupt do_IRQ
-	/* 0(%rsp): old RSP */
-ret_from_intr:
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	decl PER_CPU_VAR(irq_count)
-
-	/* Restore saved previous stack */
-	popq %rsi
-	CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
-	/* return code expects complete pt_regs - adjust rsp accordingly: */
-	leaq -RBP(%rsi),%rsp
-	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	RBP
-
-	testb	$3, CS(%rsp)
-	jz	retint_kernel
-	/* Interrupt came from user space */
-
-	GET_THREAD_INFO(%rcx)
-	/*
-	 * %rcx: thread info. Interrupts off.
-	 */
-retint_with_reschedule:
-	movl $_TIF_WORK_MASK,%edi
-retint_check:
-	LOCKDEP_SYS_EXIT_IRQ
-	movl TI_flags(%rcx),%edx
-	andl %edi,%edx
-	CFI_REMEMBER_STATE
-	jnz  retint_careful
-
-retint_swapgs:		/* return to user-space */
-	/*
-	 * The iretq could re-enable interrupts:
-	 */
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_IRETQ
-
-	SWAPGS
-	jmp	restore_c_regs_and_iret
-
-/* Returning to kernel space */
-retint_kernel:
-#ifdef CONFIG_PREEMPT
-	/* Interrupts are off */
-	/* Check if we need preemption */
-	bt	$9,EFLAGS(%rsp)	/* interrupts were off? */
-	jnc	1f
-0:	cmpl	$0,PER_CPU_VAR(__preempt_count)
-	jnz	1f
-	call	preempt_schedule_irq
-	jmp	0b
-1:
-#endif
-	/*
-	 * The iretq could re-enable interrupts:
-	 */
-	TRACE_IRQS_IRETQ
-
-/*
- * At this label, code paths which return to kernel and to user,
- * which come from interrupts/exception and from syscalls, merge.
- */
-restore_c_regs_and_iret:
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
-
-irq_return:
-	INTERRUPT_RETURN
-
-ENTRY(native_iret)
-	/*
-	 * Are we returning to a stack segment from the LDT?  Note: in
-	 * 64-bit mode SS:RSP on the exception stack is always valid.
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	testb $4,(SS-RIP)(%rsp)
-	jnz native_irq_return_ldt
-#endif
-
-.global native_irq_return_iret
-native_irq_return_iret:
-	/*
-	 * This may fault.  Non-paranoid faults on return to userspace are
-	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
-	 * Double-faults due to espfix64 are handled in do_double_fault.
-	 * Other faults here are fatal.
-	 */
-	iretq
-
-#ifdef CONFIG_X86_ESPFIX64
-native_irq_return_ldt:
-	pushq_cfi %rax
-	pushq_cfi %rdi
-	SWAPGS
-	movq PER_CPU_VAR(espfix_waddr),%rdi
-	movq %rax,(0*8)(%rdi)	/* RAX */
-	movq (2*8)(%rsp),%rax	/* RIP */
-	movq %rax,(1*8)(%rdi)
-	movq (3*8)(%rsp),%rax	/* CS */
-	movq %rax,(2*8)(%rdi)
-	movq (4*8)(%rsp),%rax	/* RFLAGS */
-	movq %rax,(3*8)(%rdi)
-	movq (6*8)(%rsp),%rax	/* SS */
-	movq %rax,(5*8)(%rdi)
-	movq (5*8)(%rsp),%rax	/* RSP */
-	movq %rax,(4*8)(%rdi)
-	andl $0xffff0000,%eax
-	popq_cfi %rdi
-	orq PER_CPU_VAR(espfix_stack),%rax
-	SWAPGS
-	movq %rax,%rsp
-	popq_cfi %rax
-	jmp native_irq_return_iret
-#endif
-
-	/* edi: workmask, edx: work */
-retint_careful:
-	CFI_RESTORE_STATE
-	bt    $TIF_NEED_RESCHED,%edx
-	jnc   retint_signal
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq_cfi %rdi
-	SCHEDULE_USER
-	popq_cfi %rdi
-	GET_THREAD_INFO(%rcx)
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp retint_check
-
-retint_signal:
-	testl $_TIF_DO_NOTIFY_MASK,%edx
-	jz    retint_swapgs
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_EXTRA_REGS
-	movq $-1,ORIG_RAX(%rsp)
-	xorl %esi,%esi		# oldset
-	movq %rsp,%rdi		# &pt_regs
-	call do_notify_resume
-	RESTORE_EXTRA_REGS
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	GET_THREAD_INFO(%rcx)
-	jmp retint_with_reschedule
-
-	CFI_ENDPROC
-END(common_interrupt)
-
-/*
- * APIC interrupts.
- */
-.macro apicinterrupt3 num sym do_sym
-ENTRY(\sym)
-	INTR_FRAME
-	ASM_CLAC
-	pushq_cfi $~(\num)
-.Lcommon_\sym:
-	interrupt \do_sym
-	jmp ret_from_intr
-	CFI_ENDPROC
-END(\sym)
-.endm
-
-#ifdef CONFIG_TRACING
-#define trace(sym) trace_##sym
-#define smp_trace(sym) smp_trace_##sym
-
-.macro trace_apicinterrupt num sym
-apicinterrupt3 \num trace(\sym) smp_trace(\sym)
-.endm
-#else
-.macro trace_apicinterrupt num sym do_sym
-.endm
-#endif
-
-.macro apicinterrupt num sym do_sym
-apicinterrupt3 \num \sym \do_sym
-trace_apicinterrupt \num \sym
-.endm
-
-#ifdef CONFIG_SMP
-apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
-	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt3 REBOOT_VECTOR \
-	reboot_interrupt smp_reboot_interrupt
-#endif
-
-#ifdef CONFIG_X86_UV
-apicinterrupt3 UV_BAU_MESSAGE \
-	uv_bau_message_intr1 uv_bau_message_interrupt
-#endif
-apicinterrupt LOCAL_TIMER_VECTOR \
-	apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt X86_PLATFORM_IPI_VECTOR \
-	x86_platform_ipi smp_x86_platform_ipi
-
-#ifdef CONFIG_HAVE_KVM
-apicinterrupt3 POSTED_INTR_VECTOR \
-	kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
-apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
-	kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
-#endif
-
-#ifdef CONFIG_X86_MCE_THRESHOLD
-apicinterrupt THRESHOLD_APIC_VECTOR \
-	threshold_interrupt smp_threshold_interrupt
-#endif
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-apicinterrupt THERMAL_APIC_VECTOR \
-	thermal_interrupt smp_thermal_interrupt
-#endif
-
-#ifdef CONFIG_SMP
-apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
-	call_function_single_interrupt smp_call_function_single_interrupt
-apicinterrupt CALL_FUNCTION_VECTOR \
-	call_function_interrupt smp_call_function_interrupt
-apicinterrupt RESCHEDULE_VECTOR \
-	reschedule_interrupt smp_reschedule_interrupt
-#endif
-
-apicinterrupt ERROR_APIC_VECTOR \
-	error_interrupt smp_error_interrupt
-apicinterrupt SPURIOUS_APIC_VECTOR \
-	spurious_interrupt smp_spurious_interrupt
-
-#ifdef CONFIG_IRQ_WORK
-apicinterrupt IRQ_WORK_VECTOR \
-	irq_work_interrupt smp_irq_work_interrupt
-#endif
-
-/*
- * Exception entry points.
- */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
-
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
-ENTRY(\sym)
-	/* Sanity check */
-	.if \shift_ist != -1 && \paranoid == 0
-	.error "using shift_ist requires paranoid=1"
-	.endif
-
-	.if \has_error_code
-	XCPT_FRAME
-	.else
-	INTR_FRAME
-	.endif
-
-	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-
-	.ifeq \has_error_code
-	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
-	.endif
-
-	ALLOC_PT_GPREGS_ON_STACK
-
-	.if \paranoid
-	.if \paranoid == 1
-	CFI_REMEMBER_STATE
-	testb	$3, CS(%rsp)		/* If coming from userspace, switch */
-	jnz 1f				/* stacks. */
-	.endif
-	call paranoid_entry
-	.else
-	call error_entry
-	.endif
-	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
-	DEFAULT_FRAME 0
-
-	.if \paranoid
-	.if \shift_ist != -1
-	TRACE_IRQS_OFF_DEBUG		/* reload IDT in case of recursion */
-	.else
-	TRACE_IRQS_OFF
-	.endif
-	.endif
-
-	movq %rsp,%rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq ORIG_RAX(%rsp),%rsi	/* get error code */
-	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl %esi,%esi			/* no error code */
-	.endif
-
-	.if \shift_ist != -1
-	subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	call \do_sym
-
-	.if \shift_ist != -1
-	addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	/* these procedures expect "no swapgs" flag in ebx */
-	.if \paranoid
-	jmp paranoid_exit
-	.else
-	jmp error_exit
-	.endif
-
-	.if \paranoid == 1
-	CFI_RESTORE_STATE
-	/*
-	 * Paranoid entry from userspace.  Switch stacks and treat it
-	 * as a normal entry.  This means that paranoid handlers
-	 * run in real process context if user_mode(regs).
-	 */
-1:
-	call error_entry
-
-	DEFAULT_FRAME 0
-
-	movq %rsp,%rdi			/* pt_regs pointer */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack */
-
-	movq %rsp,%rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq ORIG_RAX(%rsp),%rsi	/* get error code */
-	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl %esi,%esi			/* no error code */
-	.endif
-
-	call \do_sym
-
-	jmp error_exit			/* %ebx: no swapgs flag */
-	.endif
-
-	CFI_ENDPROC
-END(\sym)
-.endm
-
-#ifdef CONFIG_TRACING
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#else
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#endif
-
-idtentry divide_error do_divide_error has_error_code=0
-idtentry overflow do_overflow has_error_code=0
-idtentry bounds do_bounds has_error_code=0
-idtentry invalid_op do_invalid_op has_error_code=0
-idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2
-idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
-idtentry invalid_TSS do_invalid_TSS has_error_code=1
-idtentry segment_not_present do_segment_not_present has_error_code=1
-idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
-idtentry coprocessor_error do_coprocessor_error has_error_code=0
-idtentry alignment_check do_alignment_check has_error_code=1
-idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
-
-
-	/* Reload gs selector with exception handling */
-	/* edi:  new selector */
-ENTRY(native_load_gs_index)
-	CFI_STARTPROC
-	pushfq_cfi
-	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
-	SWAPGS
-gs_change:
-	movl %edi,%gs
-2:	mfence		/* workaround */
-	SWAPGS
-	popfq_cfi
-	ret
-	CFI_ENDPROC
-END(native_load_gs_index)
-
-	_ASM_EXTABLE(gs_change,bad_gs)
-	.section .fixup,"ax"
-	/* running with kernelgs */
-bad_gs:
-	SWAPGS			/* switch back to user gs */
-	xorl %eax,%eax
-	movl %eax,%gs
-	jmp  2b
-	.previous
-
-/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(do_softirq_own_stack)
-	CFI_STARTPROC
-	pushq_cfi %rbp
-	CFI_REL_OFFSET rbp,0
-	mov  %rsp,%rbp
-	CFI_DEF_CFA_REGISTER rbp
-	incl PER_CPU_VAR(irq_count)
-	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
-	push  %rbp			# backlink for old unwinder
-	call __do_softirq
-	leaveq
-	CFI_RESTORE		rbp
-	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET   -8
-	decl PER_CPU_VAR(irq_count)
-	ret
-	CFI_ENDPROC
-END(do_softirq_own_stack)
-
-#ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
-
-/*
- * A note on the "critical region" in our callback handler.
- * We want to avoid stacking callback handlers due to events occurring
- * during handling of the last event. To do this, we keep events disabled
- * until we've done all processing. HOWEVER, we must enable events before
- * popping the stack frame (can't be done atomically) and so it would still
- * be possible to get enough handler activations to overflow the stack.
- * Although unlikely, bugs of that kind are hard to track down, so we'd
- * like to avoid the possibility.
- * So, on entry to the handler we detect whether we interrupted an
- * existing activation in its critical region -- if so, we pop the current
- * activation and restart the handler using the previous one.
- */
-ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
-	CFI_STARTPROC
-/*
- * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
- * see the correct pointer to the pt_regs
- */
-	movq %rdi, %rsp            # we don't return, adjust the stack frame
-	CFI_ENDPROC
-	DEFAULT_FRAME
-11:	incl PER_CPU_VAR(irq_count)
-	movq %rsp,%rbp
-	CFI_DEF_CFA_REGISTER rbp
-	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-	pushq %rbp			# backlink for old unwinder
-	call xen_evtchn_do_upcall
-	popq %rsp
-	CFI_DEF_CFA_REGISTER rsp
-	decl PER_CPU_VAR(irq_count)
-#ifndef CONFIG_PREEMPT
-	call xen_maybe_preempt_hcall
-#endif
-	jmp  error_exit
-	CFI_ENDPROC
-END(xen_do_hypervisor_callback)
-
-/*
- * Hypervisor uses this for application faults while it executes.
- * We get here for two reasons:
- *  1. Fault while reloading DS, ES, FS or GS
- *  2. Fault while executing IRET
- * Category 1 we do not need to fix up as Xen has already reloaded all segment
- * registers that could be reloaded and zeroed the others.
- * Category 2 we fix up by killing the current process. We cannot use the
- * normal Linux return path in this case because if we use the IRET hypercall
- * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
- * We distinguish between categories by comparing each saved segment register
- * with its current contents: any discrepancy means we in category 1.
- */
-ENTRY(xen_failsafe_callback)
-	INTR_FRAME 1 (6*8)
-	/*CFI_REL_OFFSET gs,GS*/
-	/*CFI_REL_OFFSET fs,FS*/
-	/*CFI_REL_OFFSET es,ES*/
-	/*CFI_REL_OFFSET ds,DS*/
-	CFI_REL_OFFSET r11,8
-	CFI_REL_OFFSET rcx,0
-	movl %ds,%ecx
-	cmpw %cx,0x10(%rsp)
-	CFI_REMEMBER_STATE
-	jne 1f
-	movl %es,%ecx
-	cmpw %cx,0x18(%rsp)
-	jne 1f
-	movl %fs,%ecx
-	cmpw %cx,0x20(%rsp)
-	jne 1f
-	movl %gs,%ecx
-	cmpw %cx,0x28(%rsp)
-	jne 1f
-	/* All segments match their saved values => Category 2 (Bad IRET). */
-	movq (%rsp),%rcx
-	CFI_RESTORE rcx
-	movq 8(%rsp),%r11
-	CFI_RESTORE r11
-	addq $0x30,%rsp
-	CFI_ADJUST_CFA_OFFSET -0x30
-	pushq_cfi $0	/* RIP */
-	pushq_cfi %r11
-	pushq_cfi %rcx
-	jmp general_protection
-	CFI_RESTORE_STATE
-1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
-	movq (%rsp),%rcx
-	CFI_RESTORE rcx
-	movq 8(%rsp),%r11
-	CFI_RESTORE r11
-	addq $0x30,%rsp
-	CFI_ADJUST_CFA_OFFSET -0x30
-	pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-	ALLOC_PT_GPREGS_ON_STACK
-	SAVE_C_REGS
-	SAVE_EXTRA_REGS
-	jmp error_exit
-	CFI_ENDPROC
-END(xen_failsafe_callback)
-
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
-	xen_hvm_callback_vector xen_evtchn_do_upcall
-
-#endif /* CONFIG_XEN */
-
-#if IS_ENABLED(CONFIG_HYPERV)
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
-	hyperv_callback_vector hyperv_vector_handler
-#endif /* CONFIG_HYPERV */
-
-idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry stack_segment do_stack_segment has_error_code=1
-#ifdef CONFIG_XEN
-idtentry xen_debug do_debug has_error_code=0
-idtentry xen_int3 do_int3 has_error_code=0
-idtentry xen_stack_segment do_stack_segment has_error_code=1
-#endif
-idtentry general_protection do_general_protection has_error_code=1
-trace_idtentry page_fault do_page_fault has_error_code=1
-#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1
-#endif
-#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
-#endif
-
-/*
- * Save all registers in pt_regs, and switch gs if needed.
- * Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
- */
-ENTRY(paranoid_entry)
-	XCPT_FRAME 1 15*8
-	cld
-	SAVE_C_REGS 8
-	SAVE_EXTRA_REGS 8
-	movl $1,%ebx
-	movl $MSR_GS_BASE,%ecx
-	rdmsr
-	testl %edx,%edx
-	js 1f	/* negative -> in kernel */
-	SWAPGS
-	xorl %ebx,%ebx
-1:	ret
-	CFI_ENDPROC
-END(paranoid_entry)
-
-/*
- * "Paranoid" exit path from exception stack.  This is invoked
- * only on return from non-NMI IST interrupts that came
- * from kernel space.
- *
- * We may be returning to very strange contexts (e.g. very early
- * in syscall entry), so checking for preemption here would
- * be complicated.  Fortunately, we there's no good reason
- * to try to handle preemption here.
- */
-/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
-ENTRY(paranoid_exit)
-	DEFAULT_FRAME
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF_DEBUG
-	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_exit_no_swapgs
-	TRACE_IRQS_IRETQ
-	SWAPGS_UNSAFE_STACK
-	jmp paranoid_exit_restore
-paranoid_exit_no_swapgs:
-	TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
-	INTERRUPT_RETURN
-	CFI_ENDPROC
-END(paranoid_exit)
-
-/*
- * Save all registers in pt_regs, and switch gs if needed.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
- */
-ENTRY(error_entry)
-	XCPT_FRAME 1 15*8
-	cld
-	SAVE_C_REGS 8
-	SAVE_EXTRA_REGS 8
-	xorl %ebx,%ebx
-	testb	$3, CS+8(%rsp)
-	jz	error_kernelspace
-error_swapgs:
-	SWAPGS
-error_sti:
-	TRACE_IRQS_OFF
-	ret
-
-	/*
-	 * There are two places in the kernel that can potentially fault with
-	 * usergs. Handle them here.  B stepping K8s sometimes report a
-	 * truncated RIP for IRET exceptions returning to compat mode. Check
-	 * for these here too.
-	 */
-error_kernelspace:
-	CFI_REL_OFFSET rcx, RCX+8
-	incl %ebx
-	leaq native_irq_return_iret(%rip),%rcx
-	cmpq %rcx,RIP+8(%rsp)
-	je error_bad_iret
-	movl %ecx,%eax	/* zero extend */
-	cmpq %rax,RIP+8(%rsp)
-	je bstep_iret
-	cmpq $gs_change,RIP+8(%rsp)
-	je error_swapgs
-	jmp error_sti
-
-bstep_iret:
-	/* Fix truncated RIP */
-	movq %rcx,RIP+8(%rsp)
-	/* fall through */
-
-error_bad_iret:
-	SWAPGS
-	mov %rsp,%rdi
-	call fixup_bad_iret
-	mov %rax,%rsp
-	decl %ebx	/* Return to usergs */
-	jmp error_sti
-	CFI_ENDPROC
-END(error_entry)
-
-
-/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
-ENTRY(error_exit)
-	DEFAULT_FRAME
-	movl %ebx,%eax
-	RESTORE_EXTRA_REGS
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	GET_THREAD_INFO(%rcx)
-	testl %eax,%eax
-	jnz retint_kernel
-	LOCKDEP_SYS_EXIT_IRQ
-	movl TI_flags(%rcx),%edx
-	movl $_TIF_WORK_MASK,%edi
-	andl %edi,%edx
-	jnz retint_careful
-	jmp retint_swapgs
-	CFI_ENDPROC
-END(error_exit)
-
-/* Runs on exception stack */
-ENTRY(nmi)
-	INTR_FRAME
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	/*
-	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
-	 * the iretq it performs will take us out of NMI context.
-	 * This means that we can have nested NMIs where the next
-	 * NMI is using the top of the stack of the previous NMI. We
-	 * can't let it execute because the nested NMI will corrupt the
-	 * stack of the previous NMI. NMI handlers are not re-entrant
-	 * anyway.
-	 *
-	 * To handle this case we do the following:
-	 *  Check the a special location on the stack that contains
-	 *  a variable that is set when NMIs are executing.
-	 *  The interrupted task's stack is also checked to see if it
-	 *  is an NMI stack.
-	 *  If the variable is not set and the stack is not the NMI
-	 *  stack then:
-	 *    o Set the special variable on the stack
-	 *    o Copy the interrupt frame into a "saved" location on the stack
-	 *    o Copy the interrupt frame into a "copy" location on the stack
-	 *    o Continue processing the NMI
-	 *  If the variable is set or the previous stack is the NMI stack:
-	 *    o Modify the "copy" location to jump to the repeate_nmi
-	 *    o return back to the first NMI
-	 *
-	 * Now on exit of the first NMI, we first clear the stack variable
-	 * The NMI stack will tell any nested NMIs at that point that it is
-	 * nested. Then we pop the stack normally with iret, and if there was
-	 * a nested NMI that updated the copy interrupt stack frame, a
-	 * jump will be made to the repeat_nmi code that will handle the second
-	 * NMI.
-	 */
-
-	/* Use %rdx as our temp variable throughout */
-	pushq_cfi %rdx
-	CFI_REL_OFFSET rdx, 0
-
-	/*
-	 * If %cs was not the kernel segment, then the NMI triggered in user
-	 * space, which means it is definitely not nested.
-	 */
-	cmpl $__KERNEL_CS, 16(%rsp)
-	jne first_nmi
-
-	/*
-	 * Check the special variable on the stack to see if NMIs are
-	 * executing.
-	 */
-	cmpl $1, -8(%rsp)
-	je nested_nmi
-
-	/*
-	 * Now test if the previous stack was an NMI stack.
-	 * We need the double check. We check the NMI stack to satisfy the
-	 * race when the first NMI clears the variable before returning.
-	 * We check the variable because the first NMI could be in a
-	 * breakpoint routine using a breakpoint stack.
-	 */
-	lea	6*8(%rsp), %rdx
-	/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
-	cmpq	%rdx, 4*8(%rsp)
-	/* If the stack pointer is above the NMI stack, this is a normal NMI */
-	ja	first_nmi
-	subq	$EXCEPTION_STKSZ, %rdx
-	cmpq	%rdx, 4*8(%rsp)
-	/* If it is below the NMI stack, it is a normal NMI */
-	jb	first_nmi
-	/* Ah, it is within the NMI stack, treat it as nested */
-
-	CFI_REMEMBER_STATE
-
-nested_nmi:
-	/*
-	 * Do nothing if we interrupted the fixup in repeat_nmi.
-	 * It's about to repeat the NMI handler, so we are fine
-	 * with ignoring this one.
-	 */
-	movq $repeat_nmi, %rdx
-	cmpq 8(%rsp), %rdx
-	ja 1f
-	movq $end_repeat_nmi, %rdx
-	cmpq 8(%rsp), %rdx
-	ja nested_nmi_out
-
-1:
-	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
-	leaq -1*8(%rsp), %rdx
-	movq %rdx, %rsp
-	CFI_ADJUST_CFA_OFFSET 1*8
-	leaq -10*8(%rsp), %rdx
-	pushq_cfi $__KERNEL_DS
-	pushq_cfi %rdx
-	pushfq_cfi
-	pushq_cfi $__KERNEL_CS
-	pushq_cfi $repeat_nmi
-
-	/* Put stack back */
-	addq $(6*8), %rsp
-	CFI_ADJUST_CFA_OFFSET -6*8
-
-nested_nmi_out:
-	popq_cfi %rdx
-	CFI_RESTORE rdx
-
-	/* No need to check faults here */
-	INTERRUPT_RETURN
-
-	CFI_RESTORE_STATE
-first_nmi:
-	/*
-	 * Because nested NMIs will use the pushed location that we
-	 * stored in rdx, we must keep that space available.
-	 * Here's what our stack frame will look like:
-	 * +-------------------------+
-	 * | original SS             |
-	 * | original Return RSP     |
-	 * | original RFLAGS         |
-	 * | original CS             |
-	 * | original RIP            |
-	 * +-------------------------+
-	 * | temp storage for rdx    |
-	 * +-------------------------+
-	 * | NMI executing variable  |
-	 * +-------------------------+
-	 * | copied SS               |
-	 * | copied Return RSP       |
-	 * | copied RFLAGS           |
-	 * | copied CS               |
-	 * | copied RIP              |
-	 * +-------------------------+
-	 * | Saved SS                |
-	 * | Saved Return RSP        |
-	 * | Saved RFLAGS            |
-	 * | Saved CS                |
-	 * | Saved RIP               |
-	 * +-------------------------+
-	 * | pt_regs                 |
-	 * +-------------------------+
-	 *
-	 * The saved stack frame is used to fix up the copied stack frame
-	 * that a nested NMI may change to make the interrupted NMI iret jump
-	 * to the repeat_nmi. The original stack frame and the temp storage
-	 * is also used by nested NMIs and can not be trusted on exit.
-	 */
-	/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
-	movq (%rsp), %rdx
-	CFI_RESTORE rdx
-
-	/* Set the NMI executing variable on the stack. */
-	pushq_cfi $1
-
-	/*
-	 * Leave room for the "copied" frame
-	 */
-	subq $(5*8), %rsp
-	CFI_ADJUST_CFA_OFFSET 5*8
-
-	/* Copy the stack frame to the Saved frame */
-	.rept 5
-	pushq_cfi 11*8(%rsp)
-	.endr
-	CFI_DEF_CFA_OFFSET 5*8
-
-	/* Everything up to here is safe from nested NMIs */
-
-	/*
-	 * If there was a nested NMI, the first NMI's iret will return
-	 * here. But NMIs are still enabled and we can take another
-	 * nested NMI. The nested NMI checks the interrupted RIP to see
-	 * if it is between repeat_nmi and end_repeat_nmi, and if so
-	 * it will just return, as we are about to repeat an NMI anyway.
-	 * This makes it safe to copy to the stack frame that a nested
-	 * NMI will update.
-	 */
-repeat_nmi:
-	/*
-	 * Update the stack variable to say we are still in NMI (the update
-	 * is benign for the non-repeat case, where 1 was pushed just above
-	 * to this very stack slot).
-	 */
-	movq $1, 10*8(%rsp)
-
-	/* Make another copy, this one may be modified by nested NMIs */
-	addq $(10*8), %rsp
-	CFI_ADJUST_CFA_OFFSET -10*8
-	.rept 5
-	pushq_cfi -6*8(%rsp)
-	.endr
-	subq $(5*8), %rsp
-	CFI_DEF_CFA_OFFSET 5*8
-end_repeat_nmi:
-
-	/*
-	 * Everything below this point can be preempted by a nested
-	 * NMI if the first NMI took an exception and reset our iret stack
-	 * so that we repeat another NMI.
-	 */
-	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	ALLOC_PT_GPREGS_ON_STACK
-
-	/*
-	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
-	 * as we should not be calling schedule in NMI context.
-	 * Even with normal interrupts enabled. An NMI should not be
-	 * setting NEED_RESCHED or anything that normal interrupts and
-	 * exceptions might do.
-	 */
-	call paranoid_entry
-	DEFAULT_FRAME 0
-
-	/*
-	 * Save off the CR2 register. If we take a page fault in the NMI then
-	 * it could corrupt the CR2 value. If the NMI preempts a page fault
-	 * handler before it was able to read the CR2 register, and then the
-	 * NMI itself takes a page fault, the page fault that was preempted
-	 * will read the information from the NMI page fault and not the
-	 * origin fault. Save it off and restore it if it changes.
-	 * Use the r12 callee-saved register.
-	 */
-	movq %cr2, %r12
-
-	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
-	movq %rsp,%rdi
-	movq $-1,%rsi
-	call do_nmi
-
-	/* Did the NMI take a page fault? Restore cr2 if it did */
-	movq %cr2, %rcx
-	cmpq %rcx, %r12
-	je 1f
-	movq %r12, %cr2
-1:
-	testl %ebx,%ebx				/* swapgs needed? */
-	jnz nmi_restore
-nmi_swapgs:
-	SWAPGS_UNSAFE_STACK
-nmi_restore:
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
-	/* Pop the extra iret frame at once */
-	REMOVE_PT_GPREGS_FROM_STACK 6*8
-
-	/* Clear the NMI executing stack variable */
-	movq $0, 5*8(%rsp)
-	jmp irq_return
-	CFI_ENDPROC
-END(nmi)
-
-ENTRY(ignore_sysret)
-	CFI_STARTPROC
-	mov $-ENOSYS,%eax
-	sysret
-	CFI_ENDPROC
-END(ignore_sysret)
-
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2b55ee6db053..5a4668136e98 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	clear_bss();
 
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
-		set_intr_gate(i, early_idt_handlers[i]);
+		set_intr_gate(i, early_idt_handler_array[i]);
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 02d257256200..544dec4cc605 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -478,21 +478,22 @@ is486:
 __INIT
 setup_once:
 	/*
-	 * Set up a idt with 256 entries pointing to ignore_int,
-	 * interrupt gates. It doesn't actually load idt - that needs
-	 * to be done on each CPU. Interrupts are enabled elsewhere,
-	 * when we can be relatively sure everything is ok.
+	 * Set up a idt with 256 interrupt gates that push zero if there
+	 * is no error code and then jump to early_idt_handler_common.
+	 * It doesn't actually load the idt - that needs to be done on
+	 * each CPU. Interrupts are enabled elsewhere, when we can be
+	 * relatively sure everything is ok.
 	 */
 
 	movl $idt_table,%edi
-	movl $early_idt_handlers,%eax
+	movl $early_idt_handler_array,%eax
 	movl $NUM_EXCEPTION_VECTORS,%ecx
 1:
 	movl %eax,(%edi)
 	movl %eax,4(%edi)
 	/* interrupt gate, dpl=0, present */
 	movl $(0x8E000000 + __KERNEL_CS),2(%edi)
-	addl $9,%eax
+	addl $EARLY_IDT_HANDLER_SIZE,%eax
 	addl $8,%edi
 	loop 1b
 
@@ -524,26 +525,28 @@ setup_once:
 	andl $0,setup_once_ref	/* Once is enough, thanks */
 	ret
 
-ENTRY(early_idt_handlers)
+ENTRY(early_idt_handler_array)
 	# 36(%esp) %eflags
 	# 32(%esp) %cs
 	# 28(%esp) %eip
 	# 24(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.if (EXCEPTION_ERRCODE_MASK >> i) & 1
-	ASM_NOP2
-	.else
+	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
 	pushl $0		# Dummy error code, to make stack frame uniform
 	.endif
 	pushl $i		# 20(%esp) Vector number
-	jmp early_idt_handler
+	jmp early_idt_handler_common
 	i = i + 1
+	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
-ENDPROC(early_idt_handlers)
+ENDPROC(early_idt_handler_array)
 	
-	/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+	/*
+	 * The stack is the hardware frame, an error code or zero, and the
+	 * vector number.
+	 */
 	cld
 
 	cmpl $2,(%esp)		# X86_TRAP_NMI
@@ -603,7 +606,7 @@ ex_entry:
 .Lis_nmi:
 	addl $8,%esp		/* drop vector number and error code */
 	iret
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
 
 /* This is the default interrupt "handler" :-) */
 	ALIGN
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 43eafc8afb69..e5c27f729a38 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -321,26 +321,28 @@ bad_address:
 	jmp bad_address
 
 	__INIT
-	.globl early_idt_handlers
-early_idt_handlers:
+ENTRY(early_idt_handler_array)
 	# 104(%rsp) %rflags
 	#  96(%rsp) %cs
 	#  88(%rsp) %rip
 	#  80(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.if (EXCEPTION_ERRCODE_MASK >> i) & 1
-	ASM_NOP2
-	.else
+	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
 	pushq $0		# Dummy error code, to make stack frame uniform
 	.endif
 	pushq $i		# 72(%rsp) Vector number
-	jmp early_idt_handler
+	jmp early_idt_handler_common
 	i = i + 1
+	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
+ENDPROC(early_idt_handler_array)
 
-/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+	/*
+	 * The stack is the hardware frame, an error code or zero, and the
+	 * vector number.
+	 */
 	cld
 
 	cmpl $2,(%rsp)		# X86_TRAP_NMI
@@ -412,7 +414,7 @@ ENTRY(early_idt_handler)
 .Lis_nmi:
 	addq $16,%rsp		# drop vector number and error code
 	INTERRUPT_RETURN
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
 
 	__INITDATA
 
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 009183276bb7..6185d3141219 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -173,6 +173,21 @@ static void init_thread_xstate(void)
 		xstate_size = sizeof(struct i387_fxsave_struct);
 	else
 		xstate_size = sizeof(struct i387_fsave_struct);
+
+	/*
+	 * Quirk: we don't yet handle the XSAVES* instructions
+	 * correctly, as we don't correctly convert between
+	 * standard and compacted format when interfacing
+	 * with user-space - so disable it for now.
+	 *
+	 * The difference is small: with recent CPUs the
+	 * compacted format is only marginally smaller than
+	 * the standard FPU state format.
+	 *
+	 * ( This is easy to backport while we are fixing
+	 *   XSAVES* support. )
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 }
 
 /*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7e10c8b4b318..88b366487b0e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -122,6 +122,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_puts(p, "  Threshold APIC interrupts\n");
 #endif
+#ifdef CONFIG_X86_MCE_AMD
+	seq_printf(p, "%*s: ", prec, "DFR");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
+	seq_puts(p, "  Deferred Error APIC interrupts\n");
+#endif
 #ifdef CONFIG_X86_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
 	for_each_online_cpu(j)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 680723a8e4b6..a3a5e158ed69 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
 
+#ifdef CONFIG_X86_MCE_AMD
+	alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
+#endif
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9435620062df..1681504e44a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
 	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
 }
 
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void kvm_wait(u8 *ptr, u8 val)
+{
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+
+	local_irq_save(flags);
+
+	if (READ_ONCE(*ptr) != val)
+		goto out;
+
+	/*
+	 * halt until it's our turn and kicked. Note that we do safe halt
+	 * for irq enabled case to avoid hang when lock info is overwritten
+	 * in irq spinlock slowpath and no spurious interrupt occur to save us.
+	 */
+	if (arch_irqs_disabled_flags(flags))
+		halt();
+	else
+		safe_halt();
+
+out:
+	local_irq_restore(flags);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
 enum kvm_contention_stat {
 	TAKEN_SLOW,
 	TAKEN_SLOW_PICKUP,
@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
 	}
 }
 
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+
 /*
  * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
  */
@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
 	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
 		return;
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+	__pv_init_lock_hash();
+	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+	pv_lock_ops.wait = kvm_wait;
+	pv_lock_ops.kick = kvm_kick_cpu;
+#else /* !CONFIG_QUEUED_SPINLOCKS */
 	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
 	pv_lock_ops.unlock_kick = kvm_unlock_kick;
+#endif
 }
 
 static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 415480d3ea84..11546b462fa6 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -17,6 +17,7 @@
 #include <linux/ftrace.h>
 #include <linux/io.h>
 #include <linux/suspend.h>
+#include <linux/vmalloc.h>
 
 #include <asm/init.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index bbb6c7316341..33ee3e0efd65 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,11 +8,33 @@
 
 #include <asm/paravirt.h>
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+__visible void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+	native_queued_spin_unlock(lock);
+}
+
+PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
+
+bool pv_is_native_spin_unlock(void)
+{
+	return pv_lock_ops.queued_spin_unlock.func ==
+		__raw_callee_save___native_queued_spin_unlock;
+}
+#endif
+
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
+#ifdef CONFIG_QUEUED_SPINLOCKS
+	.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+	.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
+	.wait = paravirt_nop,
+	.kick = paravirt_nop,
+#else /* !CONFIG_QUEUED_SPINLOCKS */
 	.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
 	.unlock_kick = paravirt_nop,
-#endif
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+#endif /* SMP */
 };
 EXPORT_SYMBOL(pv_lock_ops);
 
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab6..e1b013696dde 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
 
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
+#endif
+
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
 {
 	/* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 	return 0;
 }
 
+extern bool pv_is_native_spin_unlock(void);
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		      unsigned long addr, unsigned len)
 {
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_mmu_ops, write_cr3);
 		PATCH_SITE(pv_cpu_ops, clts);
 		PATCH_SITE(pv_cpu_ops, read_tsc);
-
-	patch_site:
-		ret = paravirt_patch_insns(ibuf, len, start, end);
-		break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+			if (pv_is_native_spin_unlock()) {
+				start = start_pv_lock_ops_queued_spin_unlock;
+				end   = end_pv_lock_ops_queued_spin_unlock;
+				goto patch_site;
+			}
+#endif
 
 	default:
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
+
+patch_site:
+		ret = paravirt_patch_insns(ibuf, len, start, end);
+		break;
 	}
 #undef PATCH_SITE
 	return ret;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 0de21c62c348..8aa05583bc42 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 DEF_NATIVE(, mov32, "mov %edi, %eax");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
+#endif
+
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
 {
 	return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 				    start__mov64, end__mov64);
 }
 
+extern bool pv_is_native_spin_unlock(void);
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		      unsigned long addr, unsigned len)
 {
@@ -58,14 +64,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_cpu_ops, clts);
 		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
 		PATCH_SITE(pv_cpu_ops, wbinvd);
-
-	patch_site:
-		ret = paravirt_patch_insns(ibuf, len, start, end);
-		break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+			if (pv_is_native_spin_unlock()) {
+				start = start_pv_lock_ops_queued_spin_unlock;
+				end   = end_pv_lock_ops_queued_spin_unlock;
+				goto patch_site;
+			}
+#endif
 
 	default:
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
+
+patch_site:
+		ret = paravirt_patch_insns(ibuf, len, start, end);
+		break;
 	}
 #undef PATCH_SITE
 	return ret;
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
deleted file mode 100644
index 3777189c4a19..000000000000
--- a/arch/x86/kernel/syscall_32.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* System call table for i386. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#ifdef CONFIG_IA32_EMULATION
-#define SYM(sym, compat) compat
-#else
-#define SYM(sym, compat) sym
-#define ia32_sys_call_table sys_call_table
-#define __NR_ia32_syscall_max __NR_syscall_max
-#endif
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
-#include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
-
-#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
-
-typedef asmlinkage void (*sys_call_ptr_t)(void);
-
-extern asmlinkage void sys_ni_syscall(void);
-
-__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
-#include <asm/syscalls_32.h>
-};
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
deleted file mode 100644
index 4ac730b37f0b..000000000000
--- a/arch/x86/kernel/syscall_64.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* System call table for x86-64. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-#include <asm/syscall.h>
-
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
-#include <asm/syscalls_64.h>
-#undef __SYSCALL_64
-
-#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
-
-extern void sys_ni_syscall(void);
-
-asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
-#include <asm/syscalls_64.h>
-};
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5e0791f9d3dc..de379366f6d1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,8 +72,7 @@ gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
 #else
 #include <asm/processor-flags.h>
 #include <asm/setup.h>
-
-asmlinkage int system_call(void);
+#include <asm/proto.h>
 #endif
 
 /* Must be page-aligned because the real IDT is used in a fixmap. */
@@ -813,18 +812,6 @@ dotraplinkage void
 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 {
 	conditional_sti(regs);
-#if 0
-	/* No need to warn about this any longer. */
-	pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
-#endif
-}
-
-asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
-{
-}
-
-asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
-{
 }
 
 /*
@@ -992,12 +979,12 @@ void __init trap_init(void)
 		set_bit(i, used_vectors);
 
 #ifdef CONFIG_IA32_EMULATION
-	set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
 	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_32
-	set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call);
+	set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
 	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
deleted file mode 100644
index 2dcc6ff6fdcc..000000000000
--- a/arch/x86/kernel/vsyscall_64.c
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
- *
- * Based on the original implementation which is:
- *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- *  Copyright 2003 Andi Kleen, SuSE Labs.
- *
- *  Parts of the original code have been moved to arch/x86/vdso/vma.c
- *
- * This file implements vsyscall emulation.  vsyscalls are a legacy ABI:
- * Userspace can request certain kernel services by calling fixed
- * addresses.  This concept is problematic:
- *
- * - It interferes with ASLR.
- * - It's awkward to write code that lives in kernel addresses but is
- *   callable by userspace at fixed addresses.
- * - The whole concept is impossible for 32-bit compat userspace.
- * - UML cannot easily virtualize a vsyscall.
- *
- * As of mid-2014, I believe that there is no new userspace code that
- * will use a vsyscall if the vDSO is present.  I hope that there will
- * soon be no new userspace code that will ever use a vsyscall.
- *
- * The code in this file emulates vsyscalls when notified of a page
- * fault to a vsyscall address.
- */
-
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/syscalls.h>
-#include <linux/ratelimit.h>
-
-#include <asm/vsyscall.h>
-#include <asm/unistd.h>
-#include <asm/fixmap.h>
-#include <asm/traps.h>
-
-#define CREATE_TRACE_POINTS
-#include "vsyscall_trace.h"
-
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
-
-static int __init vsyscall_setup(char *str)
-{
-	if (str) {
-		if (!strcmp("emulate", str))
-			vsyscall_mode = EMULATE;
-		else if (!strcmp("native", str))
-			vsyscall_mode = NATIVE;
-		else if (!strcmp("none", str))
-			vsyscall_mode = NONE;
-		else
-			return -EINVAL;
-
-		return 0;
-	}
-
-	return -EINVAL;
-}
-early_param("vsyscall", vsyscall_setup);
-
-static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
-			      const char *message)
-{
-	if (!show_unhandled_signals)
-		return;
-
-	printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
-			   level, current->comm, task_pid_nr(current),
-			   message, regs->ip, regs->cs,
-			   regs->sp, regs->ax, regs->si, regs->di);
-}
-
-static int addr_to_vsyscall_nr(unsigned long addr)
-{
-	int nr;
-
-	if ((addr & ~0xC00UL) != VSYSCALL_ADDR)
-		return -EINVAL;
-
-	nr = (addr & 0xC00UL) >> 10;
-	if (nr >= 3)
-		return -EINVAL;
-
-	return nr;
-}
-
-static bool write_ok_or_segv(unsigned long ptr, size_t size)
-{
-	/*
-	 * XXX: if access_ok, get_user, and put_user handled
-	 * sig_on_uaccess_error, this could go away.
-	 */
-
-	if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
-		siginfo_t info;
-		struct thread_struct *thread = &current->thread;
-
-		thread->error_code	= 6;  /* user fault, no page, write */
-		thread->cr2		= ptr;
-		thread->trap_nr		= X86_TRAP_PF;
-
-		memset(&info, 0, sizeof(info));
-		info.si_signo		= SIGSEGV;
-		info.si_errno		= 0;
-		info.si_code		= SEGV_MAPERR;
-		info.si_addr		= (void __user *)ptr;
-
-		force_sig_info(SIGSEGV, &info, current);
-		return false;
-	} else {
-		return true;
-	}
-}
-
-bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
-{
-	struct task_struct *tsk;
-	unsigned long caller;
-	int vsyscall_nr, syscall_nr, tmp;
-	int prev_sig_on_uaccess_error;
-	long ret;
-
-	/*
-	 * No point in checking CS -- the only way to get here is a user mode
-	 * trap to a high address, which means that we're in 64-bit user code.
-	 */
-
-	WARN_ON_ONCE(address != regs->ip);
-
-	if (vsyscall_mode == NONE) {
-		warn_bad_vsyscall(KERN_INFO, regs,
-				  "vsyscall attempted with vsyscall=none");
-		return false;
-	}
-
-	vsyscall_nr = addr_to_vsyscall_nr(address);
-
-	trace_emulate_vsyscall(vsyscall_nr);
-
-	if (vsyscall_nr < 0) {
-		warn_bad_vsyscall(KERN_WARNING, regs,
-				  "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
-		goto sigsegv;
-	}
-
-	if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
-		warn_bad_vsyscall(KERN_WARNING, regs,
-				  "vsyscall with bad stack (exploit attempt?)");
-		goto sigsegv;
-	}
-
-	tsk = current;
-
-	/*
-	 * Check for access_ok violations and find the syscall nr.
-	 *
-	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
-	 * 64-bit, so we don't need to special-case it here.  For all the
-	 * vsyscalls, NULL means "don't write anything" not "write it at
-	 * address 0".
-	 */
-	switch (vsyscall_nr) {
-	case 0:
-		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
-		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
-			ret = -EFAULT;
-			goto check_fault;
-		}
-
-		syscall_nr = __NR_gettimeofday;
-		break;
-
-	case 1:
-		if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
-			ret = -EFAULT;
-			goto check_fault;
-		}
-
-		syscall_nr = __NR_time;
-		break;
-
-	case 2:
-		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
-		    !write_ok_or_segv(regs->si, sizeof(unsigned))) {
-			ret = -EFAULT;
-			goto check_fault;
-		}
-
-		syscall_nr = __NR_getcpu;
-		break;
-	}
-
-	/*
-	 * Handle seccomp.  regs->ip must be the original value.
-	 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
-	 *
-	 * We could optimize the seccomp disabled case, but performance
-	 * here doesn't matter.
-	 */
-	regs->orig_ax = syscall_nr;
-	regs->ax = -ENOSYS;
-	tmp = secure_computing();
-	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
-		warn_bad_vsyscall(KERN_DEBUG, regs,
-				  "seccomp tried to change syscall nr or ip");
-		do_exit(SIGSYS);
-	}
-	regs->orig_ax = -1;
-	if (tmp)
-		goto do_ret;  /* skip requested */
-
-	/*
-	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
-	 * preserve that behavior to make writing exploits harder.
-	 */
-	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
-	current_thread_info()->sig_on_uaccess_error = 1;
-
-	ret = -EFAULT;
-	switch (vsyscall_nr) {
-	case 0:
-		ret = sys_gettimeofday(
-			(struct timeval __user *)regs->di,
-			(struct timezone __user *)regs->si);
-		break;
-
-	case 1:
-		ret = sys_time((time_t __user *)regs->di);
-		break;
-
-	case 2:
-		ret = sys_getcpu((unsigned __user *)regs->di,
-				 (unsigned __user *)regs->si,
-				 NULL);
-		break;
-	}
-
-	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
-
-check_fault:
-	if (ret == -EFAULT) {
-		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
-		warn_bad_vsyscall(KERN_INFO, regs,
-				  "vsyscall fault (exploit attempt?)");
-
-		/*
-		 * If we failed to generate a signal for any reason,
-		 * generate one here.  (This should be impossible.)
-		 */
-		if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
-				 !sigismember(&tsk->pending.signal, SIGSEGV)))
-			goto sigsegv;
-
-		return true;  /* Don't emulate the ret. */
-	}
-
-	regs->ax = ret;
-
-do_ret:
-	/* Emulate a ret instruction. */
-	regs->ip = caller;
-	regs->sp += 8;
-	return true;
-
-sigsegv:
-	force_sig(SIGSEGV, current);
-	return true;
-}
-
-/*
- * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
- * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- * not need special handling anymore:
- */
-static const char *gate_vma_name(struct vm_area_struct *vma)
-{
-	return "[vsyscall]";
-}
-static struct vm_operations_struct gate_vma_ops = {
-	.name = gate_vma_name,
-};
-static struct vm_area_struct gate_vma = {
-	.vm_start	= VSYSCALL_ADDR,
-	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
-	.vm_page_prot	= PAGE_READONLY_EXEC,
-	.vm_flags	= VM_READ | VM_EXEC,
-	.vm_ops		= &gate_vma_ops,
-};
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (!mm || mm->context.ia32_compat)
-		return NULL;
-#endif
-	if (vsyscall_mode == NONE)
-		return NULL;
-	return &gate_vma;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	struct vm_area_struct *vma = get_gate_vma(mm);
-
-	if (!vma)
-		return 0;
-
-	return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
-
-/*
- * Use this when you have no reliable mm, typically from interrupt
- * context. It is less reliable than using a task's mm and may give
- * false positives.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
-}
-
-void __init map_vsyscall(void)
-{
-	extern char __vsyscall_page;
-	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
-
-	if (vsyscall_mode != NONE)
-		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-			     vsyscall_mode == NATIVE
-			     ? PAGE_KERNEL_VSYSCALL
-			     : PAGE_KERNEL_VVAR);
-
-	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
-		     (unsigned long)VSYSCALL_ADDR);
-}
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
deleted file mode 100644
index c9596a9af159..000000000000
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * vsyscall_emu_64.S: Vsyscall emulation page
- *
- * Copyright (c) 2011 Andy Lutomirski
- *
- * Subject to the GNU General Public License, version 2
- */
-
-#include <linux/linkage.h>
-
-#include <asm/irq_vectors.h>
-#include <asm/page_types.h>
-#include <asm/unistd_64.h>
-
-__PAGE_ALIGNED_DATA
-	.globl __vsyscall_page
-	.balign PAGE_SIZE, 0xcc
-	.type __vsyscall_page, @object
-__vsyscall_page:
-
-	mov $__NR_gettimeofday, %rax
-	syscall
-	ret
-
-	.balign 1024, 0xcc
-	mov $__NR_time, %rax
-	syscall
-	ret
-
-	.balign 1024, 0xcc
-	mov $__NR_getcpu, %rax
-	syscall
-	ret
-
-	.balign 4096, 0xcc
-
-	.size __vsyscall_page, 4096
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
deleted file mode 100644
index 51e330416995..000000000000
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- *  Copyright 2003 Andi Kleen, SuSE Labs.
- *
- *  Modified for x86 32 bit architecture by
- *  Stefani Seibold <stefani@seibold.net>
- *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- *  Thanks to hpa@transmeta.com for some useful hint.
- *  Special thanks to Ingo Molnar for his early experience with
- *  a different vsyscall implementation for Linux/IA32 and for the name.
- *
- */
-
-#include <linux/timekeeper_internal.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
-
-void update_vsyscall_tz(void)
-{
-	vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
-	vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
-	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
-
-	gtod_write_begin(vdata);
-
-	/* copy vsyscall data */
-	vdata->vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
-	vdata->cycle_last	= tk->tkr_mono.cycle_last;
-	vdata->mask		= tk->tkr_mono.mask;
-	vdata->mult		= tk->tkr_mono.mult;
-	vdata->shift		= tk->tkr_mono.shift;
-
-	vdata->wall_time_sec		= tk->xtime_sec;
-	vdata->wall_time_snsec		= tk->tkr_mono.xtime_nsec;
-
-	vdata->monotonic_time_sec	= tk->xtime_sec
-					+ tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_snsec	= tk->tkr_mono.xtime_nsec
-					+ ((u64)tk->wall_to_monotonic.tv_nsec
-						<< tk->tkr_mono.shift);
-	while (vdata->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
-		vdata->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
-		vdata->monotonic_time_sec++;
-	}
-
-	vdata->wall_time_coarse_sec	= tk->xtime_sec;
-	vdata->wall_time_coarse_nsec	= (long)(tk->tkr_mono.xtime_nsec >>
-						 tk->tkr_mono.shift);
-
-	vdata->monotonic_time_coarse_sec =
-		vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_coarse_nsec =
-		vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
-
-	while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
-		vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
-		vdata->monotonic_time_coarse_sec++;
-	}
-
-	gtod_write_end(vdata);
-}
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h
deleted file mode 100644
index a8b2edec54fe..000000000000
--- a/arch/x86/kernel/vsyscall_trace.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM vsyscall
-
-#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define __VSYSCALL_TRACE_H
-
-#include <linux/tracepoint.h>
-
-TRACE_EVENT(emulate_vsyscall,
-
-	    TP_PROTO(int nr),
-
-	    TP_ARGS(nr),
-
-	    TP_STRUCT__entry(__field(int, nr)),
-
-	    TP_fast_assign(
-			   __entry->nr = nr;
-			   ),
-
-	    TP_printk("nr = %d", __entry->nr)
-);
-
-#endif
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
-#define TRACE_INCLUDE_FILE vsyscall_trace
-#include <trace/define_trace.h>